blob: 16236ef4263bc34e1166403ed629edbe09ac3031 [file] [log] [blame]
Brett Cannon74bfd702003-04-25 09:39:47 +00001"""Regression tests for urllib"""
2
Jeremy Hylton1afc1692008-06-18 20:49:58 +00003import urllib.parse
4import urllib.request
guido@google.coma119df92011-03-29 11:41:02 -07005import urllib.error
Georg Brandl24420152008-05-26 16:32:26 +00006import http.client
Barry Warsaw820c1202008-06-12 04:06:45 +00007import email.message
Jeremy Hylton66dc8c52007-08-04 03:42:26 +00008import io
Brett Cannon74bfd702003-04-25 09:39:47 +00009import unittest
Benjamin Peterson3c2dca62014-06-07 15:08:04 -070010from unittest.mock import patch
Benjamin Petersonee8712c2008-05-20 21:35:26 +000011from test import support
Brett Cannon74bfd702003-04-25 09:39:47 +000012import os
Senthil Kumaran8b7e1612014-09-19 15:23:30 +080013import ssl
Senthil Kumaran2d2ea1b2011-04-14 13:16:30 +080014import sys
Georg Brandl5a650a22005-08-26 08:51:34 +000015import tempfile
Senthil Kumaran277e9092013-04-10 20:51:19 -070016from nturl2path import url2pathname, pathname2url
Jeremy Hylton6102e292000-08-31 15:48:10 +000017
Senthil Kumaranc5c5a142012-01-14 19:09:04 +080018from base64 import b64encode
Georg Brandl2daf6ae2012-02-20 19:54:16 +010019import collections
Senthil Kumaranc5c5a142012-01-14 19:09:04 +080020
Senthil Kumaran8b081b72013-04-10 20:53:12 -070021
def hexescape(char):
    """Escape char as RFC 2396 specifies"""
    # A literal "%" followed by the code point in upper-case hex,
    # zero-padded so that there are always at least two digits.
    return "%%%02X" % ord(char)
Jeremy Hylton6102e292000-08-31 15:48:10 +000028
# Shortcut for testing FancyURLopener
_urlopener = None


def urlopen(url, data=None, proxies=None):
    """urlopen(url [, data]) -> open file-like object"""
    global _urlopener
    if proxies is not None:
        # Explicit proxies always get a dedicated opener; it is not cached.
        opener = urllib.request.FancyURLopener(proxies=proxies)
    else:
        if not _urlopener:
            # First proxy-less call: build the shared opener once, checking
            # for the deprecation warning its constructor emits.
            with support.check_warnings(
                    ('FancyURLopener style of invoking requests is deprecated.',
                     DeprecationWarning)):
                fresh_opener = urllib.request.FancyURLopener()
            _urlopener = fresh_opener
        opener = _urlopener
    # Only forward *data* when the caller actually supplied it.
    call_args = (url,) if data is None else (url, data)
    return opener.open(*call_args)
50
Senthil Kumarance260142011-11-01 01:35:17 +080051
def fakehttp(fakedata):
    """Return an HTTPConnection subclass that replays *fakedata*.

    connect() on the returned class installs an in-memory socket preloaded
    with fakedata instead of opening a network connection.  The most recent
    payload passed to the socket's sendall() is stored on the class
    attribute ``buf`` so that tests can inspect what was sent.
    """
    class FakeSocket(io.BytesIO):
        # Reference count: starts at 1 for the socket itself and grows with
        # every makefile() call; the buffer is really closed once close()
        # has brought it down to zero.
        io_refs = 1

        def sendall(self, data):
            FakeHTTPConnection.buf = data

        def makefile(self, *args, **kwds):
            self.io_refs += 1
            return self

        def read(self, amt=None):
            # Reading a closed socket yields EOF rather than an error.
            return b"" if self.closed else super().read(amt)

        def readline(self, length=None):
            return b"" if self.closed else super().readline(length)

        def close(self):
            self.io_refs -= 1
            if self.io_refs == 0:
                super().close()

    class FakeHTTPConnection(http.client.HTTPConnection):

        # buffer to store data for verification in urlopen tests.
        buf = None
        fakesock = FakeSocket(fakedata)

        def connect(self):
            self.sock = self.fakesock

    return FakeHTTPConnection
88
89
class FakeHTTPMixin(object):
    """Mixin that temporarily swaps http.client.HTTPConnection for a fake."""

    def fakehttp(self, fakedata):
        # Remember the class currently installed so unfakehttp() can put it
        # back, then install a fake that replays *fakedata*.
        self._connection_class = http.client.HTTPConnection
        replacement = fakehttp(fakedata)
        http.client.HTTPConnection = replacement

    def unfakehttp(self):
        # Undo fakehttp(): reinstall the saved connection class.
        saved = self._connection_class
        http.client.HTTPConnection = saved
97
98
class FakeFTPMixin(object):
    """Mixin that temporarily swaps urllib.request.ftpwrapper for a stub."""

    def fakeftp(self):
        # Stub whose constructor accepts the arguments and ignores them and
        # whose methods do nothing beyond returning canned values.
        class FakeFtpWrapper(object):
            def __init__(self, user, passwd, host, port, dirs, timeout=None,
                         persistent=True):
                pass

            def retrfile(self, file, type):
                # An empty in-memory stream paired with 0.
                empty_stream = io.BytesIO()
                return empty_stream, 0

            def close(self):
                pass

        # Keep the class currently installed so unfakeftp() can restore it.
        self._ftpwrapper_class = urllib.request.ftpwrapper
        urllib.request.ftpwrapper = FakeFtpWrapper

    def unfakeftp(self):
        # Undo fakeftp(): reinstall the saved ftpwrapper class.
        saved = self._ftpwrapper_class
        urllib.request.ftpwrapper = saved
117
118
class urlopen_FileTests(unittest.TestCase):
    """Test urlopen() opening a temporary file.

    Try to test as much functionality as possible so as to cut down on reliance
    on connecting to the Net for testing.

    """

    def setUp(self):
        # Write a small class-specific payload to the scratch file, then
        # open that file through urlopen() for the tests to inspect.
        payload = "test_urllib: %s\n" % self.__class__.__name__
        self.text = payload.encode("ascii")
        with open(support.TESTFN, 'wb') as scratch:
            scratch.write(self.text)
        self.pathname = support.TESTFN
        self.returned_obj = urlopen("file:%s" % self.pathname)

    def tearDown(self):
        """Shut down the open object"""
        self.returned_obj.close()
        os.remove(support.TESTFN)

    def test_interface(self):
        # Make sure object returned by urlopen() has the specified methods
        required = ("read", "readline", "readlines", "fileno",
                    "close", "info", "geturl", "getcode", "__iter__")
        for attr in required:
            has_attr = hasattr(self.returned_obj, attr)
            self.assertTrue(has_attr,
                            "object returned by urlopen() lacks %s attribute" %
                            attr)

    def test_read(self):
        content = self.returned_obj.read()
        self.assertEqual(self.text, content)

    def test_readline(self):
        first = self.returned_obj.readline()
        self.assertEqual(self.text, first)
        # The file holds a single line, so a second call must hit EOF.
        second = self.returned_obj.readline()
        self.assertEqual(b'', second,
                         "calling readline() after exhausting the file did not"
                         " return an empty string")

    def test_readlines(self):
        lines_list = self.returned_obj.readlines()
        self.assertEqual(len(lines_list), 1,
                         "readlines() returned the wrong number of lines")
        only_line = lines_list[0]
        self.assertEqual(only_line, self.text,
                         "readlines() returned improper text")

    def test_fileno(self):
        file_num = self.returned_obj.fileno()
        self.assertIsInstance(file_num, int, "fileno() did not return an int")
        # Read through the raw descriptor to prove it refers to our file.
        raw = os.read(file_num, len(self.text))
        self.assertEqual(raw, self.text,
                         "Reading on the file descriptor returned by fileno() "
                         "did not return the expected text")

    def test_close(self):
        # Test close() by calling it here and then having it be called again
        # by the tearDown() method for the test
        self.returned_obj.close()

    def test_info(self):
        headers = self.returned_obj.info()
        self.assertIsInstance(headers, email.message.Message)

    def test_geturl(self):
        reported = self.returned_obj.geturl()
        self.assertEqual(reported, self.pathname)

    def test_getcode(self):
        status = self.returned_obj.getcode()
        self.assertIsNone(status)

    def test_iter(self):
        # Test iterator
        # Don't need to count number of iterations since test would fail the
        # instant it returned anything beyond the first line from the
        # comparison.
        # Use the iterator in the usual implicit way to test for ticket #4608.
        for line in self.returned_obj:
            self.assertEqual(line, self.text)

    def test_relativelocalfile(self):
        # A relative path without a scheme is not a valid URL.
        with self.assertRaises(ValueError):
            urllib.request.urlopen('./' + self.pathname)
200
class ProxyTests(unittest.TestCase):

    def setUp(self):
        # Records changes to env vars
        self.env = support.EnvironmentVarGuard()
        # Delete all proxy related env vars.  The names are collected first
        # because unsetting while iterating os.environ would mutate it.
        proxy_names = [name for name in os.environ if 'proxy' in name.lower()]
        for name in proxy_names:
            self.env.unset(name)

    def tearDown(self):
        # Restore all proxy related env vars
        self.env.__exit__()
        del self.env

    def test_getproxies_environment_keep_no_proxies(self):
        self.env.set('NO_PROXY', 'localhost')
        proxies = urllib.request.getproxies_environment()
        # getproxies_environment use lowered case truncated (no '_proxy') keys
        self.assertEqual('localhost', proxies['no'])
        # List of no_proxies with space.
        self.env.set('NO_PROXY', 'localhost, anotherdomain.com, newdomain.com')
        bypassed = urllib.request.proxy_bypass_environment('anotherdomain.com')
        self.assertTrue(bypassed)
Benjamin Peterson9bc93512008-09-22 22:10:59 +0000224
class urlopen_HttpTests(unittest.TestCase, FakeHTTPMixin, FakeFTPMixin):
    """Test urlopen() opening a fake http connection."""

    def check_read(self, ver):
        # Serve a minimal 200 response using HTTP version *ver* (bytes) and
        # check the body, URL and status code reported by urlopen().
        canned = b"HTTP/" + ver + b" 200 OK\r\n\r\nHello!"
        self.fakehttp(canned)
        try:
            response = urlopen("http://python.org/")
            self.assertEqual(response.readline(), b"Hello!")
            self.assertEqual(response.readline(), b"")
            self.assertEqual(response.geturl(), 'http://python.org/')
            self.assertEqual(response.getcode(), 200)
        finally:
            self.unfakehttp()

    def test_url_fragment(self):
        # Issue #11703: geturl() omits fragments in the original URL.
        url = 'http://docs.python.org/library/urllib.html#OK'
        self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello!")
        try:
            response = urllib.request.urlopen(url)
            self.assertEqual(response.geturl(), url)
        finally:
            self.unfakehttp()

    def test_willclose(self):
        self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello!")
        try:
            response = urlopen("http://www.python.org")
            self.assertTrue(response.fp.will_close)
        finally:
            self.unfakehttp()

    def test_read_0_9(self):
        # "0.9" response accepted (but not "simple responses" without
        # a status line)
        self.check_read(b"0.9")

    def test_read_1_0(self):
        self.check_read(b"1.0")

    def test_read_1_1(self):
        self.check_read(b"1.1")

    def test_read_bogus(self):
        # urlopen() should raise OSError for many error codes.
        self.fakehttp(
            b"HTTP/1.1 401 Authentication Required\n"
            b"Date: Wed, 02 Jan 2008 03:03:54 GMT\n"
            b"Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e\n"
            b"Connection: close\n"
            b"Content-Type: text/html; charset=iso-8859-1\n")
        try:
            with self.assertRaises(OSError):
                urlopen("http://python.org/")
        finally:
            self.unfakehttp()

    def test_invalid_redirect(self):
        # A 302 whose Location points at a file: URL must surface as
        # HTTPError.
        self.fakehttp(
            b"HTTP/1.1 302 Found\n"
            b"Date: Wed, 02 Jan 2008 03:03:54 GMT\n"
            b"Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e\n"
            b"Location: file://guidocomputer.athome.com:/python/license\n"
            b"Connection: close\n"
            b"Content-Type: text/html; charset=iso-8859-1\n")
        try:
            with self.assertRaises(urllib.error.HTTPError):
                urlopen("http://python.org/")
        finally:
            self.unfakehttp()

    def test_empty_socket(self):
        # urlopen() raises OSError if the underlying socket does not send any
        # data. (#1680230)
        self.fakehttp(b'')
        try:
            with self.assertRaises(OSError):
                urlopen("http://something")
        finally:
            self.unfakehttp()

    def test_missing_localfile(self):
        # Test for #10836
        with self.assertRaises(urllib.error.URLError) as caught:
            urlopen('file://localhost/a/file/which/doesnot/exists.py')
        error = caught.exception
        self.assertTrue(error.filename)
        self.assertTrue(error.reason)

    def test_file_notexists(self):
        handle, tmp_file = tempfile.mkstemp()
        tmp_fileurl = 'file://localhost/' + tmp_file.replace(os.path.sep, '/')
        try:
            # While the file exists its URL must open successfully.
            self.assertTrue(os.path.exists(tmp_file))
            with urlopen(tmp_fileurl) as fobj:
                self.assertTrue(fobj)
        finally:
            os.close(handle)
            os.unlink(tmp_file)
        # Once the file is gone the very same URL must fail.
        self.assertFalse(os.path.exists(tmp_file))
        with self.assertRaises(urllib.error.URLError):
            urlopen(tmp_fileurl)

    def test_ftp_nohost(self):
        test_ftp_url = 'ftp:///path'
        with self.assertRaises(urllib.error.URLError) as caught:
            urlopen(test_ftp_url)
        error = caught.exception
        self.assertFalse(error.filename)
        self.assertTrue(error.reason)

    def test_ftp_nonexisting(self):
        with self.assertRaises(urllib.error.URLError) as caught:
            urlopen('ftp://localhost/a/file/which/doesnot/exists.py')
        error = caught.exception
        self.assertFalse(error.filename)
        self.assertTrue(error.reason)

    @patch.object(urllib.request, 'MAXFTPCACHE', 0)
    def test_ftp_cache_pruning(self):
        self.fakeftp()
        try:
            # Seed the cache with one entry while the cache limit is
            # patched to 0, then open an FTP URL.
            stub = urllib.request.ftpwrapper('user', 'pass', 'localhost', 21, [])
            urllib.request.ftpcache['test'] = stub
            urlopen('ftp://localhost')
        finally:
            self.unfakeftp()

    def test_userpass_inurl(self):
        self.fakehttp(b"HTTP/1.0 200 OK\r\n\r\nHello!")
        try:
            response = urlopen("http://user:pass@python.org/")
            self.assertEqual(response.readline(), b"Hello!")
            self.assertEqual(response.readline(), b"")
            self.assertEqual(response.geturl(), 'http://user:pass@python.org/')
            self.assertEqual(response.getcode(), 200)
        finally:
            self.unfakehttp()

    def test_userpass_inurl_w_spaces(self):
        self.fakehttp(b"HTTP/1.0 200 OK\r\n\r\nHello!")
        try:
            userpass = "a b:c d"
            url = "http://%s@python.org/" % userpass
            # Grab the fake class now; its ``buf`` records what gets sent.
            fakehttp_wrapper = http.client.HTTPConnection
            encoded = b64encode(userpass.encode("ASCII")).decode("ASCII")
            authorization = "Authorization: Basic %s\r\n" % encoded
            response = urlopen(url)
            # The authorization header must be in place
            sent = fakehttp_wrapper.buf.decode("UTF-8")
            self.assertIn(authorization, sent)
            self.assertEqual(response.readline(), b"Hello!")
            self.assertEqual(response.readline(), b"")
            # the spaces are quoted in URL so no match
            self.assertNotEqual(response.geturl(), url)
            self.assertEqual(response.getcode(), 200)
        finally:
            self.unfakehttp()

    def test_URLopener_deprecation(self):
        with support.check_warnings(('', DeprecationWarning)):
            urllib.request.URLopener()

    def test_cafile_and_context(self):
        # Passing both cafile and an SSL context is rejected.
        context = ssl.create_default_context()
        with self.assertRaises(ValueError):
            urllib.request.urlopen(
                "https://localhost", cafile="/nonexistent/path", context=context
            )
389
class urlopen_DataTests(unittest.TestCase):
    """Test urlopen() opening a data URL."""

    def _open_data_url(self, url):
        # Open *url* and make sure the response is closed once the test has
        # finished, even when a later step of setUp() or the test fails.
        response = urllib.request.urlopen(url)
        self.addCleanup(response.close)
        return response

    def setUp(self):
        # text containing URL special- and unicode-characters
        self.text = "test data URLs :;,%=& \u00f6 \u00c4 "
        # 2x1 pixel RGB PNG image with one black and one white pixel
        self.image = (
            b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\x02\x00\x00\x00'
            b'\x01\x08\x02\x00\x00\x00{@\xe8\xdd\x00\x00\x00\x01sRGB\x00\xae'
            b'\xce\x1c\xe9\x00\x00\x00\x0fIDAT\x08\xd7c```\xf8\xff\xff?\x00'
            b'\x06\x01\x02\xfe\no/\x1e\x00\x00\x00\x00IEND\xaeB`\x82')

        self.text_url = (
            "data:text/plain;charset=UTF-8,test%20data%20URLs%20%3A%3B%2C%25%3"
            "D%26%20%C3%B6%20%C3%84%20")
        self.text_url_base64 = (
            "data:text/plain;charset=ISO-8859-1;base64,dGVzdCBkYXRhIFVSTHMgOjs"
            "sJT0mIPYgxCA%3D")
        # base64 encoded data URL that contains ignorable spaces,
        # such as "\n", " ", "%0A", and "%20".
        self.image_url = (
            "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAIAAAABCAIAAAB7\n"
            "QOjdAAAAAXNSR0IArs4c6QAAAA9JREFUCNdj%0AYGBg%2BP//PwAGAQL%2BCm8 "
            "vHgAAAABJRU5ErkJggg%3D%3D%0A%20")

        # Each response is registered for closing as soon as it is opened.
        self.text_url_resp = self._open_data_url(self.text_url)
        self.text_url_base64_resp = self._open_data_url(self.text_url_base64)
        self.image_url_resp = self._open_data_url(self.image_url)

    def test_interface(self):
        # Make sure object returned by urlopen() has the specified methods
        for attr in ("read", "readline", "readlines",
                     "close", "info", "geturl", "getcode", "__iter__"):
            self.assertTrue(hasattr(self.text_url_resp, attr),
                            "object returned by urlopen() lacks %s attribute" %
                            attr)

    def test_info(self):
        self.assertIsInstance(self.text_url_resp.info(), email.message.Message)
        self.assertEqual(self.text_url_base64_resp.info().get_params(),
                         [('text/plain', ''), ('charset', 'ISO-8859-1')])
        self.assertEqual(self.image_url_resp.info()['content-length'],
                         str(len(self.image)))
        # A bare "data:," URL falls back to text/plain in US-ASCII; the
        # one-off response is closed by the with block.
        with urllib.request.urlopen("data:,") as bare_resp:
            self.assertEqual(bare_resp.info().get_params(),
                             [('text/plain', ''), ('charset', 'US-ASCII')])

    def test_geturl(self):
        self.assertEqual(self.text_url_resp.geturl(), self.text_url)
        self.assertEqual(self.text_url_base64_resp.geturl(),
                         self.text_url_base64)
        self.assertEqual(self.image_url_resp.geturl(), self.image_url)

    def test_read_text(self):
        # Decode the body with the charset advertised in the headers.
        charset = dict(self.text_url_resp.info().get_params())['charset']
        self.assertEqual(self.text_url_resp.read().decode(charset), self.text)

    def test_read_text_base64(self):
        charset = dict(
            self.text_url_base64_resp.info().get_params())['charset']
        self.assertEqual(self.text_url_base64_resp.read().decode(charset),
                         self.text)

    def test_read_image(self):
        self.assertEqual(self.image_url_resp.read(), self.image)

    def test_missing_comma(self):
        self.assertRaises(ValueError, urllib.request.urlopen, 'data:text/plain')

    def test_invalid_base64_data(self):
        # missing padding character
        self.assertRaises(ValueError, urllib.request.urlopen,
                          'data:;base64,Cg=')
462
class urlretrieve_FileTests(unittest.TestCase):
    """Test urllib.urlretrieve() on local files"""

    def setUp(self):
        # Create a list of temporary files. Each item in the list is a file
        # name (absolute path or relative to the current working directory).
        # All files in this list will be deleted in the tearDown method. Note,
        # this only helps to make sure temporary files get deleted, but it
        # does nothing about trying to close files that may still be open. It
        # is the responsibility of the developer to properly close files even
        # when exceptional conditions occur.
        self.tempFiles = []

        # Create a temporary file.
        self.registerFileForCleanUp(support.TESTFN)
        self.text = b'testing urllib.urlretrieve'
        # The with block closes the file on every path and, unlike a bare
        # "except: pass" around close(), does not hide a failing open().
        with open(support.TESTFN, 'wb') as FILE:
            FILE.write(self.text)

    def tearDown(self):
        # Delete the temporary files.  Removal is best-effort (a registered
        # file may never have been created), so filesystem errors are
        # ignored -- but only those, so that unrelated bugs still surface.
        for each in self.tempFiles:
            try:
                os.remove(each)
            except OSError:
                pass

    def constructLocalFileUrl(self, filePath):
        """Return a file:// URL for the absolute form of *filePath*.

        Raises unittest.SkipTest when the path cannot be encoded as UTF-8.
        """
        filePath = os.path.abspath(filePath)
        try:
            filePath.encode("utf-8")
        except UnicodeEncodeError:
            raise unittest.SkipTest("filePath is not encodable to utf8")
        return "file://%s" % urllib.request.pathname2url(filePath)

    def createNewTempFile(self, data=b""):
        """Creates a new temporary file containing the specified data,
        registers the file for deletion during the test fixture tear down, and
        returns the absolute path of the file."""

        newFd, newFilePath = tempfile.mkstemp()
        # Register before writing so the file is removed even if the write
        # below fails.
        self.registerFileForCleanUp(newFilePath)
        with os.fdopen(newFd, "wb") as newFile:
            newFile.write(data)
        return newFilePath

    def registerFileForCleanUp(self, fileName):
        """Schedule *fileName* for deletion in tearDown()."""
        self.tempFiles.append(fileName)

    def test_basic(self):
        # Make sure that a local file just gets its own location returned and
        # a headers value is returned.
        result = urllib.request.urlretrieve("file:%s" % support.TESTFN)
        self.assertEqual(result[0], support.TESTFN)
        self.assertIsInstance(result[1], email.message.Message,
                              "did not get a email.message.Message instance "
                              "as second returned value")

    def test_copy(self):
        # Test that setting the filename argument works.
        second_temp = "%s.2" % support.TESTFN
        self.registerFileForCleanUp(second_temp)
        result = urllib.request.urlretrieve(self.constructLocalFileUrl(
            support.TESTFN), second_temp)
        self.assertEqual(second_temp, result[0])
        self.assertTrue(os.path.exists(second_temp), "copy of the file was not "
                                                     "made")
        with open(second_temp, 'rb') as FILE:
            text = FILE.read()
        self.assertEqual(self.text, text)

    def test_reporthook(self):
        # Make sure that the reporthook works.
        # One-element list used as a mutable cell: it holds the block
        # number the next hook call is expected to report.
        count_holder = [0]

        def hooktester(block_count, block_read_size, file_size):
            self.assertIsInstance(block_count, int)
            self.assertIsInstance(block_read_size, int)
            self.assertIsInstance(file_size, int)
            self.assertEqual(block_count, count_holder[0])
            count_holder[0] = count_holder[0] + 1

        second_temp = "%s.2" % support.TESTFN
        self.registerFileForCleanUp(second_temp)
        urllib.request.urlretrieve(
            self.constructLocalFileUrl(support.TESTFN),
            second_temp, hooktester)

    def test_reporthook_0_bytes(self):
        # Test on zero length file. Should call reporthook only 1 time.
        report = []

        def hooktester(block_count, block_read_size, file_size):
            report.append((block_count, block_read_size, file_size))

        srcFileName = self.createNewTempFile()
        urllib.request.urlretrieve(self.constructLocalFileUrl(srcFileName),
                                   support.TESTFN, hooktester)
        self.assertEqual(len(report), 1)
        self.assertEqual(report[0][2], 0)

    def test_reporthook_5_bytes(self):
        # Test on 5 byte file. Should call reporthook only 2 times (once when
        # the "network connection" is established and once when the block is
        # read).
        report = []

        def hooktester(block_count, block_read_size, file_size):
            report.append((block_count, block_read_size, file_size))

        srcFileName = self.createNewTempFile(b"x" * 5)
        urllib.request.urlretrieve(self.constructLocalFileUrl(srcFileName),
                                   support.TESTFN, hooktester)
        self.assertEqual(len(report), 2)
        self.assertEqual(report[0][2], 5)
        self.assertEqual(report[1][2], 5)

    def test_reporthook_8193_bytes(self):
        # Test on 8193 byte file. Should call reporthook only 3 times (once
        # when the "network connection" is established, once for the next 8192
        # bytes, and once for the last byte).
        report = []

        def hooktester(block_count, block_read_size, file_size):
            report.append((block_count, block_read_size, file_size))

        srcFileName = self.createNewTempFile(b"x" * 8193)
        urllib.request.urlretrieve(self.constructLocalFileUrl(srcFileName),
                                   support.TESTFN, hooktester)
        self.assertEqual(len(report), 3)
        self.assertEqual(report[0][2], 8193)
        # The second field of every report is expected to be 8192, including
        # the final call that covers only one byte.
        self.assertEqual(report[0][1], 8192)
        self.assertEqual(report[1][1], 8192)
        self.assertEqual(report[2][1], 8192)
Skip Montanaro080c9972001-01-28 21:12:22 +0000601
Senthil Kumarance260142011-11-01 01:35:17 +0800602
class urlretrieve_HttpTests(unittest.TestCase, FakeHTTPMixin):
    """Test urllib.urlretrieve() using fake http connections"""

    # Canned response whose Content-Length header promises 100 bytes while
    # the body that follows the blank line is only b"FF\n".
    _SHORT_RESPONSE = (
        b"HTTP/1.1 200 OK\n"
        b"Date: Wed, 02 Jan 2008 03:03:54 GMT\n"
        b"Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e\n"
        b"Connection: close\n"
        b"Content-Length: 100\n"
        b"Content-Type: text/html; charset=iso-8859-1\n"
        b"\n"
        b"FF\n")

    def test_short_content_raises_ContentTooShortError(self):
        # urlretrieve() is called without a filename, so it downloads into a
        # temporary file; urlcleanup() removes it after the test.
        self.addCleanup(urllib.request.urlcleanup)
        self.fakehttp(self._SHORT_RESPONSE)

        def _reporthook(par1, par2, par3):
            pass

        with self.assertRaises(urllib.error.ContentTooShortError):
            try:
                urllib.request.urlretrieve('http://example.com/',
                                           reporthook=_reporthook)
            finally:
                self.unfakehttp()

    def test_short_content_raises_ContentTooShortError_without_reporthook(self):
        # Same as above, but without a reporthook.
        self.addCleanup(urllib.request.urlcleanup)
        self.fakehttp(self._SHORT_RESPONSE)
        with self.assertRaises(urllib.error.ContentTooShortError):
            try:
                urllib.request.urlretrieve('http://example.com/')
            finally:
                self.unfakehttp()
642
643
class QuotingTests(unittest.TestCase):
    r"""Tests for urllib.parse.quote() and urllib.parse.quote_plus()

    According to RFC 2396 (Uniform Resource Identifiers), to escape a
    character you write it as '%' + <2 character US-ASCII hex value>.
    The Python code of ``'%' + hex(ord(<character>))[2:]`` escapes a
    character properly. Case does not matter on the hex letters.

    The various character sets specified are:

    Reserved characters : ";/?:@&=+$,"
        Have special meaning in URIs and must be escaped if not being used for
        their special meaning
    Data characters : letters, digits, and "-_.!~*'()"
        Unreserved and do not need to be escaped; can be, though, if desired
    Control characters : 0x00 - 0x1F, 0x7F
        Have no use in URIs so must be escaped
    space : 0x20
        Must be escaped
    Delimiters : '<>#%"'
        Must be escaped
    Unwise : "{}|\^[]`"
        Must be escaped

    """
    # The docstring above is raw so that the backslash in the "Unwise" set
    # is a literal character instead of an invalid escape sequence.

    def _check(self, expect, result, func_name="quote"):
        """Assert that *result* equals *expect*.

        *func_name* names the function under test in the failure message.
        """
        self.assertEqual(expect, result,
                         "using %s(): %r != %r" % (func_name, expect, result))

    def test_never_quote(self):
        # Make sure quote() and quote_plus() leave letters, digits and
        # "_.-" untouched
        do_not_quote = ''.join(["ABCDEFGHIJKLMNOPQRSTUVWXYZ",
                                "abcdefghijklmnopqrstuvwxyz",
                                "0123456789",
                                "_.-"])
        self._check(do_not_quote, urllib.parse.quote(do_not_quote))
        self._check(do_not_quote, urllib.parse.quote_plus(do_not_quote),
                    "quote_plus")

    def test_default_safe(self):
        # Test '/' is default value for 'safe' parameter
        self.assertEqual(urllib.parse.quote.__defaults__[0], '/')

    def test_safe(self):
        # Test setting 'safe' parameter does what it should do
        quote_by_default = "<>"
        result = urllib.parse.quote(quote_by_default, safe=quote_by_default)
        self._check(quote_by_default, result)
        result = urllib.parse.quote_plus(quote_by_default,
                                         safe=quote_by_default)
        self._check(quote_by_default, result, "quote_plus")
        # Safe expressed as bytes rather than str
        result = urllib.parse.quote(quote_by_default, safe=b"<>")
        self._check(quote_by_default, result)
        # "Safe" non-ASCII characters should have no effect
        # (Since URIs are not allowed to have non-ASCII characters),
        # whether 'safe' is given as str or as bytes
        expect = urllib.parse.quote("a\xfcb", encoding="latin-1", safe="")
        for safe in ("\xfc", b"\xfc"):
            result = urllib.parse.quote("a\xfcb", encoding="latin-1",
                                        safe=safe)
            self._check(expect, result)

    def test_default_quoting(self):
        # Make sure all characters that should be quoted are by default sans
        # space (separate test for that).
        should_quote = [chr(num) for num in range(32)]  # For 0x00 - 0x1F
        should_quote.append('<>#%"{}|\\^[]`')
        should_quote.append(chr(127))  # For 0x7F
        should_quote = ''.join(should_quote)
        for char in should_quote:
            escaped = hexescape(char)
            for quoter in (urllib.parse.quote, urllib.parse.quote_plus):
                result = quoter(char)
                self.assertEqual(escaped, result,
                                 "using %s(): "
                                 "%s should be escaped to %s, not %s" %
                                 (quoter.__name__, char, escaped, result))
        del should_quote
        partial_quote = "ab[]cd"
        expected = "ab%5B%5Dcd"
        self._check(expected, urllib.parse.quote(partial_quote))
        self._check(expected, urllib.parse.quote_plus(partial_quote),
                    "quote_plus")

    def test_quoting_space(self):
        # Make sure quote() and quote_plus() handle spaces as specified in
        # their unique way
        self._check(hexescape(' '), urllib.parse.quote(' '))
        result = urllib.parse.quote_plus(' ')
        self.assertEqual(result, '+',
                         "using quote_plus(): %r != +" % result)
        given = "a b cd e f"
        expect = given.replace(' ', hexescape(' '))
        self._check(expect, urllib.parse.quote(given))
        expect = given.replace(' ', '+')
        self._check(expect, urllib.parse.quote_plus(given), "quote_plus")

    def test_quoting_plus(self):
        self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma'),
                         'alpha%2Bbeta+gamma')
        self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma', '+'),
                         'alpha+beta+gamma')
        # Test with bytes
        self.assertEqual(urllib.parse.quote_plus(b'alpha+beta gamma'),
                         'alpha%2Bbeta+gamma')
        # Test with safe bytes
        self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma', b'+'),
                         'alpha+beta+gamma')

    def test_quote_bytes(self):
        # Bytes should quote directly to percent-encoded values
        given = b"\xa2\xd8ab\xff"
        expect = "%A2%D8ab%FF"
        self._check(expect, urllib.parse.quote(given))
        # Encoding argument should raise type error on bytes input
        self.assertRaises(TypeError, urllib.parse.quote, given,
                          encoding="latin-1")
        # quote_from_bytes should work the same
        self._check(expect, urllib.parse.quote_from_bytes(given),
                    "quote_from_bytes")

    def test_quote_with_unicode(self):
        # Characters in Latin-1 range, encoded by default in UTF-8
        given = "\xa2\xd8ab\xff"
        expect = "%C2%A2%C3%98ab%C3%BF"
        self._check(expect, urllib.parse.quote(given))
        # Characters in Latin-1 range, encoded with None (default)
        result = urllib.parse.quote(given, encoding=None, errors=None)
        self._check(expect, result)
        # Characters in Latin-1 range, encoded with Latin-1
        given = "\xa2\xd8ab\xff"
        expect = "%A2%D8ab%FF"
        self._check(expect, urllib.parse.quote(given, encoding="latin-1"))
        # Characters in BMP, encoded by default in UTF-8
        given = "\u6f22\u5b57"              # "Kanji"
        expect = "%E6%BC%A2%E5%AD%97"
        self._check(expect, urllib.parse.quote(given))
        # Characters in BMP, encoded with Latin-1
        given = "\u6f22\u5b57"
        self.assertRaises(UnicodeEncodeError, urllib.parse.quote, given,
                          encoding="latin-1")
        # Characters in BMP, encoded with Latin-1, with replace error handling
        given = "\u6f22\u5b57"
        expect = "%3F%3F"                   # "??"
        result = urllib.parse.quote(given, encoding="latin-1",
                                    errors="replace")
        self._check(expect, result)
        # Characters in BMP, Latin-1, with xmlcharref error handling
        given = "\u6f22\u5b57"
        expect = "%26%2328450%3B%26%2323383%3B"     # "&#28450;&#23383;"
        result = urllib.parse.quote(given, encoding="latin-1",
                                    errors="xmlcharrefreplace")
        self._check(expect, result)

    def test_quote_plus_with_unicode(self):
        # Encoding (latin-1) test for quote_plus
        given = "\xa2\xd8 \xff"
        expect = "%A2%D8+%FF"
        result = urllib.parse.quote_plus(given, encoding="latin-1")
        self._check(expect, result, "quote_plus")
        # Errors test for quote_plus
        given = "ab\u6f22\u5b57 cd"
        expect = "ab%3F%3F+cd"
        result = urllib.parse.quote_plus(given, encoding="latin-1",
                                         errors="replace")
        self._check(expect, result, "quote_plus")
847
Senthil Kumarand496c4c2010-07-30 19:34:36 +0000848
class UnquotingTests(unittest.TestCase):
    """Tests for unquote(), unquote_plus() and unquote_to_bytes()

    See the docstring of QuotingTests for details on quoting and such.

    """

    def _check(self, expect, result, func_name="unquote"):
        """Assert that *result* equals *expect*.

        *func_name* names the function under test in the failure message.
        """
        self.assertEqual(expect, result,
                         "using %s(): %r != %r" % (func_name, expect, result))

    def test_unquoting(self):
        # Make sure unquoting of all ASCII values works
        escape_list = []
        for num in range(128):
            given = hexescape(chr(num))
            expect = chr(num)
            self._check(expect, urllib.parse.unquote(given))
            self._check(expect, urllib.parse.unquote_plus(given),
                        "unquote_plus")
            escape_list.append(given)
        escape_string = ''.join(escape_list)
        del escape_list
        result = urllib.parse.unquote(escape_string)
        # The only '%' left must be the one decoded from "%25".
        self.assertEqual(result.count('%'), 1,
                         "using unquote(): not all characters escaped: "
                         "%s" % result)
        self.assertRaises((TypeError, AttributeError),
                          urllib.parse.unquote, None)
        self.assertRaises((TypeError, AttributeError),
                          urllib.parse.unquote, ())
        with support.check_warnings(('', BytesWarning), quiet=True):
            self.assertRaises((TypeError, AttributeError),
                              urllib.parse.unquote, b'')

    def test_unquoting_badpercent(self):
        # Test unquoting on bad percent-escapes: they must pass through
        # unchanged
        malformed = ('%xab', '%x', '%')
        for given in malformed:
            self._check(given, urllib.parse.unquote(given))
        # unquote_to_bytes
        for given in malformed:
            expect = bytes(given, 'ascii')
            result = urllib.parse.unquote_to_bytes(given)
            self._check(expect, result, "unquote_to_bytes")
        self.assertRaises((TypeError, AttributeError),
                          urllib.parse.unquote_to_bytes, None)
        self.assertRaises((TypeError, AttributeError),
                          urllib.parse.unquote_to_bytes, ())

    def test_unquoting_mixed_case(self):
        # Test unquoting on mixed-case hex digits in the percent-escapes
        given = '%Ab%eA'
        expect = b'\xab\xea'
        result = urllib.parse.unquote_to_bytes(given)
        self._check(expect, result, "unquote_to_bytes")

    def test_unquoting_parts(self):
        # Make sure unquoting works when have non-quoted characters
        # interspersed
        given = 'ab%sd' % hexescape('c')
        expect = "abcd"
        self._check(expect, urllib.parse.unquote(given))
        self._check(expect, urllib.parse.unquote_plus(given), "unquote_plus")

    def test_unquoting_plus(self):
        # Test difference between unquote() and unquote_plus()
        given = "are+there+spaces..."
        expect = given
        self._check(expect, urllib.parse.unquote(given))
        expect = given.replace('+', ' ')
        self._check(expect, urllib.parse.unquote_plus(given), "unquote_plus")

    def test_unquote_to_bytes(self):
        given = 'br%C3%BCckner_sapporo_20050930.doc'
        expect = b'br\xc3\xbcckner_sapporo_20050930.doc'
        result = urllib.parse.unquote_to_bytes(given)
        self._check(expect, result, "unquote_to_bytes")
        # Test on a string with unescaped non-ASCII characters
        # (Technically an invalid URI; expect those characters to be UTF-8
        # encoded).
        result = urllib.parse.unquote_to_bytes("\u6f22%C3%BC")
        expect = b'\xe6\xbc\xa2\xc3\xbc'    # UTF-8 for "\u6f22\u00fc"
        self._check(expect, result, "unquote_to_bytes")
        # Test with a bytes as input
        given = b'%A2%D8ab%FF'
        expect = b'\xa2\xd8ab\xff'
        result = urllib.parse.unquote_to_bytes(given)
        self._check(expect, result, "unquote_to_bytes")
        # Test with a bytes as input, with unescaped non-ASCII bytes
        # (Technically an invalid URI; expect those bytes to be preserved)
        given = b'%A2\xd8ab%FF'
        expect = b'\xa2\xd8ab\xff'
        result = urllib.parse.unquote_to_bytes(given)
        self._check(expect, result, "unquote_to_bytes")

    def test_unquote_with_unicode(self):
        # Characters in the Latin-1 range, encoded with UTF-8
        given = 'br%C3%BCckner_sapporo_20050930.doc'
        expect = 'br\u00fcckner_sapporo_20050930.doc'
        self._check(expect, urllib.parse.unquote(given))
        # Characters in the Latin-1 range, encoded with None (default)
        result = urllib.parse.unquote(given, encoding=None, errors=None)
        self._check(expect, result)

        # Characters in the Latin-1 range, encoded with Latin-1
        result = urllib.parse.unquote('br%FCckner_sapporo_20050930.doc',
                                      encoding="latin-1")
        expect = 'br\u00fcckner_sapporo_20050930.doc'
        self._check(expect, result)

        # Characters in BMP, encoded with UTF-8
        given = "%E6%BC%A2%E5%AD%97"
        expect = "\u6f22\u5b57"             # "Kanji"
        self._check(expect, urllib.parse.unquote(given))

        # Decode with UTF-8, invalid sequence
        given = "%F3%B1"
        expect = "\ufffd"                   # Replacement character
        self._check(expect, urllib.parse.unquote(given))

        # Decode with UTF-8, invalid sequence, replace errors
        result = urllib.parse.unquote(given, errors="replace")
        self._check(expect, result)

        # Decode with UTF-8, invalid sequence, ignoring errors
        given = "%F3%B1"
        expect = ""
        result = urllib.parse.unquote(given, errors="ignore")
        self._check(expect, result)

        # A mix of non-ASCII and percent-encoded characters, UTF-8
        result = urllib.parse.unquote("\u6f22%C3%BC")
        expect = '\u6f22\u00fc'
        self._check(expect, result)

        # A mix of non-ASCII and percent-encoded characters, Latin-1
        # (Note, the string contains non-Latin-1-representable characters)
        result = urllib.parse.unquote("\u6f22%FC", encoding="latin-1")
        expect = '\u6f22\u00fc'
        self._check(expect, result)

Raymond Hettinger4b0f20d2005-10-15 16:41:53 +00001038
class urlencode_Tests(unittest.TestCase):
    """Tests for urlencode()"""

    def help_inputtype(self, given, test_type):
        """Helper method for testing different input types.

        'given' must lead to only the pairs:
            * 1st, 1
            * 2nd, 2
            * 3rd, 3

        Test cannot assume anything about order.  Docs make no guarantee and
        have possible dictionary input.

        """
        expect_somewhere = ["1st=1", "2nd=2", "3rd=3"]
        result = urllib.parse.urlencode(given)
        for expected in expect_somewhere:
            self.assertIn(expected, result,
                          "testing %s: %s not found in %s" %
                          (test_type, expected, result))
        self.assertEqual(result.count('&'), 2,
                         "testing %s: expected 2 '&'s; got %s" %
                         (test_type, result.count('&')))
        # Every pair ends and every value starts with a digit, so the
        # characters on both sides of the first '&' must be digits.
        amp_location = result.index('&')
        on_amp_left = result[amp_location - 1]
        on_amp_right = result[amp_location + 1]
        self.assertTrue(on_amp_left.isdigit() and on_amp_right.isdigit(),
                        "testing %s: '&' not located in proper place in %s" %
                        (test_type, result))
        expected_length = (5 * 3) + 2   # 5 chars per thing and amps
        self.assertEqual(len(result), expected_length,
                         "testing %s: "
                         "unexpected number of characters: %s != %s" %
                         (test_type, len(result), expected_length))

    def test_using_mapping(self):
        # Test passing in a mapping object as an argument.
        self.help_inputtype({"1st": '1', "2nd": '2', "3rd": '3'},
                            "using dict as input type")

    def test_using_sequence(self):
        # Test passing in a sequence of two-item sequences as an argument.
        self.help_inputtype([('1st', '1'), ('2nd', '2'), ('3rd', '3')],
                            "using sequence of two-item tuples as input")

    def test_quoting(self):
        # Make sure keys and values are quoted using quote_plus()
        given = {"&": "="}
        expect = "%26=%3D"      # '&' -> %26, '=' -> %3D
        result = urllib.parse.urlencode(given)
        self.assertEqual(expect, result)
        given = {"key name": "A bunch of pluses"}
        expect = "key+name=A+bunch+of+pluses"
        result = urllib.parse.urlencode(given)
        self.assertEqual(expect, result)

    def test_doseq(self):
        # Test that passing True for 'doseq' parameter works correctly
        given = {'sequence': ['1', '2', '3']}
        # Without doseq the list is stringified and quoted as one value.
        expect = "sequence=%s" % urllib.parse.quote_plus(str(['1', '2', '3']))
        result = urllib.parse.urlencode(given)
        self.assertEqual(expect, result)
        result = urllib.parse.urlencode(given, True)
        for value in given["sequence"]:
            expect = "sequence=%s" % value
            self.assertIn(expect, result)
        self.assertEqual(result.count('&'), 2,
                         "Expected 2 '&'s, got %s" % result.count('&'))

    def test_empty_sequence(self):
        self.assertEqual("", urllib.parse.urlencode({}))
        self.assertEqual("", urllib.parse.urlencode([]))

    def test_nonstring_values(self):
        self.assertEqual("a=1", urllib.parse.urlencode({"a": 1}))
        self.assertEqual("a=None", urllib.parse.urlencode({"a": None}))

    def test_nonstring_seq_values(self):
        self.assertEqual("a=1&a=2",
                         urllib.parse.urlencode({"a": [1, 2]}, True))
        self.assertEqual("a=None&a=a",
                         urllib.parse.urlencode({"a": [None, "a"]}, True))
        # An ordered mapping used as a value is expected to contribute its
        # keys, in order.
        data = collections.OrderedDict([("a", 1), ("b", 1)])
        self.assertEqual("a=a&a=b",
                         urllib.parse.urlencode({"a": data}, True))

    def test_urlencode_encoding(self):
        # ASCII encoding. Expect %3F with errors="replace"
        given = (('\u00a0', '\u00c1'),)
        expect = '%3F=%3F'
        result = urllib.parse.urlencode(given, encoding="ASCII",
                                        errors="replace")
        self.assertEqual(expect, result)

        # Default is UTF-8 encoding.
        given = (('\u00a0', '\u00c1'),)
        expect = '%C2%A0=%C3%81'
        result = urllib.parse.urlencode(given)
        self.assertEqual(expect, result)

        # Latin-1 encoding.
        given = (('\u00a0', '\u00c1'),)
        expect = '%A0=%C1'
        result = urllib.parse.urlencode(given, encoding="latin-1")
        self.assertEqual(expect, result)

    def test_urlencode_encoding_doseq(self):
        # ASCII Encoding. Expect %3F with errors="replace"
        given = (('\u00a0', '\u00c1'),)
        expect = '%3F=%3F'
        result = urllib.parse.urlencode(given, doseq=True,
                                        encoding="ASCII", errors="replace")
        self.assertEqual(expect, result)

        # ASCII Encoding. On a sequence of values.
        given = (("\u00a0", (1, "\u00c1")),)
        expect = '%3F=1&%3F=%3F'
        result = urllib.parse.urlencode(given, True,
                                        encoding="ASCII", errors="replace")
        self.assertEqual(expect, result)

        # Utf-8
        given = (("\u00a0", "\u00c1"),)
        expect = '%C2%A0=%C3%81'
        result = urllib.parse.urlencode(given, True)
        self.assertEqual(expect, result)

        given = (("\u00a0", (42, "\u00c1")),)
        expect = '%C2%A0=42&%C2%A0=%C3%81'
        result = urllib.parse.urlencode(given, True)
        self.assertEqual(expect, result)

        # latin-1
        given = (("\u00a0", "\u00c1"),)
        expect = '%A0=%C1'
        result = urllib.parse.urlencode(given, True, encoding="latin-1")
        self.assertEqual(expect, result)

        given = (("\u00a0", (42, "\u00c1")),)
        expect = '%A0=42&%A0=%C1'
        result = urllib.parse.urlencode(given, True, encoding="latin-1")
        self.assertEqual(expect, result)

    def test_urlencode_bytes(self):
        given = ((b'\xa0\x24', b'\xc1\x24'),)
        expect = '%A0%24=%C1%24'
        result = urllib.parse.urlencode(given)
        self.assertEqual(expect, result)
        result = urllib.parse.urlencode(given, True)
        self.assertEqual(expect, result)

        # Sequence of values
        given = ((b'\xa0\x24', (42, b'\xc1\x24')),)
        expect = '%A0%24=42&%A0%24=%C1%24'
        result = urllib.parse.urlencode(given, True)
        self.assertEqual(expect, result)

    def test_urlencode_encoding_safe_parameter(self):

        # Send '$' (\x24) as safe character
        # Default utf-8 encoding

        given = ((b'\xa0\x24', b'\xc1\x24'),)
        result = urllib.parse.urlencode(given, safe=":$")
        expect = '%A0$=%C1$'
        self.assertEqual(expect, result)

        given = ((b'\xa0\x24', b'\xc1\x24'),)
        result = urllib.parse.urlencode(given, doseq=True, safe=":$")
        expect = '%A0$=%C1$'
        self.assertEqual(expect, result)

        # Safe parameter in sequence
        given = ((b'\xa0\x24', (b'\xc1\x24', 0xd, 42)),)
        expect = '%A0$=%C1$&%A0$=13&%A0$=42'
        result = urllib.parse.urlencode(given, True, safe=":$")
        self.assertEqual(expect, result)

        # Test all above in latin-1 encoding

        given = ((b'\xa0\x24', b'\xc1\x24'),)
        result = urllib.parse.urlencode(given, safe=":$",
                                        encoding="latin-1")
        expect = '%A0$=%C1$'
        self.assertEqual(expect, result)

        given = ((b'\xa0\x24', b'\xc1\x24'),)
        expect = '%A0$=%C1$'
        result = urllib.parse.urlencode(given, doseq=True, safe=":$",
                                        encoding="latin-1")
        # This result used to be computed but never checked.
        self.assertEqual(expect, result)

        given = ((b'\xa0\x24', (b'\xc1\x24', 0xd, 42)),)
        expect = '%A0$=%C1$&%A0$=13&%A0$=42'
        result = urllib.parse.urlencode(given, True, safe=":$",
                                        encoding="latin-1")
        self.assertEqual(expect, result)

1233
class Pathname_Tests(unittest.TestCase):
    """Test pathname2url() and url2pathname()"""

    def test_basic(self):
        # Make sure simple tests pass
        expected_path = os.path.join("parts", "of", "a", "path")
        expected_url = "parts/of/a/path"
        result = urllib.request.pathname2url(expected_path)
        self.assertEqual(expected_url, result,
                         "pathname2url() failed; %s != %s" %
                         (expected_url, result))
        result = urllib.request.url2pathname(expected_url)
        self.assertEqual(expected_path, result,
                         "url2pathname() failed; %s != %s" %
                         (expected_path, result))

    def test_quoting(self):
        # Test automatic quoting and unquoting works for pathname2url() and
        # url2pathname() respectively
        given = os.path.join("needs", "quot=ing", "here")
        expect = "needs/%s/here" % urllib.parse.quote("quot=ing")
        result = urllib.request.pathname2url(given)
        self.assertEqual(expect, result,
                         "pathname2url() failed; %s != %s" %
                         (expect, result))
        # Round trip: the URL just produced must map back to the path.
        expect = given
        result = urllib.request.url2pathname(result)
        self.assertEqual(expect, result,
                         "url2pathname() failed; %s != %s" %
                         (expect, result))
        given = os.path.join("make sure", "using_quote")
        expect = "%s/using_quote" % urllib.parse.quote("make sure")
        result = urllib.request.pathname2url(given)
        self.assertEqual(expect, result,
                         "pathname2url() failed; %s != %s" %
                         (expect, result))
        # '+' must survive url2pathname() untouched (it is not a space).
        given = "make+sure/using_unquote"
        expect = os.path.join("make+sure", "using_unquote")
        result = urllib.request.url2pathname(given)
        self.assertEqual(expect, result,
                         "url2pathname() failed; %s != %s" %
                         (expect, result))

    @unittest.skipUnless(sys.platform == 'win32',
                         'test specific to the urllib.url2path function.')
    def test_ntpath(self):
        # Several spellings of a bare drive URL map to the same root path.
        given = ('/C:/', '///C:/', '/C|//')
        expect = 'C:\\'
        for url in given:
            result = urllib.request.url2pathname(url)
            self.assertEqual(expect, result,
                             'urllib.request.url2pathname() failed; %s != %s' %
                             (expect, result))
        given = '///C|/path'
        expect = 'C:\\path'
        result = urllib.request.url2pathname(given)
        self.assertEqual(expect, result,
                         'urllib.request.url2pathname() failed; %s != %s' %
                         (expect, result))

1293
class Utility_Tests(unittest.TestCase):
    """Checks for the assorted utility functions exposed by urllib."""

    def test_thishost(self):
        """urllib.request.thishost() must hand back a tuple."""
        host_info = urllib.request.thishost()
        self.assertIsInstance(host_info, tuple)

1300
Senthil Kumaran690ce9b2009-05-05 18:41:13 +00001301
class URLopener_Tests(unittest.TestCase):
    """Checks for the open() method of the URLopener class."""

    def test_quoted_open(self):
        # Opener for a made-up "spam" scheme whose handler simply echoes
        # the URL it is handed, exposing what open() passed along.
        class DummyURLopener(urllib.request.URLopener):
            def open_spam(self, url):
                return url

        expected_warning = (
            'DummyURLopener style of invoking requests is deprecated.',
            DeprecationWarning)
        with support.check_warnings(expected_warning):
            # The space in the path is expected to arrive as %20.
            opened = DummyURLopener().open('spam://example/ /')
            self.assertEqual(opened, '//example/%20/')

            # test the safe characters are not quoted by urlopen
            opened = DummyURLopener().open(
                "spam://c:|windows%/:=&?~#+!$,;'@()*[]|/path/")
            self.assertEqual(opened,
                             "//c:|windows%/:=&?~#+!$,;'@()*[]|/path/")

Senthil Kumaran734f0592010-02-20 22:19:04 +00001319
# The FTP wrapper tests below have been commented out.
# It is unclear why they keep failing on Windows and SPARC.
# Everywhere else they work fine, but on those machines they sometimes
# fail in one of the tests and sometimes in another. I have a Linux
# machine, and the tests pass there.
# If anybody has one of the problematic environments, please help!
# . Facundo
#
1328# def server(evt):
Georg Brandlf78e02b2008-06-10 17:40:04 +00001329# import socket, time
Guido van Rossume7ba4952007-06-06 23:52:48 +00001330# serv = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
1331# serv.settimeout(3)
1332# serv.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
1333# serv.bind(("", 9093))
1334# serv.listen(5)
1335# try:
1336# conn, addr = serv.accept()
1337# conn.send("1 Hola mundo\n")
1338# cantdata = 0
1339# while cantdata < 13:
1340# data = conn.recv(13-cantdata)
1341# cantdata += len(data)
1342# time.sleep(.3)
1343# conn.send("2 No more lines\n")
1344# conn.close()
1345# except socket.timeout:
1346# pass
1347# finally:
1348# serv.close()
1349# evt.set()
1350#
1351# class FTPWrapperTests(unittest.TestCase):
1352#
1353# def setUp(self):
Georg Brandlf78e02b2008-06-10 17:40:04 +00001354# import ftplib, time, threading
Guido van Rossume7ba4952007-06-06 23:52:48 +00001355# ftplib.FTP.port = 9093
1356# self.evt = threading.Event()
1357# threading.Thread(target=server, args=(self.evt,)).start()
1358# time.sleep(.1)
1359#
1360# def tearDown(self):
1361# self.evt.wait()
1362#
1363# def testBasic(self):
1364# # connects
1365# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [])
Georg Brandlf78e02b2008-06-10 17:40:04 +00001366# ftp.close()
Guido van Rossume7ba4952007-06-06 23:52:48 +00001367#
1368# def testTimeoutNone(self):
Georg Brandlf78e02b2008-06-10 17:40:04 +00001369# # global default timeout is ignored
1370# import socket
Serhiy Storchaka25d8aea2014-02-08 14:50:08 +02001371# self.assertIsNone(socket.getdefaulttimeout())
Guido van Rossume7ba4952007-06-06 23:52:48 +00001372# socket.setdefaulttimeout(30)
1373# try:
1374# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [])
1375# finally:
Georg Brandlf78e02b2008-06-10 17:40:04 +00001376# socket.setdefaulttimeout(None)
Guido van Rossume7ba4952007-06-06 23:52:48 +00001377# self.assertEqual(ftp.ftp.sock.gettimeout(), 30)
Georg Brandlf78e02b2008-06-10 17:40:04 +00001378# ftp.close()
Guido van Rossume7ba4952007-06-06 23:52:48 +00001379#
Georg Brandlf78e02b2008-06-10 17:40:04 +00001380# def testTimeoutDefault(self):
1381# # global default timeout is used
1382# import socket
Serhiy Storchaka25d8aea2014-02-08 14:50:08 +02001383# self.assertIsNone(socket.getdefaulttimeout())
Georg Brandlf78e02b2008-06-10 17:40:04 +00001384# socket.setdefaulttimeout(30)
1385# try:
1386# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [])
1387# finally:
1388# socket.setdefaulttimeout(None)
1389# self.assertEqual(ftp.ftp.sock.gettimeout(), 30)
1390# ftp.close()
1391#
1392# def testTimeoutValue(self):
1393# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [],
1394# timeout=30)
1395# self.assertEqual(ftp.ftp.sock.gettimeout(), 30)
1396# ftp.close()
Guido van Rossume7ba4952007-06-06 23:52:48 +00001397
Senthil Kumaran8b081b72013-04-10 20:53:12 -07001398
class RequestTests(unittest.TestCase):
    """Checks on the HTTP method reported by urllib.request.Request."""

    def test_default_values(self):
        # With no explicit method: GET without data, POST with data.
        url = "http://www.python.org"
        without_data = urllib.request.Request(url)
        self.assertEqual(without_data.get_method(), 'GET')
        with_data = urllib.request.Request(url, {})
        self.assertEqual(with_data.get_method(), 'POST')

    def test_with_method_arg(self):
        url = "http://www.python.org"

        # An explicit method is reported both with and without data.
        for positional in ((url,), (url, {})):
            head_request = urllib.request.Request(*positional, method='HEAD')
            self.assertEqual(head_request.method, 'HEAD')
            self.assertEqual(head_request.get_method(), 'HEAD')

        # Reassigning the attribute changes what get_method() reports.
        mutable = urllib.request.Request(url, method='GET')
        self.assertEqual(mutable.get_method(), 'GET')
        mutable.method = 'HEAD'
        self.assertEqual(mutable.get_method(), 'HEAD')
Skip Montanaro080c9972001-01-28 21:12:22 +00001421
1422
class URL2PathNameTests(unittest.TestCase):
    """Checks for the url2pathname() imported at the top of this file."""

    def test_converting_drive_letter(self):
        # Both the '|' and the ':' drive markers are accepted.
        expectations = (
            ("///C|", 'C:'),
            ("///C:", 'C:'),
            ("///C|/", 'C:\\'),
        )
        for url, path in expectations:
            self.assertEqual(url2pathname(url), path)

    def test_converting_when_no_drive_letter(self):
        # A raw string cannot end in a backslash, so the trailing one is
        # supplied by a separate, adjacent literal.
        expectations = (
            ("///C/test/", r'\\\C\test' '\\'),
            ("////C/test/", r'\\C\test' '\\'),
        )
        for url, path in expectations:
            self.assertEqual(url2pathname(url), path)

    def test_simple_compare(self):
        converted = url2pathname("///C|/foo/bar/spam.foo")
        self.assertEqual(converted, r'C:\foo\bar\spam.foo')

    def test_non_ascii_drive_letter(self):
        with self.assertRaises(IOError):
            url2pathname("///\u00e8|/")

    def test_roundtrip_url2pathname(self):
        # pathname2url() followed by url2pathname() must give back the
        # path that went in.
        for path in ('C:', r'\\\C\test\\', r'C:\foo\bar\spam.foo'):
            url = pathname2url(path)
            self.assertEqual(url2pathname(url), path)
Senthil Kumaran277e9092013-04-10 20:51:19 -07001449
class PathName2URLTests(unittest.TestCase):
    """Checks for the pathname2url() imported at the top of this file."""

    def test_converting_drive_letter(self):
        # A bare drive and a drive root are expected to give the same URL.
        for path in ("C:", "C:\\"):
            self.assertEqual(pathname2url(path), '///C:')

    def test_converting_when_no_drive_letter(self):
        # Each path ends in a backslash, written as a separate literal
        # because a raw string cannot end in one.
        expectations = (
            (r"\\\folder\test" "\\", '/////folder/test/'),
            (r"\\folder\test" "\\", '////folder/test/'),
            (r"\folder\test" "\\", '/folder/test/'),
        )
        for path, url in expectations:
            self.assertEqual(pathname2url(path), url)

    def test_simple_compare(self):
        converted = pathname2url(r'C:\foo\bar\spam.foo')
        self.assertEqual(converted, "///C:/foo/bar/spam.foo")

    def test_long_drive_letter(self):
        with self.assertRaises(IOError):
            pathname2url("XX:\\")

    def test_roundtrip_pathname2url(self):
        # url2pathname() followed by pathname2url() must give back the
        # URL that went in.
        for url in ('///C:',
                    '/////folder/test/',
                    '///C:/foo/bar/spam.foo'):
            path = url2pathname(url)
            self.assertEqual(pathname2url(path), url)
Brett Cannon74bfd702003-04-25 09:39:47 +00001477
# Run every test case in this module when it is executed as a script.
if __name__ == "__main__":
    unittest.main()