blob: 2ac73b58d832064f8328d6d06eb90135056c7c59 [file] [log] [blame]
Raymond Hettinger15f44ab2016-08-30 10:47:49 -07001"""Regression tests for what was in Python 2's "urllib" module"""
Brett Cannon74bfd702003-04-25 09:39:47 +00002
Jeremy Hylton1afc1692008-06-18 20:49:58 +00003import urllib.parse
4import urllib.request
guido@google.coma119df92011-03-29 11:41:02 -07005import urllib.error
Georg Brandl24420152008-05-26 16:32:26 +00006import http.client
Barry Warsaw820c1202008-06-12 04:06:45 +00007import email.message
Jeremy Hylton66dc8c52007-08-04 03:42:26 +00008import io
Brett Cannon74bfd702003-04-25 09:39:47 +00009import unittest
Benjamin Peterson3c2dca62014-06-07 15:08:04 -070010from unittest.mock import patch
Benjamin Petersonee8712c2008-05-20 21:35:26 +000011from test import support
Brett Cannon74bfd702003-04-25 09:39:47 +000012import os
Antoine Pitrou07df6552014-11-02 17:23:14 +010013try:
14 import ssl
15except ImportError:
16 ssl = None
Senthil Kumaran2d2ea1b2011-04-14 13:16:30 +080017import sys
Georg Brandl5a650a22005-08-26 08:51:34 +000018import tempfile
Senthil Kumaran277e9092013-04-10 20:51:19 -070019from nturl2path import url2pathname, pathname2url
Jeremy Hylton6102e292000-08-31 15:48:10 +000020
Senthil Kumaranc5c5a142012-01-14 19:09:04 +080021from base64 import b64encode
Georg Brandl2daf6ae2012-02-20 19:54:16 +010022import collections
Senthil Kumaranc5c5a142012-01-14 19:09:04 +080023
Senthil Kumaran8b081b72013-04-10 20:53:12 -070024
def hexescape(char):
    """Return the percent-escape for a single character (RFC 2396 form).

    The character's code point is written as upper-case hexadecimal,
    zero-padded to at least two digits, and prefixed with "%".
    """
    return "%" + format(ord(char), "02X")
Jeremy Hylton6102e292000-08-31 15:48:10 +000031
Jeremy Hylton1afc1692008-06-18 20:49:58 +000032# Shortcut for testing FancyURLopener
33_urlopener = None
Senthil Kumaran277e9092013-04-10 20:51:19 -070034
35
def urlopen(url, data=None, proxies=None):
    """urlopen(url [, data]) -> open file-like object

    When *proxies* is given, a dedicated urllib.request.FancyURLopener is
    built for this call only.  Otherwise a module-wide opener is created on
    first use (through the FancyURLopener() helper) and reused afterwards.
    *data* is forwarded to opener.open() only when it is not None.
    """
    global _urlopener
    if proxies is not None:
        # Explicit proxies never touch the cached opener.
        handler = urllib.request.FancyURLopener(proxies=proxies)
    else:
        if not _urlopener:
            _urlopener = FancyURLopener()
        handler = _urlopener
    extra_args = () if data is None else (data,)
    return handler.open(url, *extra_args)
50
Senthil Kumarance260142011-11-01 01:35:17 +080051
def FancyURLopener():
    """Create a urllib.request.FancyURLopener under a warnings check.

    The construction is wrapped in support.check_warnings() with a filter
    for the DeprecationWarning announcing that the FancyURLopener style of
    invoking requests is deprecated.
    """
    expected_warning = (
        'FancyURLopener style of invoking requests is deprecated.',
        DeprecationWarning,
    )
    with support.check_warnings(expected_warning):
        opener = urllib.request.FancyURLopener()
    return opener
57
58
def fakehttp(fakedata):
    """Build an HTTPConnection subclass that replays *fakedata*.

    The returned class never touches the network: connect() installs an
    in-memory socket preloaded with *fakedata*, and whatever is passed to
    the socket's sendall() is stored on the class as ``buf`` so tests can
    inspect the outgoing request.
    """

    class FakeSocket(io.BytesIO):
        # Number of outstanding users: the socket itself plus one for each
        # makefile() call.  The buffer is really closed only at zero.
        io_refs = 1

        def sendall(self, data):
            # Keep the last chunk written so tests can verify it.
            FakeHTTPConnection.buf = data

        def makefile(self, *args, **kwds):
            # The "file" is the socket object itself; just count the user.
            self.io_refs += 1
            return self

        def read(self, amt=None):
            # Reading after the final close yields EOF instead of raising.
            return b"" if self.closed else io.BytesIO.read(self, amt)

        def readline(self, length=None):
            return b"" if self.closed else io.BytesIO.readline(self, length)

        def close(self):
            self.io_refs -= 1
            if not self.io_refs:
                io.BytesIO.close(self)

    class FakeHTTPConnection(http.client.HTTPConnection):

        # buffer to store data for verification in urlopen tests.
        buf = None

        def connect(self):
            sock = FakeSocket(self.fakedata)
            self.sock = sock
            # Also publish the socket on the class of this connection.
            type(self).fakesock = sock

    FakeHTTPConnection.fakedata = fakedata
    return FakeHTTPConnection
96
97
class FakeHTTPMixin(object):
    """Mixin that temporarily replaces http.client.HTTPConnection."""

    def fakehttp(self, fakedata):
        """Swap in a fake connection class that replays *fakedata*.

        The class currently installed is remembered so unfakehttp() can
        put it back.
        """
        self._connection_class = http.client.HTTPConnection
        setattr(http.client, "HTTPConnection", fakehttp(fakedata))

    def unfakehttp(self):
        """Reinstall the connection class saved by fakehttp()."""
        setattr(http.client, "HTTPConnection", self._connection_class)
105
106
class FakeFTPMixin(object):
    """Mixin that temporarily replaces urllib.request.ftpwrapper."""

    def fakeftp(self):
        """Install a do-nothing ftpwrapper, remembering the current one."""

        class FakeFtpWrapper(object):
            def __init__(self, user, passwd, host, port, dirs, timeout=None,
                         persistent=True):
                """Accept the constructor arguments and ignore them."""

            def retrfile(self, file, type):
                # Always an empty in-memory file paired with the value 0.
                empty = io.BytesIO()
                return empty, 0

            def close(self):
                """Nothing to release."""

        self._ftpwrapper_class = urllib.request.ftpwrapper
        urllib.request.ftpwrapper = FakeFtpWrapper

    def unfakeftp(self):
        """Put back the ftpwrapper saved by fakeftp()."""
        urllib.request.ftpwrapper = self._ftpwrapper_class
125
126
class urlopen_FileTests(unittest.TestCase):
    """Test urlopen() opening a temporary file.

    Try to test as much functionality as possible so as to cut down on reliance
    on connecting to the Net for testing.

    """

    def setUp(self):
        # Create a temp file to use for testing
        self.text = bytes("test_urllib: %s\n" % self.__class__.__name__,
                          "ascii")
        # Register removal first: cleanups run even when a later setUp step
        # fails (tearDown does not), and they run after tearDown has closed
        # the response object.
        self.addCleanup(support.unlink, support.TESTFN)
        with open(support.TESTFN, 'wb') as f:
            f.write(self.text)
        self.pathname = support.TESTFN
        self.returned_obj = urlopen("file:%s" % self.pathname)

    def tearDown(self):
        """Shut down the open object"""
        self.returned_obj.close()

    def test_interface(self):
        # Make sure object returned by urlopen() has the specified methods
        for attr in ("read", "readline", "readlines", "fileno",
                     "close", "info", "geturl", "getcode", "__iter__"):
            self.assertTrue(hasattr(self.returned_obj, attr),
                            "object returned by urlopen() lacks %s attribute" %
                            attr)

    def test_read(self):
        self.assertEqual(self.text, self.returned_obj.read())

    def test_readline(self):
        self.assertEqual(self.text, self.returned_obj.readline())
        self.assertEqual(b'', self.returned_obj.readline(),
                         "calling readline() after exhausting the file did not"
                         " return an empty string")

    def test_readlines(self):
        lines_list = self.returned_obj.readlines()
        self.assertEqual(len(lines_list), 1,
                         "readlines() returned the wrong number of lines")
        self.assertEqual(lines_list[0], self.text,
                         "readlines() returned improper text")

    def test_fileno(self):
        file_num = self.returned_obj.fileno()
        self.assertIsInstance(file_num, int, "fileno() did not return an int")
        self.assertEqual(os.read(file_num, len(self.text)), self.text,
                         "Reading on the file descriptor returned by fileno() "
                         "did not return the expected text")

    def test_close(self):
        # Test close() by calling it here and then having it be called again
        # by the tearDown() method for the test
        self.returned_obj.close()

    def test_info(self):
        self.assertIsInstance(self.returned_obj.info(), email.message.Message)

    def test_geturl(self):
        self.assertEqual(self.returned_obj.geturl(), self.pathname)

    def test_getcode(self):
        self.assertIsNone(self.returned_obj.getcode())

    def test_iter(self):
        # Test iterator
        # Don't need to count number of iterations since test would fail the
        # instant it returned anything beyond the first line from the
        # comparison.
        # Use the iterator in the usual implicit way to test for ticket #4608.
        for line in self.returned_obj:
            self.assertEqual(line, self.text)

    def test_relativelocalfile(self):
        # A relative path without a scheme is not a valid URL for urlopen().
        self.assertRaises(ValueError, urllib.request.urlopen,
                          './' + self.pathname)
208
Senthil Kumaranefbd4ea2017-04-01 23:47:35 -0700209
class ProxyTests(unittest.TestCase):
    """Proxy discovery and bypass rules read from environment variables."""

    def setUp(self):
        # Records changes to env vars
        self.env = support.EnvironmentVarGuard()
        # Delete all proxy related env vars
        proxy_names = [name for name in os.environ if 'proxy' in name.lower()]
        for name in proxy_names:
            self.env.unset(name)

    def tearDown(self):
        # Restore all proxy related env vars
        self.env.__exit__()
        del self.env

    def test_getproxies_environment_keep_no_proxies(self):
        self.env.set('NO_PROXY', 'localhost')
        found = urllib.request.getproxies_environment()
        # getproxies_environment use lowered case truncated (no '_proxy') keys
        self.assertEqual('localhost', found['no'])
        # List of no_proxies with space.
        self.env.set('NO_PROXY',
                     'localhost, anotherdomain.com, newdomain.com:1234')
        for host in ('anotherdomain.com',
                     'anotherdomain.com:8888',
                     'newdomain.com:1234'):
            self.assertTrue(urllib.request.proxy_bypass_environment(host))

    def test_proxy_cgi_ignore(self):
        try:
            self.env.set('HTTP_PROXY', 'http://somewhere:3128')
            before = urllib.request.getproxies_environment()
            self.assertEqual('http://somewhere:3128', before['http'])
            # With REQUEST_METHOD set, the 'http' entry must disappear.
            self.env.set('REQUEST_METHOD', 'GET')
            after = urllib.request.getproxies_environment()
            self.assertNotIn('http', after)
        finally:
            self.env.unset('REQUEST_METHOD')
            self.env.unset('HTTP_PROXY')

    def test_proxy_bypass_environment_host_match(self):
        bypass = urllib.request.proxy_bypass_environment
        self.env.set('NO_PROXY',
                     'localhost, anotherdomain.com, newdomain.com:1234, .d.o.t')
        bypassed = (
            'localhost',
            'LocalHost',                # MixedCase
            'LOCALHOST',                # UPPERCASE
            'newdomain.com:1234',
            'foo.d.o.t',                # issue 29142
            'anotherdomain.com:8888',
            'www.newdomain.com:1234',
        )
        proxied = (
            'prelocalhost',
            'newdomain.com',            # no port
            'newdomain.com:1235',       # wrong port
        )
        for host in bypassed:
            self.assertTrue(bypass(host))
        for host in proxied:
            self.assertFalse(bypass(host))
Senthil Kumarana7c0ff22016-04-25 08:16:23 -0700262
Senthil Kumaranefbd4ea2017-04-01 23:47:35 -0700263
class ProxyTests_withOrderedEnv(unittest.TestCase):
    """Proxy environment tests where the order of the variables matters."""

    def setUp(self):
        # We need to test conditions, where variable order _is_ significant.
        # Monkey patch os.environ, start with empty fake environment
        self._saved_env, os.environ = os.environ, collections.OrderedDict()

    def tearDown(self):
        os.environ = self._saved_env

    def test_getproxies_environment_prefer_lowercase(self):
        fake_env = os.environ  # the OrderedDict installed by setUp()
        bypass = urllib.request.proxy_bypass_environment

        # Test lowercase preference with removal
        fake_env['no_proxy'] = ''
        fake_env['No_Proxy'] = 'localhost'
        for host in ('localhost', 'arbitrary'):
            self.assertFalse(bypass(host))
        fake_env['http_proxy'] = ''
        fake_env['HTTP_PROXY'] = 'http://somewhere:3128'
        self.assertEqual({}, urllib.request.getproxies_environment())

        # Test lowercase preference of proxy bypass and correct matching
        # including ports
        fake_env['no_proxy'] = 'localhost, noproxy.com, my.proxy:1234'
        fake_env['No_Proxy'] = 'xyz.com'
        for host in ('localhost', 'noproxy.com:5678', 'my.proxy:1234'):
            self.assertTrue(bypass(host))
        for host in ('my.proxy', 'arbitrary'):
            self.assertFalse(bypass(host))

        # Test lowercase preference with replacement
        fake_env['http_proxy'] = 'http://somewhere:3128'
        fake_env['Http_Proxy'] = 'http://somewhereelse:3128'
        found = urllib.request.getproxies_environment()
        self.assertEqual('http://somewhere:3128', found['http'])
Benjamin Peterson9bc93512008-09-22 22:10:59 +0000298
Senthil Kumaranefbd4ea2017-04-01 23:47:35 -0700299
class urlopen_HttpTests(unittest.TestCase, FakeHTTPMixin, FakeFTPMixin):
    """Test urlopen() opening a fake http connection."""

    def check_read(self, ver):
        """Serve a 200 reply labelled HTTP/<ver> and check the response."""
        canned = b"HTTP/" + ver + b" 200 OK\r\n\r\nHello!"
        self.fakehttp(canned)
        try:
            response = urlopen("http://python.org/")
            self.assertEqual(response.readline(), b"Hello!")
            self.assertEqual(response.readline(), b"")
            self.assertEqual(response.geturl(), 'http://python.org/')
            self.assertEqual(response.getcode(), 200)
        finally:
            self.unfakehttp()

    def test_url_fragment(self):
        # Issue #11703: geturl() omits fragments in the original URL.
        target = 'http://docs.python.org/library/urllib.html#OK'
        self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello!")
        try:
            response = urllib.request.urlopen(target)
            self.assertEqual(response.geturl(), target)
        finally:
            self.unfakehttp()

    def test_willclose(self):
        self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello!")
        try:
            response = urlopen("http://www.python.org")
            self.assertTrue(response.fp.will_close)
        finally:
            self.unfakehttp()

    def test_read_0_9(self):
        # "0.9" response accepted (but not "simple responses" without
        # a status line)
        self.check_read(b"0.9")

    def test_read_1_0(self):
        self.check_read(b"1.0")

    def test_read_1_1(self):
        self.check_read(b"1.1")

    def test_read_bogus(self):
        # urlopen() should raise OSError for many error codes.
        self.fakehttp(
            b"HTTP/1.1 401 Authentication Required\n"
            b"Date: Wed, 02 Jan 2008 03:03:54 GMT\n"
            b"Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 "
            b"OpenSSL/0.9.7e\n"
            b"Connection: close\n"
            b"Content-Type: text/html; charset=iso-8859-1\n")
        try:
            with self.assertRaises(OSError):
                urlopen("http://python.org/")
        finally:
            self.unfakehttp()

    def test_invalid_redirect(self):
        # A redirect to a file: URL must be refused with an HTTPError.
        self.fakehttp(
            b"HTTP/1.1 302 Found\n"
            b"Date: Wed, 02 Jan 2008 03:03:54 GMT\n"
            b"Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 "
            b"OpenSSL/0.9.7e\n"
            b"Location: file://guidocomputer.athome.com:/python/license\n"
            b"Connection: close\n"
            b"Content-Type: text/html; charset=iso-8859-1\n")
        try:
            expected = "Redirection to url 'file:"
            with self.assertRaisesRegex(urllib.error.HTTPError, expected):
                urlopen("http://python.org/")
        finally:
            self.unfakehttp()

    def test_redirect_limit_independent(self):
        # Ticket #12923: make sure independent requests each use their
        # own retry limit.
        attempts = FancyURLopener().maxtries
        for _ in range(attempts):
            self.fakehttp(
                b"HTTP/1.1 302 Found\n"
                b"Location: file://guidocomputer.athome.com:/python/license\n"
                b"Connection: close\n")
            try:
                with self.assertRaises(urllib.error.HTTPError):
                    urlopen("http://something")
            finally:
                self.unfakehttp()

    def test_empty_socket(self):
        # urlopen() raises OSError if the underlying socket does not send any
        # data. (#1680230)
        self.fakehttp(b'')
        try:
            with self.assertRaises(OSError):
                urlopen("http://something")
        finally:
            self.unfakehttp()

    def test_missing_localfile(self):
        # Test for #10836
        with self.assertRaises(urllib.error.URLError) as caught:
            urlopen('file://localhost/a/file/which/doesnot/exists.py')
        error = caught.exception
        self.assertTrue(error.filename)
        self.assertTrue(error.reason)

    def test_file_notexists(self):
        handle, local_path = tempfile.mkstemp()
        file_url = 'file://localhost/' + '/'.join(
            local_path.split(os.path.sep))
        try:
            self.assertTrue(os.path.exists(local_path))
            with urlopen(file_url) as opened:
                self.assertTrue(opened)
        finally:
            os.close(handle)
            os.unlink(local_path)
        # Once the file is gone, the same URL must fail.
        self.assertFalse(os.path.exists(local_path))
        with self.assertRaises(urllib.error.URLError):
            urlopen(file_url)

    def test_ftp_nohost(self):
        hostless_url = 'ftp:///path'
        with self.assertRaises(urllib.error.URLError) as caught:
            urlopen(hostless_url)
        error = caught.exception
        self.assertFalse(error.filename)
        self.assertTrue(error.reason)

    def test_ftp_nonexisting(self):
        with self.assertRaises(urllib.error.URLError) as caught:
            urlopen('ftp://localhost/a/file/which/doesnot/exists.py')
        error = caught.exception
        self.assertFalse(error.filename)
        self.assertTrue(error.reason)

    @patch.object(urllib.request, 'MAXFTPCACHE', 0)
    def test_ftp_cache_pruning(self):
        self.fakeftp()
        try:
            # Seed the cache so it already exceeds the patched limit of 0.
            stale = urllib.request.ftpwrapper(
                'user', 'pass', 'localhost', 21, [])
            urllib.request.ftpcache['test'] = stale
            urlopen('ftp://localhost')
        finally:
            self.unfakeftp()

    def test_userpass_inurl(self):
        self.fakehttp(b"HTTP/1.0 200 OK\r\n\r\nHello!")
        try:
            response = urlopen("http://user:pass@python.org/")
            self.assertEqual(response.readline(), b"Hello!")
            self.assertEqual(response.readline(), b"")
            self.assertEqual(response.geturl(),
                             'http://user:pass@python.org/')
            self.assertEqual(response.getcode(), 200)
        finally:
            self.unfakehttp()

    def test_userpass_inurl_w_spaces(self):
        self.fakehttp(b"HTTP/1.0 200 OK\r\n\r\nHello!")
        try:
            credentials = "a b:c d"
            target = "http://{}@python.org/".format(credentials)
            # The fake class currently installed; its ``buf`` holds the
            # request bytes once the URL has been opened.
            installed_fake = http.client.HTTPConnection
            encoded = b64encode(credentials.encode("ASCII")).decode("ASCII")
            expected_header = "Authorization: Basic %s\r\n" % encoded
            response = urlopen(target)
            # The authorization header must be in place
            self.assertIn(expected_header, installed_fake.buf.decode("UTF-8"))
            self.assertEqual(response.readline(), b"Hello!")
            self.assertEqual(response.readline(), b"")
            # the spaces are quoted in URL so no match
            self.assertNotEqual(response.geturl(), target)
            self.assertEqual(response.getcode(), 200)
        finally:
            self.unfakehttp()

    def test_URLopener_deprecation(self):
        with support.check_warnings(('', DeprecationWarning)):
            urllib.request.URLopener()

    @unittest.skipUnless(ssl, "ssl module required")
    def test_cafile_and_context(self):
        # Passing both cafile= and context= is rejected with ValueError.
        tls_context = ssl.create_default_context()
        with support.check_warnings(('', DeprecationWarning)), \
                self.assertRaises(ValueError):
            urllib.request.urlopen(
                "https://localhost", cafile="/nonexistent/path",
                context=tls_context
            )
Senthil Kumarana5c85b32014-09-19 15:23:30 +0800480
Senthil Kumaranefbd4ea2017-04-01 23:47:35 -0700481
class urlopen_DataTests(unittest.TestCase):
    """Test urlopen() opening a data URL."""

    def _open(self, url):
        """Open *url* and arrange for the response to be closed afterwards."""
        response = urllib.request.urlopen(url)
        # Without this the response objects created in setUp() would stay
        # open after every test.
        self.addCleanup(response.close)
        return response

    def setUp(self):
        # text containing URL special- and unicode-characters
        self.text = "test data URLs :;,%=& \u00f6 \u00c4 "
        # 2x1 pixel RGB PNG image with one black and one white pixel
        self.image = (
            b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\x02\x00\x00\x00'
            b'\x01\x08\x02\x00\x00\x00{@\xe8\xdd\x00\x00\x00\x01sRGB\x00\xae'
            b'\xce\x1c\xe9\x00\x00\x00\x0fIDAT\x08\xd7c```\xf8\xff\xff?\x00'
            b'\x06\x01\x02\xfe\no/\x1e\x00\x00\x00\x00IEND\xaeB`\x82')

        self.text_url = (
            "data:text/plain;charset=UTF-8,test%20data%20URLs%20%3A%3B%2C%25%3"
            "D%26%20%C3%B6%20%C3%84%20")
        self.text_url_base64 = (
            "data:text/plain;charset=ISO-8859-1;base64,dGVzdCBkYXRhIFVSTHMgOjs"
            "sJT0mIPYgxCA%3D")
        # base64 encoded data URL that contains ignorable spaces,
        # such as "\n", " ", "%0A", and "%20".
        self.image_url = (
            "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAIAAAABCAIAAAB7\n"
            "QOjdAAAAAXNSR0IArs4c6QAAAA9JREFUCNdj%0AYGBg%2BP//PwAGAQL%2BCm8 "
            "vHgAAAABJRU5ErkJggg%3D%3D%0A%20")

        self.text_url_resp = self._open(self.text_url)
        self.text_url_base64_resp = self._open(self.text_url_base64)
        self.image_url_resp = self._open(self.image_url)

    def test_interface(self):
        # Make sure object returned by urlopen() has the specified methods
        for attr in ("read", "readline", "readlines",
                     "close", "info", "geturl", "getcode", "__iter__"):
            self.assertTrue(hasattr(self.text_url_resp, attr),
                            "object returned by urlopen() lacks %s attribute" %
                            attr)

    def test_info(self):
        self.assertIsInstance(self.text_url_resp.info(), email.message.Message)
        self.assertEqual(self.text_url_base64_resp.info().get_params(),
                         [('text/plain', ''), ('charset', 'ISO-8859-1')])
        self.assertEqual(self.image_url_resp.info()['content-length'],
                         str(len(self.image)))
        # An empty data URL falls back to text/plain in US-ASCII.
        with urllib.request.urlopen("data:,") as empty_resp:
            self.assertEqual(empty_resp.info().get_params(),
                             [('text/plain', ''), ('charset', 'US-ASCII')])

    def test_geturl(self):
        self.assertEqual(self.text_url_resp.geturl(), self.text_url)
        self.assertEqual(self.text_url_base64_resp.geturl(),
                         self.text_url_base64)
        self.assertEqual(self.image_url_resp.geturl(), self.image_url)

    def test_read_text(self):
        # Decode with the charset announced in the response headers.
        charset = dict(self.text_url_resp.info().get_params())['charset']
        self.assertEqual(self.text_url_resp.read().decode(charset), self.text)

    def test_read_text_base64(self):
        charset = dict(
            self.text_url_base64_resp.info().get_params())['charset']
        self.assertEqual(self.text_url_base64_resp.read().decode(charset),
                         self.text)

    def test_read_image(self):
        self.assertEqual(self.image_url_resp.read(), self.image)

    def test_missing_comma(self):
        self.assertRaises(ValueError, urllib.request.urlopen,
                          'data:text/plain')

    def test_invalid_base64_data(self):
        # missing padding character
        self.assertRaises(ValueError, urllib.request.urlopen,
                          'data:;base64,Cg=')
554
Senthil Kumaranefbd4ea2017-04-01 23:47:35 -0700555
Brett Cannon19691362003-04-29 05:08:06 +0000556class urlretrieve_FileTests(unittest.TestCase):
Brett Cannon74bfd702003-04-25 09:39:47 +0000557 """Test urllib.urlretrieve() on local files"""
Skip Montanaro080c9972001-01-28 21:12:22 +0000558
Brett Cannon19691362003-04-29 05:08:06 +0000559 def setUp(self):
Georg Brandl5a650a22005-08-26 08:51:34 +0000560 # Create a list of temporary files. Each item in the list is a file
561 # name (absolute path or relative to the current working directory).
562 # All files in this list will be deleted in the tearDown method. Note,
563 # this only helps to makes sure temporary files get deleted, but it
564 # does nothing about trying to close files that may still be open. It
565 # is the responsibility of the developer to properly close files even
566 # when exceptional conditions occur.
567 self.tempFiles = []
568
Brett Cannon19691362003-04-29 05:08:06 +0000569 # Create a temporary file.
Benjamin Petersonee8712c2008-05-20 21:35:26 +0000570 self.registerFileForCleanUp(support.TESTFN)
Guido van Rossuma0982942007-07-10 08:30:03 +0000571 self.text = b'testing urllib.urlretrieve'
Georg Brandl5a650a22005-08-26 08:51:34 +0000572 try:
Benjamin Petersonee8712c2008-05-20 21:35:26 +0000573 FILE = open(support.TESTFN, 'wb')
Georg Brandl5a650a22005-08-26 08:51:34 +0000574 FILE.write(self.text)
575 FILE.close()
576 finally:
577 try: FILE.close()
578 except: pass
Brett Cannon19691362003-04-29 05:08:06 +0000579
580 def tearDown(self):
Georg Brandl5a650a22005-08-26 08:51:34 +0000581 # Delete the temporary files.
582 for each in self.tempFiles:
583 try: os.remove(each)
584 except: pass
585
586 def constructLocalFileUrl(self, filePath):
Victor Stinner6c6f8512010-08-07 10:09:35 +0000587 filePath = os.path.abspath(filePath)
588 try:
Marc-André Lemburg8f36af72011-02-25 15:42:01 +0000589 filePath.encode("utf-8")
Victor Stinner6c6f8512010-08-07 10:09:35 +0000590 except UnicodeEncodeError:
591 raise unittest.SkipTest("filePath is not encodable to utf8")
592 return "file://%s" % urllib.request.pathname2url(filePath)
Georg Brandl5a650a22005-08-26 08:51:34 +0000593
Guido van Rossum70d0dda2007-08-29 01:53:26 +0000594 def createNewTempFile(self, data=b""):
Georg Brandl5a650a22005-08-26 08:51:34 +0000595 """Creates a new temporary file containing the specified data,
596 registers the file for deletion during the test fixture tear down, and
597 returns the absolute path of the file."""
598
599 newFd, newFilePath = tempfile.mkstemp()
600 try:
601 self.registerFileForCleanUp(newFilePath)
602 newFile = os.fdopen(newFd, "wb")
603 newFile.write(data)
604 newFile.close()
605 finally:
606 try: newFile.close()
607 except: pass
608 return newFilePath
609
610 def registerFileForCleanUp(self, fileName):
611 self.tempFiles.append(fileName)
Brett Cannon19691362003-04-29 05:08:06 +0000612
613 def test_basic(self):
614 # Make sure that a local file just gets its own location returned and
615 # a headers value is returned.
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000616 result = urllib.request.urlretrieve("file:%s" % support.TESTFN)
Benjamin Petersonee8712c2008-05-20 21:35:26 +0000617 self.assertEqual(result[0], support.TESTFN)
Ezio Melottie9615932010-01-24 19:26:24 +0000618 self.assertIsInstance(result[1], email.message.Message,
Martin Panter7462b6492015-11-02 03:37:02 +0000619 "did not get an email.message.Message instance "
Ezio Melottie9615932010-01-24 19:26:24 +0000620 "as second returned value")
Brett Cannon19691362003-04-29 05:08:06 +0000621
622 def test_copy(self):
623 # Test that setting the filename argument works.
Benjamin Petersonee8712c2008-05-20 21:35:26 +0000624 second_temp = "%s.2" % support.TESTFN
Georg Brandl5a650a22005-08-26 08:51:34 +0000625 self.registerFileForCleanUp(second_temp)
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000626 result = urllib.request.urlretrieve(self.constructLocalFileUrl(
Benjamin Petersonee8712c2008-05-20 21:35:26 +0000627 support.TESTFN), second_temp)
Brett Cannon19691362003-04-29 05:08:06 +0000628 self.assertEqual(second_temp, result[0])
Benjamin Petersonc9c0f202009-06-30 23:06:06 +0000629 self.assertTrue(os.path.exists(second_temp), "copy of the file was not "
Brett Cannon19691362003-04-29 05:08:06 +0000630 "made")
Alex Martelli01c77c62006-08-24 02:58:11 +0000631 FILE = open(second_temp, 'rb')
Brett Cannon19691362003-04-29 05:08:06 +0000632 try:
633 text = FILE.read()
Brett Cannon19691362003-04-29 05:08:06 +0000634 FILE.close()
Georg Brandl5a650a22005-08-26 08:51:34 +0000635 finally:
636 try: FILE.close()
637 except: pass
Brett Cannon19691362003-04-29 05:08:06 +0000638 self.assertEqual(self.text, text)
639
640 def test_reporthook(self):
641 # Make sure that the reporthook works.
Senthil Kumarane24f96a2012-03-13 19:29:33 -0700642 def hooktester(block_count, block_read_size, file_size, count_holder=[0]):
643 self.assertIsInstance(block_count, int)
644 self.assertIsInstance(block_read_size, int)
645 self.assertIsInstance(file_size, int)
646 self.assertEqual(block_count, count_holder[0])
Brett Cannon19691362003-04-29 05:08:06 +0000647 count_holder[0] = count_holder[0] + 1
Benjamin Petersonee8712c2008-05-20 21:35:26 +0000648 second_temp = "%s.2" % support.TESTFN
Georg Brandl5a650a22005-08-26 08:51:34 +0000649 self.registerFileForCleanUp(second_temp)
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000650 urllib.request.urlretrieve(
651 self.constructLocalFileUrl(support.TESTFN),
Georg Brandl5a650a22005-08-26 08:51:34 +0000652 second_temp, hooktester)
653
654 def test_reporthook_0_bytes(self):
655 # Test on zero length file. Should call reporthook only 1 time.
656 report = []
Senthil Kumarane24f96a2012-03-13 19:29:33 -0700657 def hooktester(block_count, block_read_size, file_size, _report=report):
658 _report.append((block_count, block_read_size, file_size))
Georg Brandl5a650a22005-08-26 08:51:34 +0000659 srcFileName = self.createNewTempFile()
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000660 urllib.request.urlretrieve(self.constructLocalFileUrl(srcFileName),
Benjamin Petersonee8712c2008-05-20 21:35:26 +0000661 support.TESTFN, hooktester)
Georg Brandl5a650a22005-08-26 08:51:34 +0000662 self.assertEqual(len(report), 1)
663 self.assertEqual(report[0][2], 0)
664
665 def test_reporthook_5_bytes(self):
666 # Test on 5 byte file. Should call reporthook only 2 times (once when
667 # the "network connection" is established and once when the block is
Senthil Kumarane24f96a2012-03-13 19:29:33 -0700668 # read).
Georg Brandl5a650a22005-08-26 08:51:34 +0000669 report = []
Senthil Kumarane24f96a2012-03-13 19:29:33 -0700670 def hooktester(block_count, block_read_size, file_size, _report=report):
671 _report.append((block_count, block_read_size, file_size))
Guido van Rossum70d0dda2007-08-29 01:53:26 +0000672 srcFileName = self.createNewTempFile(b"x" * 5)
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000673 urllib.request.urlretrieve(self.constructLocalFileUrl(srcFileName),
Benjamin Petersonee8712c2008-05-20 21:35:26 +0000674 support.TESTFN, hooktester)
Georg Brandl5a650a22005-08-26 08:51:34 +0000675 self.assertEqual(len(report), 2)
Gregory P. Smith6d9388f2012-11-10 15:12:55 -0800676 self.assertEqual(report[0][2], 5)
677 self.assertEqual(report[1][2], 5)
Georg Brandl5a650a22005-08-26 08:51:34 +0000678
679 def test_reporthook_8193_bytes(self):
680 # Test on 8193 byte file. Should call reporthook only 3 times (once
681 # when the "network connection" is established, once for the next 8192
682 # bytes, and once for the last byte).
683 report = []
Senthil Kumarane24f96a2012-03-13 19:29:33 -0700684 def hooktester(block_count, block_read_size, file_size, _report=report):
685 _report.append((block_count, block_read_size, file_size))
Guido van Rossum70d0dda2007-08-29 01:53:26 +0000686 srcFileName = self.createNewTempFile(b"x" * 8193)
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000687 urllib.request.urlretrieve(self.constructLocalFileUrl(srcFileName),
Benjamin Petersonee8712c2008-05-20 21:35:26 +0000688 support.TESTFN, hooktester)
Georg Brandl5a650a22005-08-26 08:51:34 +0000689 self.assertEqual(len(report), 3)
Gregory P. Smith6d9388f2012-11-10 15:12:55 -0800690 self.assertEqual(report[0][2], 8193)
691 self.assertEqual(report[0][1], 8192)
Senthil Kumarane24f96a2012-03-13 19:29:33 -0700692 self.assertEqual(report[1][1], 8192)
Gregory P. Smith6d9388f2012-11-10 15:12:55 -0800693 self.assertEqual(report[2][1], 8192)
Skip Montanaro080c9972001-01-28 21:12:22 +0000694
Senthil Kumarance260142011-11-01 01:35:17 +0800695
class urlretrieve_HttpTests(unittest.TestCase, FakeHTTPMixin):
    """Exercise urllib.request.urlretrieve() over a faked HTTP connection."""

    def _retrieve_truncated_response(self, **retrieve_kwargs):
        # Shared driver for both tests: install a canned response whose
        # headers announce Content-Length: 100 while the literal carries far
        # fewer body bytes, then require urlretrieve() to raise
        # ContentTooShortError.  The fake connection is always removed again,
        # whether or not the expected exception was raised.
        self.fakehttp(b'''HTTP/1.1 200 OK
Date: Wed, 02 Jan 2008 03:03:54 GMT
Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
Connection: close
Content-Length: 100
Content-Type: text/html; charset=iso-8859-1

FF
''')
        try:
            with self.assertRaises(urllib.error.ContentTooShortError):
                urllib.request.urlretrieve(support.TEST_HTTP_URL,
                                           **retrieve_kwargs)
        finally:
            self.unfakehttp()

    def test_short_content_raises_ContentTooShortError(self):
        # Variant that supplies a (no-op) report hook.
        def _ignore_progress(block_count, block_size, total_size):
            pass

        self._retrieve_truncated_response(reporthook=_ignore_progress)

    def test_short_content_raises_ContentTooShortError_without_reporthook(self):
        # Variant that relies on urlretrieve()'s default of no report hook.
        self._retrieve_truncated_response()
735
736
Brett Cannon74bfd702003-04-25 09:39:47 +0000737class QuotingTests(unittest.TestCase):
R David Murray44b548d2016-09-08 13:59:53 -0400738 r"""Tests for urllib.quote() and urllib.quote_plus()
Tim Petersc2659cf2003-05-12 20:19:37 +0000739
Ratnadeep Debnath21024f02017-02-25 14:30:28 +0530740 According to RFC 3986 (Uniform Resource Identifiers), to escape a
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000741 character you write it as '%' + <2 character US-ASCII hex value>.
742 The Python code of ``'%' + hex(ord(<character>))[2:]`` escapes a
743 character properly. Case does not matter on the hex letters.
Brett Cannon74bfd702003-04-25 09:39:47 +0000744
745 The various character sets specified are:
Tim Petersc2659cf2003-05-12 20:19:37 +0000746
Brett Cannon74bfd702003-04-25 09:39:47 +0000747 Reserved characters : ";/?:@&=+$,"
748 Have special meaning in URIs and must be escaped if not being used for
749 their special meaning
750 Data characters : letters, digits, and "-_.!~*'()"
751 Unreserved and do not need to be escaped; can be, though, if desired
752 Control characters : 0x00 - 0x1F, 0x7F
753 Have no use in URIs so must be escaped
754 space : 0x20
755 Must be escaped
756 Delimiters : '<>#%"'
757 Must be escaped
758 Unwise : "{}|\^[]`"
759 Must be escaped
Tim Petersc2659cf2003-05-12 20:19:37 +0000760
Brett Cannon74bfd702003-04-25 09:39:47 +0000761 """
762
763 def test_never_quote(self):
764 # Make sure quote() does not quote letters, digits, and "_,.-"
765 do_not_quote = '' .join(["ABCDEFGHIJKLMNOPQRSTUVWXYZ",
766 "abcdefghijklmnopqrstuvwxyz",
767 "0123456789",
Ratnadeep Debnath21024f02017-02-25 14:30:28 +0530768 "_.-~"])
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000769 result = urllib.parse.quote(do_not_quote)
Brett Cannon74bfd702003-04-25 09:39:47 +0000770 self.assertEqual(do_not_quote, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000771 "using quote(): %r != %r" % (do_not_quote, result))
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000772 result = urllib.parse.quote_plus(do_not_quote)
Brett Cannon74bfd702003-04-25 09:39:47 +0000773 self.assertEqual(do_not_quote, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000774 "using quote_plus(): %r != %r" % (do_not_quote, result))
Brett Cannon74bfd702003-04-25 09:39:47 +0000775
776 def test_default_safe(self):
777 # Test '/' is default value for 'safe' parameter
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000778 self.assertEqual(urllib.parse.quote.__defaults__[0], '/')
Brett Cannon74bfd702003-04-25 09:39:47 +0000779
780 def test_safe(self):
781 # Test setting 'safe' parameter does what it should do
782 quote_by_default = "<>"
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000783 result = urllib.parse.quote(quote_by_default, safe=quote_by_default)
Brett Cannon74bfd702003-04-25 09:39:47 +0000784 self.assertEqual(quote_by_default, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000785 "using quote(): %r != %r" % (quote_by_default, result))
Jeremy Hylton1ef7c6b2009-03-26 16:57:30 +0000786 result = urllib.parse.quote_plus(quote_by_default,
787 safe=quote_by_default)
Brett Cannon74bfd702003-04-25 09:39:47 +0000788 self.assertEqual(quote_by_default, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000789 "using quote_plus(): %r != %r" %
Brett Cannon74bfd702003-04-25 09:39:47 +0000790 (quote_by_default, result))
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000791 # Safe expressed as bytes rather than str
792 result = urllib.parse.quote(quote_by_default, safe=b"<>")
793 self.assertEqual(quote_by_default, result,
794 "using quote(): %r != %r" % (quote_by_default, result))
795 # "Safe" non-ASCII characters should have no effect
796 # (Since URIs are not allowed to have non-ASCII characters)
797 result = urllib.parse.quote("a\xfcb", encoding="latin-1", safe="\xfc")
798 expect = urllib.parse.quote("a\xfcb", encoding="latin-1", safe="")
799 self.assertEqual(expect, result,
800 "using quote(): %r != %r" %
801 (expect, result))
802 # Same as above, but using a bytes rather than str
803 result = urllib.parse.quote("a\xfcb", encoding="latin-1", safe=b"\xfc")
804 expect = urllib.parse.quote("a\xfcb", encoding="latin-1", safe="")
805 self.assertEqual(expect, result,
806 "using quote(): %r != %r" %
807 (expect, result))
Brett Cannon74bfd702003-04-25 09:39:47 +0000808
809 def test_default_quoting(self):
810 # Make sure all characters that should be quoted are by default sans
811 # space (separate test for that).
812 should_quote = [chr(num) for num in range(32)] # For 0x00 - 0x1F
R David Murray44b548d2016-09-08 13:59:53 -0400813 should_quote.append(r'<>#%"{}|\^[]`')
Brett Cannon74bfd702003-04-25 09:39:47 +0000814 should_quote.append(chr(127)) # For 0x7F
815 should_quote = ''.join(should_quote)
816 for char in should_quote:
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000817 result = urllib.parse.quote(char)
Brett Cannon74bfd702003-04-25 09:39:47 +0000818 self.assertEqual(hexescape(char), result,
Jeremy Hylton1ef7c6b2009-03-26 16:57:30 +0000819 "using quote(): "
820 "%s should be escaped to %s, not %s" %
Brett Cannon74bfd702003-04-25 09:39:47 +0000821 (char, hexescape(char), result))
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000822 result = urllib.parse.quote_plus(char)
Brett Cannon74bfd702003-04-25 09:39:47 +0000823 self.assertEqual(hexescape(char), result,
824 "using quote_plus(): "
Tim Petersc2659cf2003-05-12 20:19:37 +0000825 "%s should be escapes to %s, not %s" %
Brett Cannon74bfd702003-04-25 09:39:47 +0000826 (char, hexescape(char), result))
827 del should_quote
828 partial_quote = "ab[]cd"
829 expected = "ab%5B%5Dcd"
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000830 result = urllib.parse.quote(partial_quote)
Brett Cannon74bfd702003-04-25 09:39:47 +0000831 self.assertEqual(expected, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000832 "using quote(): %r != %r" % (expected, result))
Senthil Kumaran305a68e2011-09-13 06:40:27 +0800833 result = urllib.parse.quote_plus(partial_quote)
Brett Cannon74bfd702003-04-25 09:39:47 +0000834 self.assertEqual(expected, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000835 "using quote_plus(): %r != %r" % (expected, result))
Brett Cannon74bfd702003-04-25 09:39:47 +0000836
837 def test_quoting_space(self):
838 # Make sure quote() and quote_plus() handle spaces as specified in
839 # their unique way
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000840 result = urllib.parse.quote(' ')
Brett Cannon74bfd702003-04-25 09:39:47 +0000841 self.assertEqual(result, hexescape(' '),
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000842 "using quote(): %r != %r" % (result, hexescape(' ')))
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000843 result = urllib.parse.quote_plus(' ')
Brett Cannon74bfd702003-04-25 09:39:47 +0000844 self.assertEqual(result, '+',
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000845 "using quote_plus(): %r != +" % result)
Brett Cannon74bfd702003-04-25 09:39:47 +0000846 given = "a b cd e f"
847 expect = given.replace(' ', hexescape(' '))
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000848 result = urllib.parse.quote(given)
Brett Cannon74bfd702003-04-25 09:39:47 +0000849 self.assertEqual(expect, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000850 "using quote(): %r != %r" % (expect, result))
Brett Cannon74bfd702003-04-25 09:39:47 +0000851 expect = given.replace(' ', '+')
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000852 result = urllib.parse.quote_plus(given)
Brett Cannon74bfd702003-04-25 09:39:47 +0000853 self.assertEqual(expect, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000854 "using quote_plus(): %r != %r" % (expect, result))
Brett Cannon74bfd702003-04-25 09:39:47 +0000855
Raymond Hettinger2bdec7b2005-09-10 14:30:09 +0000856 def test_quoting_plus(self):
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000857 self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma'),
Raymond Hettinger2bdec7b2005-09-10 14:30:09 +0000858 'alpha%2Bbeta+gamma')
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000859 self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma', '+'),
Raymond Hettinger2bdec7b2005-09-10 14:30:09 +0000860 'alpha+beta+gamma')
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000861 # Test with bytes
862 self.assertEqual(urllib.parse.quote_plus(b'alpha+beta gamma'),
863 'alpha%2Bbeta+gamma')
864 # Test with safe bytes
865 self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma', b'+'),
866 'alpha+beta+gamma')
867
868 def test_quote_bytes(self):
869 # Bytes should quote directly to percent-encoded values
870 given = b"\xa2\xd8ab\xff"
871 expect = "%A2%D8ab%FF"
872 result = urllib.parse.quote(given)
873 self.assertEqual(expect, result,
874 "using quote(): %r != %r" % (expect, result))
875 # Encoding argument should raise type error on bytes input
876 self.assertRaises(TypeError, urllib.parse.quote, given,
877 encoding="latin-1")
878 # quote_from_bytes should work the same
879 result = urllib.parse.quote_from_bytes(given)
880 self.assertEqual(expect, result,
881 "using quote_from_bytes(): %r != %r"
882 % (expect, result))
883
884 def test_quote_with_unicode(self):
885 # Characters in Latin-1 range, encoded by default in UTF-8
886 given = "\xa2\xd8ab\xff"
887 expect = "%C2%A2%C3%98ab%C3%BF"
888 result = urllib.parse.quote(given)
889 self.assertEqual(expect, result,
890 "using quote(): %r != %r" % (expect, result))
891 # Characters in Latin-1 range, encoded by with None (default)
892 result = urllib.parse.quote(given, encoding=None, errors=None)
893 self.assertEqual(expect, result,
894 "using quote(): %r != %r" % (expect, result))
895 # Characters in Latin-1 range, encoded with Latin-1
896 given = "\xa2\xd8ab\xff"
897 expect = "%A2%D8ab%FF"
898 result = urllib.parse.quote(given, encoding="latin-1")
899 self.assertEqual(expect, result,
900 "using quote(): %r != %r" % (expect, result))
901 # Characters in BMP, encoded by default in UTF-8
902 given = "\u6f22\u5b57" # "Kanji"
903 expect = "%E6%BC%A2%E5%AD%97"
904 result = urllib.parse.quote(given)
905 self.assertEqual(expect, result,
906 "using quote(): %r != %r" % (expect, result))
907 # Characters in BMP, encoded with Latin-1
908 given = "\u6f22\u5b57"
909 self.assertRaises(UnicodeEncodeError, urllib.parse.quote, given,
910 encoding="latin-1")
911 # Characters in BMP, encoded with Latin-1, with replace error handling
912 given = "\u6f22\u5b57"
913 expect = "%3F%3F" # "??"
914 result = urllib.parse.quote(given, encoding="latin-1",
915 errors="replace")
916 self.assertEqual(expect, result,
917 "using quote(): %r != %r" % (expect, result))
918 # Characters in BMP, Latin-1, with xmlcharref error handling
919 given = "\u6f22\u5b57"
920 expect = "%26%2328450%3B%26%2323383%3B" # "&#28450;&#23383;"
921 result = urllib.parse.quote(given, encoding="latin-1",
922 errors="xmlcharrefreplace")
923 self.assertEqual(expect, result,
924 "using quote(): %r != %r" % (expect, result))
Raymond Hettinger2bdec7b2005-09-10 14:30:09 +0000925
Georg Brandlfaf41492009-05-26 18:31:11 +0000926 def test_quote_plus_with_unicode(self):
927 # Encoding (latin-1) test for quote_plus
928 given = "\xa2\xd8 \xff"
929 expect = "%A2%D8+%FF"
930 result = urllib.parse.quote_plus(given, encoding="latin-1")
931 self.assertEqual(expect, result,
932 "using quote_plus(): %r != %r" % (expect, result))
933 # Errors test for quote_plus
934 given = "ab\u6f22\u5b57 cd"
935 expect = "ab%3F%3F+cd"
936 result = urllib.parse.quote_plus(given, encoding="latin-1",
937 errors="replace")
938 self.assertEqual(expect, result,
939 "using quote_plus(): %r != %r" % (expect, result))
940
Senthil Kumarand496c4c2010-07-30 19:34:36 +0000941
Brett Cannon74bfd702003-04-25 09:39:47 +0000942class UnquotingTests(unittest.TestCase):
943 """Tests for unquote() and unquote_plus()
Tim Petersc2659cf2003-05-12 20:19:37 +0000944
Brett Cannon74bfd702003-04-25 09:39:47 +0000945 See the doc string for quoting_Tests for details on quoting and such.
946
947 """
948
949 def test_unquoting(self):
950 # Make sure unquoting of all ASCII values works
951 escape_list = []
952 for num in range(128):
953 given = hexescape(chr(num))
954 expect = chr(num)
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000955 result = urllib.parse.unquote(given)
Brett Cannon74bfd702003-04-25 09:39:47 +0000956 self.assertEqual(expect, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000957 "using unquote(): %r != %r" % (expect, result))
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000958 result = urllib.parse.unquote_plus(given)
Brett Cannon74bfd702003-04-25 09:39:47 +0000959 self.assertEqual(expect, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000960 "using unquote_plus(): %r != %r" %
Brett Cannon74bfd702003-04-25 09:39:47 +0000961 (expect, result))
962 escape_list.append(given)
963 escape_string = ''.join(escape_list)
964 del escape_list
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000965 result = urllib.parse.unquote(escape_string)
Brett Cannon74bfd702003-04-25 09:39:47 +0000966 self.assertEqual(result.count('%'), 1,
Brett Cannon74bfd702003-04-25 09:39:47 +0000967 "using unquote(): not all characters escaped: "
968 "%s" % result)
Georg Brandl604ef372010-07-31 08:20:02 +0000969 self.assertRaises((TypeError, AttributeError), urllib.parse.unquote, None)
970 self.assertRaises((TypeError, AttributeError), urllib.parse.unquote, ())
Florent Xicluna62829dc2010-08-14 20:51:58 +0000971 with support.check_warnings(('', BytesWarning), quiet=True):
972 self.assertRaises((TypeError, AttributeError), urllib.parse.unquote, b'')
Brett Cannon74bfd702003-04-25 09:39:47 +0000973
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000974 def test_unquoting_badpercent(self):
975 # Test unquoting on bad percent-escapes
976 given = '%xab'
977 expect = given
978 result = urllib.parse.unquote(given)
979 self.assertEqual(expect, result, "using unquote(): %r != %r"
980 % (expect, result))
981 given = '%x'
982 expect = given
983 result = urllib.parse.unquote(given)
984 self.assertEqual(expect, result, "using unquote(): %r != %r"
985 % (expect, result))
986 given = '%'
987 expect = given
988 result = urllib.parse.unquote(given)
989 self.assertEqual(expect, result, "using unquote(): %r != %r"
990 % (expect, result))
991 # unquote_to_bytes
992 given = '%xab'
993 expect = bytes(given, 'ascii')
994 result = urllib.parse.unquote_to_bytes(given)
995 self.assertEqual(expect, result, "using unquote_to_bytes(): %r != %r"
996 % (expect, result))
997 given = '%x'
998 expect = bytes(given, 'ascii')
999 result = urllib.parse.unquote_to_bytes(given)
1000 self.assertEqual(expect, result, "using unquote_to_bytes(): %r != %r"
1001 % (expect, result))
1002 given = '%'
1003 expect = bytes(given, 'ascii')
1004 result = urllib.parse.unquote_to_bytes(given)
1005 self.assertEqual(expect, result, "using unquote_to_bytes(): %r != %r"
1006 % (expect, result))
Georg Brandl604ef372010-07-31 08:20:02 +00001007 self.assertRaises((TypeError, AttributeError), urllib.parse.unquote_to_bytes, None)
1008 self.assertRaises((TypeError, AttributeError), urllib.parse.unquote_to_bytes, ())
Senthil Kumaran79e17f62010-07-19 18:17:19 +00001009
Guido van Rossum52dbbb92008-08-18 21:44:30 +00001010 def test_unquoting_mixed_case(self):
1011 # Test unquoting on mixed-case hex digits in the percent-escapes
1012 given = '%Ab%eA'
1013 expect = b'\xab\xea'
1014 result = urllib.parse.unquote_to_bytes(given)
1015 self.assertEqual(expect, result,
1016 "using unquote_to_bytes(): %r != %r"
1017 % (expect, result))
1018
Brett Cannon74bfd702003-04-25 09:39:47 +00001019 def test_unquoting_parts(self):
1020 # Make sure unquoting works when have non-quoted characters
1021 # interspersed
1022 given = 'ab%sd' % hexescape('c')
1023 expect = "abcd"
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001024 result = urllib.parse.unquote(given)
Brett Cannon74bfd702003-04-25 09:39:47 +00001025 self.assertEqual(expect, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +00001026 "using quote(): %r != %r" % (expect, result))
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001027 result = urllib.parse.unquote_plus(given)
Brett Cannon74bfd702003-04-25 09:39:47 +00001028 self.assertEqual(expect, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +00001029 "using unquote_plus(): %r != %r" % (expect, result))
Tim Petersc2659cf2003-05-12 20:19:37 +00001030
Brett Cannon74bfd702003-04-25 09:39:47 +00001031 def test_unquoting_plus(self):
1032 # Test difference between unquote() and unquote_plus()
1033 given = "are+there+spaces..."
1034 expect = given
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001035 result = urllib.parse.unquote(given)
Brett Cannon74bfd702003-04-25 09:39:47 +00001036 self.assertEqual(expect, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +00001037 "using unquote(): %r != %r" % (expect, result))
Brett Cannon74bfd702003-04-25 09:39:47 +00001038 expect = given.replace('+', ' ')
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001039 result = urllib.parse.unquote_plus(given)
Brett Cannon74bfd702003-04-25 09:39:47 +00001040 self.assertEqual(expect, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +00001041 "using unquote_plus(): %r != %r" % (expect, result))
1042
1043 def test_unquote_to_bytes(self):
1044 given = 'br%C3%BCckner_sapporo_20050930.doc'
1045 expect = b'br\xc3\xbcckner_sapporo_20050930.doc'
1046 result = urllib.parse.unquote_to_bytes(given)
1047 self.assertEqual(expect, result,
1048 "using unquote_to_bytes(): %r != %r"
1049 % (expect, result))
1050 # Test on a string with unescaped non-ASCII characters
1051 # (Technically an invalid URI; expect those characters to be UTF-8
1052 # encoded).
1053 result = urllib.parse.unquote_to_bytes("\u6f22%C3%BC")
1054 expect = b'\xe6\xbc\xa2\xc3\xbc' # UTF-8 for "\u6f22\u00fc"
1055 self.assertEqual(expect, result,
1056 "using unquote_to_bytes(): %r != %r"
1057 % (expect, result))
1058 # Test with a bytes as input
1059 given = b'%A2%D8ab%FF'
1060 expect = b'\xa2\xd8ab\xff'
1061 result = urllib.parse.unquote_to_bytes(given)
1062 self.assertEqual(expect, result,
1063 "using unquote_to_bytes(): %r != %r"
1064 % (expect, result))
1065 # Test with a bytes as input, with unescaped non-ASCII bytes
1066 # (Technically an invalid URI; expect those bytes to be preserved)
1067 given = b'%A2\xd8ab%FF'
1068 expect = b'\xa2\xd8ab\xff'
1069 result = urllib.parse.unquote_to_bytes(given)
1070 self.assertEqual(expect, result,
1071 "using unquote_to_bytes(): %r != %r"
1072 % (expect, result))
Brett Cannon74bfd702003-04-25 09:39:47 +00001073
Raymond Hettinger4b0f20d2005-10-15 16:41:53 +00001074 def test_unquote_with_unicode(self):
Guido van Rossum52dbbb92008-08-18 21:44:30 +00001075 # Characters in the Latin-1 range, encoded with UTF-8
1076 given = 'br%C3%BCckner_sapporo_20050930.doc'
1077 expect = 'br\u00fcckner_sapporo_20050930.doc'
1078 result = urllib.parse.unquote(given)
1079 self.assertEqual(expect, result,
1080 "using unquote(): %r != %r" % (expect, result))
1081 # Characters in the Latin-1 range, encoded with None (default)
1082 result = urllib.parse.unquote(given, encoding=None, errors=None)
1083 self.assertEqual(expect, result,
1084 "using unquote(): %r != %r" % (expect, result))
1085
1086 # Characters in the Latin-1 range, encoded with Latin-1
1087 result = urllib.parse.unquote('br%FCckner_sapporo_20050930.doc',
1088 encoding="latin-1")
1089 expect = 'br\u00fcckner_sapporo_20050930.doc'
1090 self.assertEqual(expect, result,
1091 "using unquote(): %r != %r" % (expect, result))
1092
1093 # Characters in BMP, encoded with UTF-8
1094 given = "%E6%BC%A2%E5%AD%97"
1095 expect = "\u6f22\u5b57" # "Kanji"
1096 result = urllib.parse.unquote(given)
1097 self.assertEqual(expect, result,
1098 "using unquote(): %r != %r" % (expect, result))
1099
1100 # Decode with UTF-8, invalid sequence
1101 given = "%F3%B1"
1102 expect = "\ufffd" # Replacement character
1103 result = urllib.parse.unquote(given)
1104 self.assertEqual(expect, result,
1105 "using unquote(): %r != %r" % (expect, result))
1106
1107 # Decode with UTF-8, invalid sequence, replace errors
1108 result = urllib.parse.unquote(given, errors="replace")
1109 self.assertEqual(expect, result,
1110 "using unquote(): %r != %r" % (expect, result))
1111
1112 # Decode with UTF-8, invalid sequence, ignoring errors
1113 given = "%F3%B1"
1114 expect = ""
1115 result = urllib.parse.unquote(given, errors="ignore")
1116 self.assertEqual(expect, result,
1117 "using unquote(): %r != %r" % (expect, result))
1118
1119 # A mix of non-ASCII and percent-encoded characters, UTF-8
1120 result = urllib.parse.unquote("\u6f22%C3%BC")
1121 expect = '\u6f22\u00fc'
1122 self.assertEqual(expect, result,
1123 "using unquote(): %r != %r" % (expect, result))
1124
1125 # A mix of non-ASCII and percent-encoded characters, Latin-1
1126 # (Note, the string contains non-Latin-1-representable characters)
1127 result = urllib.parse.unquote("\u6f22%FC", encoding="latin-1")
1128 expect = '\u6f22\u00fc'
1129 self.assertEqual(expect, result,
1130 "using unquote(): %r != %r" % (expect, result))
Raymond Hettinger4b0f20d2005-10-15 16:41:53 +00001131
Brett Cannon74bfd702003-04-25 09:39:47 +00001132class urlencode_Tests(unittest.TestCase):
1133 """Tests for urlencode()"""
1134
1135 def help_inputtype(self, given, test_type):
1136 """Helper method for testing different input types.
Tim Petersc2659cf2003-05-12 20:19:37 +00001137
Brett Cannon74bfd702003-04-25 09:39:47 +00001138 'given' must lead to only the pairs:
1139 * 1st, 1
1140 * 2nd, 2
1141 * 3rd, 3
Tim Petersc2659cf2003-05-12 20:19:37 +00001142
Brett Cannon74bfd702003-04-25 09:39:47 +00001143 Test cannot assume anything about order. Docs make no guarantee and
1144 have possible dictionary input.
Tim Petersc2659cf2003-05-12 20:19:37 +00001145
Brett Cannon74bfd702003-04-25 09:39:47 +00001146 """
1147 expect_somewhere = ["1st=1", "2nd=2", "3rd=3"]
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001148 result = urllib.parse.urlencode(given)
Brett Cannon74bfd702003-04-25 09:39:47 +00001149 for expected in expect_somewhere:
Ezio Melottib58e0bd2010-01-23 15:40:09 +00001150 self.assertIn(expected, result,
Brett Cannon74bfd702003-04-25 09:39:47 +00001151 "testing %s: %s not found in %s" %
1152 (test_type, expected, result))
1153 self.assertEqual(result.count('&'), 2,
1154 "testing %s: expected 2 '&'s; got %s" %
1155 (test_type, result.count('&')))
1156 amp_location = result.index('&')
1157 on_amp_left = result[amp_location - 1]
1158 on_amp_right = result[amp_location + 1]
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00001159 self.assertTrue(on_amp_left.isdigit() and on_amp_right.isdigit(),
Brett Cannon74bfd702003-04-25 09:39:47 +00001160 "testing %s: '&' not located in proper place in %s" %
1161 (test_type, result))
1162 self.assertEqual(len(result), (5 * 3) + 2, #5 chars per thing and amps
1163 "testing %s: "
1164 "unexpected number of characters: %s != %s" %
1165 (test_type, len(result), (5 * 3) + 2))
1166
1167 def test_using_mapping(self):
1168 # Test passing in a mapping object as an argument.
1169 self.help_inputtype({"1st":'1', "2nd":'2', "3rd":'3'},
1170 "using dict as input type")
1171
1172 def test_using_sequence(self):
1173 # Test passing in a sequence of two-item sequences as an argument.
1174 self.help_inputtype([('1st', '1'), ('2nd', '2'), ('3rd', '3')],
1175 "using sequence of two-item tuples as input")
1176
1177 def test_quoting(self):
1178 # Make sure keys and values are quoted using quote_plus()
1179 given = {"&":"="}
1180 expect = "%s=%s" % (hexescape('&'), hexescape('='))
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001181 result = urllib.parse.urlencode(given)
Brett Cannon74bfd702003-04-25 09:39:47 +00001182 self.assertEqual(expect, result)
1183 given = {"key name":"A bunch of pluses"}
1184 expect = "key+name=A+bunch+of+pluses"
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001185 result = urllib.parse.urlencode(given)
Brett Cannon74bfd702003-04-25 09:39:47 +00001186 self.assertEqual(expect, result)
1187
1188 def test_doseq(self):
1189 # Test that passing True for 'doseq' parameter works correctly
1190 given = {'sequence':['1', '2', '3']}
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001191 expect = "sequence=%s" % urllib.parse.quote_plus(str(['1', '2', '3']))
1192 result = urllib.parse.urlencode(given)
Brett Cannon74bfd702003-04-25 09:39:47 +00001193 self.assertEqual(expect, result)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001194 result = urllib.parse.urlencode(given, True)
Brett Cannon74bfd702003-04-25 09:39:47 +00001195 for value in given["sequence"]:
1196 expect = "sequence=%s" % value
Ezio Melottib58e0bd2010-01-23 15:40:09 +00001197 self.assertIn(expect, result)
Brett Cannon74bfd702003-04-25 09:39:47 +00001198 self.assertEqual(result.count('&'), 2,
1199 "Expected 2 '&'s, got %s" % result.count('&'))
1200
Jeremy Hylton1ef7c6b2009-03-26 16:57:30 +00001201 def test_empty_sequence(self):
1202 self.assertEqual("", urllib.parse.urlencode({}))
1203 self.assertEqual("", urllib.parse.urlencode([]))
1204
1205 def test_nonstring_values(self):
1206 self.assertEqual("a=1", urllib.parse.urlencode({"a": 1}))
1207 self.assertEqual("a=None", urllib.parse.urlencode({"a": None}))
1208
1209 def test_nonstring_seq_values(self):
1210 self.assertEqual("a=1&a=2", urllib.parse.urlencode({"a": [1, 2]}, True))
1211 self.assertEqual("a=None&a=a",
1212 urllib.parse.urlencode({"a": [None, "a"]}, True))
Georg Brandl2daf6ae2012-02-20 19:54:16 +01001213 data = collections.OrderedDict([("a", 1), ("b", 1)])
Jeremy Hylton1ef7c6b2009-03-26 16:57:30 +00001214 self.assertEqual("a=a&a=b",
Georg Brandl2daf6ae2012-02-20 19:54:16 +01001215 urllib.parse.urlencode({"a": data}, True))
Jeremy Hylton1ef7c6b2009-03-26 16:57:30 +00001216
Senthil Kumarandf022da2010-07-03 17:48:22 +00001217 def test_urlencode_encoding(self):
1218 # ASCII encoding. Expect %3F with errors="replace'
1219 given = (('\u00a0', '\u00c1'),)
1220 expect = '%3F=%3F'
1221 result = urllib.parse.urlencode(given, encoding="ASCII", errors="replace")
1222 self.assertEqual(expect, result)
1223
1224 # Default is UTF-8 encoding.
1225 given = (('\u00a0', '\u00c1'),)
1226 expect = '%C2%A0=%C3%81'
1227 result = urllib.parse.urlencode(given)
1228 self.assertEqual(expect, result)
1229
1230 # Latin-1 encoding.
1231 given = (('\u00a0', '\u00c1'),)
1232 expect = '%A0=%C1'
1233 result = urllib.parse.urlencode(given, encoding="latin-1")
1234 self.assertEqual(expect, result)
1235
1236 def test_urlencode_encoding_doseq(self):
1237 # ASCII Encoding. Expect %3F with errors="replace'
1238 given = (('\u00a0', '\u00c1'),)
1239 expect = '%3F=%3F'
1240 result = urllib.parse.urlencode(given, doseq=True,
1241 encoding="ASCII", errors="replace")
1242 self.assertEqual(expect, result)
1243
1244 # ASCII Encoding. On a sequence of values.
1245 given = (("\u00a0", (1, "\u00c1")),)
1246 expect = '%3F=1&%3F=%3F'
1247 result = urllib.parse.urlencode(given, True,
1248 encoding="ASCII", errors="replace")
1249 self.assertEqual(expect, result)
1250
1251 # Utf-8
1252 given = (("\u00a0", "\u00c1"),)
1253 expect = '%C2%A0=%C3%81'
1254 result = urllib.parse.urlencode(given, True)
1255 self.assertEqual(expect, result)
1256
1257 given = (("\u00a0", (42, "\u00c1")),)
1258 expect = '%C2%A0=42&%C2%A0=%C3%81'
1259 result = urllib.parse.urlencode(given, True)
1260 self.assertEqual(expect, result)
1261
1262 # latin-1
1263 given = (("\u00a0", "\u00c1"),)
1264 expect = '%A0=%C1'
1265 result = urllib.parse.urlencode(given, True, encoding="latin-1")
1266 self.assertEqual(expect, result)
1267
1268 given = (("\u00a0", (42, "\u00c1")),)
1269 expect = '%A0=42&%A0=%C1'
1270 result = urllib.parse.urlencode(given, True, encoding="latin-1")
1271 self.assertEqual(expect, result)
1272
1273 def test_urlencode_bytes(self):
1274 given = ((b'\xa0\x24', b'\xc1\x24'),)
1275 expect = '%A0%24=%C1%24'
1276 result = urllib.parse.urlencode(given)
1277 self.assertEqual(expect, result)
1278 result = urllib.parse.urlencode(given, True)
1279 self.assertEqual(expect, result)
1280
1281 # Sequence of values
1282 given = ((b'\xa0\x24', (42, b'\xc1\x24')),)
1283 expect = '%A0%24=42&%A0%24=%C1%24'
1284 result = urllib.parse.urlencode(given, True)
1285 self.assertEqual(expect, result)
1286
1287 def test_urlencode_encoding_safe_parameter(self):
1288
1289 # Send '$' (\x24) as safe character
1290 # Default utf-8 encoding
1291
1292 given = ((b'\xa0\x24', b'\xc1\x24'),)
1293 result = urllib.parse.urlencode(given, safe=":$")
1294 expect = '%A0$=%C1$'
1295 self.assertEqual(expect, result)
1296
1297 given = ((b'\xa0\x24', b'\xc1\x24'),)
1298 result = urllib.parse.urlencode(given, doseq=True, safe=":$")
1299 expect = '%A0$=%C1$'
1300 self.assertEqual(expect, result)
1301
1302 # Safe parameter in sequence
1303 given = ((b'\xa0\x24', (b'\xc1\x24', 0xd, 42)),)
1304 expect = '%A0$=%C1$&%A0$=13&%A0$=42'
1305 result = urllib.parse.urlencode(given, True, safe=":$")
1306 self.assertEqual(expect, result)
1307
1308 # Test all above in latin-1 encoding
1309
1310 given = ((b'\xa0\x24', b'\xc1\x24'),)
1311 result = urllib.parse.urlencode(given, safe=":$",
1312 encoding="latin-1")
1313 expect = '%A0$=%C1$'
1314 self.assertEqual(expect, result)
1315
1316 given = ((b'\xa0\x24', b'\xc1\x24'),)
1317 expect = '%A0$=%C1$'
1318 result = urllib.parse.urlencode(given, doseq=True, safe=":$",
1319 encoding="latin-1")
1320
1321 given = ((b'\xa0\x24', (b'\xc1\x24', 0xd, 42)),)
1322 expect = '%A0$=%C1$&%A0$=13&%A0$=42'
1323 result = urllib.parse.urlencode(given, True, safe=":$",
1324 encoding="latin-1")
1325 self.assertEqual(expect, result)
1326
class Pathname_Tests(unittest.TestCase):
    """Test pathname2url() and url2pathname()"""

    def test_basic(self):
        # Make sure simple tests pass
        expected_path = os.path.join("parts", "of", "a", "path")
        expected_url = "parts/of/a/path"
        result = urllib.request.pathname2url(expected_path)
        self.assertEqual(expected_url, result,
                         "pathname2url() failed; %s != %s" %
                         (expected_url, result))
        result = urllib.request.url2pathname(expected_url)
        self.assertEqual(expected_path, result,
                         "url2pathname() failed; %s != %s" %
                         (expected_path, result))

    def test_quoting(self):
        # Test automatic quoting and unquoting works for pathname2url() and
        # url2pathname() respectively
        given = os.path.join("needs", "quot=ing", "here")
        expect = "needs/%s/here" % urllib.parse.quote("quot=ing")
        result = urllib.request.pathname2url(given)
        self.assertEqual(expect, result,
                         "pathname2url() failed; %s != %s" %
                         (expect, result))
        # Feeding the quoted URL back must restore the original path.
        expect = given
        result = urllib.request.url2pathname(result)
        self.assertEqual(expect, result,
                         "url2pathname() failed; %s != %s" %
                         (expect, result))
        given = os.path.join("make sure", "using_quote")
        expect = "%s/using_quote" % urllib.parse.quote("make sure")
        result = urllib.request.pathname2url(given)
        self.assertEqual(expect, result,
                         "pathname2url() failed; %s != %s" %
                         (expect, result))
        # The expected path keeps the '+' unchanged.
        given = "make+sure/using_unquote"
        expect = os.path.join("make+sure", "using_unquote")
        result = urllib.request.url2pathname(given)
        self.assertEqual(expect, result,
                         "url2pathname() failed; %s != %s" %
                         (expect, result))

    @unittest.skipUnless(sys.platform == 'win32',
                         'test specific to urllib.request.url2pathname() '
                         'on Windows.')
    def test_ntpath(self):
        # Several spellings of the drive root must all map to 'C:\'.
        given = ('/C:/', '///C:/', '/C|//')
        expect = 'C:\\'
        for url in given:
            result = urllib.request.url2pathname(url)
            self.assertEqual(expect, result,
                             'urllib.request.url2pathname() failed; %s != %s' %
                             (expect, result))
        given = '///C|/path'
        expect = 'C:\\path'
        result = urllib.request.url2pathname(given)
        self.assertEqual(expect, result,
                         'urllib.request.url2pathname() failed; %s != %s' %
                         (expect, result))
1386
class Utility_Tests(unittest.TestCase):
    """Checks for the small utility functions exposed by urllib."""

    def test_thishost(self):
        """urllib.request.thishost() must hand back a tuple."""
        host_info = urllib.request.thishost()
        self.assertIsInstance(host_info, tuple)
1393
Senthil Kumaran690ce9b2009-05-05 18:41:13 +00001394
class URLopener_Tests(unittest.TestCase):
    """Tests for the open() method of the URLopener class."""

    def test_quoted_open(self):
        class DummyURLopener(urllib.request.URLopener):
            # Handler for the made-up "spam" scheme: return the URL it is
            # given so the test can inspect it.
            def open_spam(self, url):
                return url

        expected_warning = (
            'DummyURLopener style of invoking requests is deprecated.',
            DeprecationWarning)
        cases = (
            # A space in the URL comes back as %20.
            ('spam://example/ /', '//example/%20/'),
            # test the safe characters are not quoted by urlopen
            ("spam://c:|windows%/:=&?~#+!$,;'@()*[]|/path/",
             "//c:|windows%/:=&?~#+!$,;'@()*[]|/path/"),
        )
        with support.check_warnings(expected_warning):
            for url, expected in cases:
                # A fresh opener per URL, as each case is independent.
                opened = DummyURLopener().open(url)
                self.assertEqual(opened, expected)
Senthil Kumaran734f0592010-02-20 22:19:04 +00001412
# The FTP wrapper tests below are commented out rather than deleted.
# It is unclear why they keep failing on Windows and SPARC.
# Everywhere else they work fine, but on those machines they sometimes
# fail in one of the tests and sometimes in another. On my Linux machine
# the tests pass.
# If anybody has one of the problematic environments, please help!
# -- Facundo
1420#
1421# def server(evt):
Georg Brandlf78e02b2008-06-10 17:40:04 +00001422# import socket, time
Guido van Rossume7ba4952007-06-06 23:52:48 +00001423# serv = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
1424# serv.settimeout(3)
1425# serv.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
1426# serv.bind(("", 9093))
Charles-François Natali6e204602014-07-23 19:28:13 +01001427# serv.listen()
Guido van Rossume7ba4952007-06-06 23:52:48 +00001428# try:
1429# conn, addr = serv.accept()
1430# conn.send("1 Hola mundo\n")
1431# cantdata = 0
1432# while cantdata < 13:
1433# data = conn.recv(13-cantdata)
1434# cantdata += len(data)
1435# time.sleep(.3)
1436# conn.send("2 No more lines\n")
1437# conn.close()
1438# except socket.timeout:
1439# pass
1440# finally:
1441# serv.close()
1442# evt.set()
1443#
1444# class FTPWrapperTests(unittest.TestCase):
1445#
1446# def setUp(self):
Georg Brandlf78e02b2008-06-10 17:40:04 +00001447# import ftplib, time, threading
Guido van Rossume7ba4952007-06-06 23:52:48 +00001448# ftplib.FTP.port = 9093
1449# self.evt = threading.Event()
1450# threading.Thread(target=server, args=(self.evt,)).start()
1451# time.sleep(.1)
1452#
1453# def tearDown(self):
1454# self.evt.wait()
1455#
1456# def testBasic(self):
1457# # connects
1458# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [])
Georg Brandlf78e02b2008-06-10 17:40:04 +00001459# ftp.close()
Guido van Rossume7ba4952007-06-06 23:52:48 +00001460#
1461# def testTimeoutNone(self):
Georg Brandlf78e02b2008-06-10 17:40:04 +00001462# # global default timeout is ignored
1463# import socket
Serhiy Storchaka25d8aea2014-02-08 14:50:08 +02001464# self.assertIsNone(socket.getdefaulttimeout())
Guido van Rossume7ba4952007-06-06 23:52:48 +00001465# socket.setdefaulttimeout(30)
1466# try:
1467# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [])
1468# finally:
Georg Brandlf78e02b2008-06-10 17:40:04 +00001469# socket.setdefaulttimeout(None)
Guido van Rossume7ba4952007-06-06 23:52:48 +00001470# self.assertEqual(ftp.ftp.sock.gettimeout(), 30)
Georg Brandlf78e02b2008-06-10 17:40:04 +00001471# ftp.close()
Guido van Rossume7ba4952007-06-06 23:52:48 +00001472#
Georg Brandlf78e02b2008-06-10 17:40:04 +00001473# def testTimeoutDefault(self):
1474# # global default timeout is used
1475# import socket
Serhiy Storchaka25d8aea2014-02-08 14:50:08 +02001476# self.assertIsNone(socket.getdefaulttimeout())
Georg Brandlf78e02b2008-06-10 17:40:04 +00001477# socket.setdefaulttimeout(30)
1478# try:
1479# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [])
1480# finally:
1481# socket.setdefaulttimeout(None)
1482# self.assertEqual(ftp.ftp.sock.gettimeout(), 30)
1483# ftp.close()
1484#
1485# def testTimeoutValue(self):
1486# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [],
1487# timeout=30)
1488# self.assertEqual(ftp.ftp.sock.gettimeout(), 30)
1489# ftp.close()
Guido van Rossume7ba4952007-06-06 23:52:48 +00001490
Senthil Kumaran8b081b72013-04-10 20:53:12 -07001491
class RequestTests(unittest.TestCase):
    """Unit tests for urllib.request.Request."""

    def test_default_values(self):
        # With no explicit method, get_method() is GET without a data
        # argument and POST once one is supplied (even an empty dict).
        make_request = urllib.request.Request
        without_data = make_request("http://www.python.org")
        self.assertEqual(without_data.get_method(), 'GET')
        with_data = make_request("http://www.python.org", {})
        self.assertEqual(with_data.get_method(), 'POST')

    def test_with_method_arg(self):
        url = "http://www.python.org"
        make_request = urllib.request.Request

        # An explicit method wins, with or without a data argument.
        for data_args in ((), ({},)):
            req = make_request(url, *data_args, method='HEAD')
            self.assertEqual(req.method, 'HEAD')
            self.assertEqual(req.get_method(), 'HEAD')

        # The method can also be reassigned after construction.
        req = make_request(url, method='GET')
        self.assertEqual(req.get_method(), 'GET')
        req.method = 'HEAD'
        self.assertEqual(req.get_method(), 'HEAD')
Skip Montanaro080c9972001-01-28 21:12:22 +00001514
1515
class URL2PathNameTests(unittest.TestCase):
    # Exercises url2pathname() as imported from nturl2path at file level.

    def test_converting_drive_letter(self):
        # Both '|' and ':' are accepted after the drive letter.
        cases = (
            ("///C|", 'C:'),
            ("///C:", 'C:'),
            ("///C|/", 'C:\\'),
        )
        for url, expected in cases:
            self.assertEqual(url2pathname(url), expected)

    def test_converting_when_no_drive_letter(self):
        # cannot end a raw string in \
        cases = (
            ("///C/test/", r'\\\C\test' '\\'),
            ("////C/test/", r'\\C\test' '\\'),
        )
        for url, expected in cases:
            self.assertEqual(url2pathname(url), expected)

    def test_simple_compare(self):
        converted = url2pathname("///C|/foo/bar/spam.foo")
        self.assertEqual(converted, r'C:\foo\bar\spam.foo')

    def test_non_ascii_drive_letter(self):
        # A non-ASCII character in the drive position must be rejected.
        with self.assertRaises(IOError):
            url2pathname("///\u00e8|/")

    def test_roundtrip_url2pathname(self):
        # path -> URL -> path must give back the starting path.
        for original in ('C:',
                         r'\\\C\test\\',
                         r'C:\foo\bar\spam.foo'):
            round_tripped = url2pathname(pathname2url(original))
            self.assertEqual(round_tripped, original)
Senthil Kumaran277e9092013-04-10 20:51:19 -07001542
class PathName2URLTests(unittest.TestCase):
    # Exercises pathname2url() as imported from nturl2path at file level.

    def test_converting_drive_letter(self):
        # A bare drive and a drive root give the same URL.
        for path in ("C:", "C:\\"):
            self.assertEqual(pathname2url(path), '///C:')

    def test_converting_when_no_drive_letter(self):
        # The trailing backslash is appended separately because a raw
        # string cannot end in a backslash.
        cases = (
            (r"\\\folder\test" "\\", '/////folder/test/'),
            (r"\\folder\test" "\\", '////folder/test/'),
            (r"\folder\test" "\\", '/folder/test/'),
        )
        for path, expected in cases:
            self.assertEqual(pathname2url(path), expected)

    def test_simple_compare(self):
        converted = pathname2url(r'C:\foo\bar\spam.foo')
        self.assertEqual(converted, "///C:/foo/bar/spam.foo")

    def test_long_drive_letter(self):
        # A drive specifier longer than one letter must be rejected.
        with self.assertRaises(IOError):
            pathname2url("XX:\\")

    def test_roundtrip_pathname2url(self):
        # URL -> path -> URL must give back the starting URL.
        for original in ('///C:',
                         '/////folder/test/',
                         '///C:/foo/bar/spam.foo'):
            round_tripped = pathname2url(url2pathname(original))
            self.assertEqual(round_tripped, original)
Brett Cannon74bfd702003-04-25 09:39:47 +00001570
# Run the whole suite when this file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()