"""Regression tests for what was in Python 2's "urllib" module"""

import urllib.parse
import urllib.request
import urllib.error
import http.client
import email.message
import io
import unittest
from unittest.mock import patch
from test import support
import os
try:
    import ssl
except ImportError:
    ssl = None
import sys
import tempfile
from nturl2path import url2pathname, pathname2url

from base64 import b64encode
import collections
import contextlib


def hexescape(char):
    """Escape char as RFC 2396 specifies.

    Returns "%" followed by the upper-case hexadecimal value of
    ord(char), zero-padded to at least two digits (" " -> "%20").
    *char* must be a single character; anything else makes ord() raise
    TypeError.
    """
    return "%%%02X" % ord(char)

# Shortcut for testing FancyURLopener: the opener used by urlopen() below is
# created on first use and then shared by later calls that pass no proxies.
_urlopener = None


def urlopen(url, data=None, proxies=None):
    """urlopen(url [, data]) -> open file-like object

    When *proxies* is given, a fresh urllib.request.FancyURLopener is built
    for this call only.  Otherwise the module-level opener is used, being
    created through the FancyURLopener() helper the first time it is needed.
    *data* is forwarded to opener.open() only when it is not None.
    """
    global _urlopener
    if proxies is not None:
        opener = urllib.request.FancyURLopener(proxies=proxies)
    else:
        if not _urlopener:
            _urlopener = FancyURLopener()
        opener = _urlopener
    if data is None:
        return opener.open(url)
    return opener.open(url, data)


def FancyURLopener():
    """Return a new urllib.request.FancyURLopener.

    The instance is created inside support.check_warnings(), which is handed
    the DeprecationWarning message the constructor is expected to emit.
    """
    expected = ('FancyURLopener style of invoking requests is deprecated.',
                DeprecationWarning)
    with support.check_warnings(expected):
        return urllib.request.FancyURLopener()


def fakehttp(fakedata):
    """Return an HTTPConnection subclass that replays *fakedata*.

    connect() on the returned class installs a FakeSocket (an io.BytesIO
    preloaded with *fakedata*) as the connection's socket and also records it
    on the class as ``fakesock``.  Whatever is passed to the socket's
    sendall() is stored on the class as ``buf`` so tests can inspect it.
    """
    class FakeSocket(io.BytesIO):
        # Number of live handles on this buffer: the socket itself plus one
        # per makefile() call.  The buffer is really closed when it hits 0.
        io_refs = 1

        def sendall(self, data):
            FakeHTTPConnection.buf = data

        def makefile(self, *args, **kwds):
            # The "file" is the socket itself; just count the extra handle.
            self.io_refs += 1
            return self

        def read(self, amt=None):
            # Reading a closed BytesIO raises; report end-of-data instead.
            if self.closed:
                return b""
            return super().read(amt)

        def readline(self, length=None):
            if self.closed:
                return b""
            return super().readline(length)

        def close(self):
            self.io_refs -= 1
            if self.io_refs == 0:
                super().close()

    class FakeHTTPConnection(http.client.HTTPConnection):

        # buffer to store data for verification in urlopen tests.
        buf = None

        def connect(self):
            self.sock = FakeSocket(self.fakedata)
            type(self).fakesock = self.sock

    # Assigned after the class statement: a class body cannot read the
    # enclosing function's variable under the same name it is binding.
    FakeHTTPConnection.fakedata = fakedata

    return FakeHTTPConnection


class FakeHTTPMixin(object):
    """Mixin that temporarily replaces http.client.HTTPConnection.

    fakehttp() installs a class built by the module-level fakehttp() helper;
    unfakehttp() puts back the class that was in place before.  Each
    fakehttp() call should be paired with one unfakehttp() call.
    """

    def fakehttp(self, fakedata):
        # Remember the current class so unfakehttp() can restore it.
        self._connection_class = http.client.HTTPConnection
        http.client.HTTPConnection = fakehttp(fakedata)

    def unfakehttp(self):
        http.client.HTTPConnection = self._connection_class


class FakeFTPMixin(object):
    """Mixin that temporarily replaces urllib.request.ftpwrapper.

    fakeftp() installs a do-nothing wrapper whose retrfile() returns an empty
    io.BytesIO and a length of 0; unfakeftp() restores the previous class.
    """

    def fakeftp(self):
        class FakeFtpWrapper(object):
            def __init__(self, user, passwd, host, port, dirs, timeout=None,
                         persistent=True):
                pass

            def retrfile(self, file, type):
                return io.BytesIO(), 0

            def close(self):
                pass

        # Remember the current class so unfakeftp() can restore it.
        self._ftpwrapper_class = urllib.request.ftpwrapper
        urllib.request.ftpwrapper = FakeFtpWrapper

    def unfakeftp(self):
        urllib.request.ftpwrapper = self._ftpwrapper_class


class urlopen_FileTests(unittest.TestCase):
    """Test urlopen() opening a temporary file.

    Try to test as much functionality as possible so as to cut down on reliance
    on connecting to the Net for testing.

    """

    def setUp(self):
        # Create a temp file to use for testing
        self.text = bytes("test_urllib: %s\n" % self.__class__.__name__,
                          "ascii")
        with open(support.TESTFN, 'wb') as f:
            f.write(self.text)
        # Registered before urlopen() is called: tearDown() is skipped when
        # setUp() raises, but cleanups still run, so the file cannot leak.
        self.addCleanup(os.remove, support.TESTFN)
        self.pathname = support.TESTFN
        self.returned_obj = urlopen("file:%s" % self.pathname)

    def tearDown(self):
        """Shut down the open object"""
        # Runs before the cleanup that removes the file.
        self.returned_obj.close()

    def test_interface(self):
        # Make sure object returned by urlopen() has the specified methods
        for attr in ("read", "readline", "readlines", "fileno",
                     "close", "info", "geturl", "getcode", "__iter__"):
            self.assertTrue(hasattr(self.returned_obj, attr),
                            "object returned by urlopen() lacks %s attribute" %
                            attr)

    def test_read(self):
        self.assertEqual(self.text, self.returned_obj.read())

    def test_readline(self):
        self.assertEqual(self.text, self.returned_obj.readline())
        self.assertEqual(b'', self.returned_obj.readline(),
                         "calling readline() after exhausting the file did not"
                         " return an empty string")

    def test_readlines(self):
        lines_list = self.returned_obj.readlines()
        self.assertEqual(len(lines_list), 1,
                         "readlines() returned the wrong number of lines")
        self.assertEqual(lines_list[0], self.text,
                         "readlines() returned improper text")

    def test_fileno(self):
        file_num = self.returned_obj.fileno()
        self.assertIsInstance(file_num, int, "fileno() did not return an int")
        self.assertEqual(os.read(file_num, len(self.text)), self.text,
                         "Reading on the file descriptor returned by fileno() "
                         "did not return the expected text")

    def test_close(self):
        # Test close() by calling it here and then having it be called again
        # by the tearDown() method for the test
        self.returned_obj.close()

    def test_info(self):
        self.assertIsInstance(self.returned_obj.info(), email.message.Message)

    def test_geturl(self):
        self.assertEqual(self.returned_obj.geturl(), self.pathname)

    def test_getcode(self):
        self.assertIsNone(self.returned_obj.getcode())

    def test_iter(self):
        # Test iterator
        # Don't need to count number of iterations since test would fail the
        # instant it returned anything beyond the first line from the
        # comparison.
        # Use the iterator in the usual implicit way to test for ticket #4608.
        for line in self.returned_obj:
            self.assertEqual(line, self.text)

    def test_relativelocalfile(self):
        self.assertRaises(ValueError, urllib.request.urlopen,
                          './' + self.pathname)


class ProxyTests(unittest.TestCase):
    """Test proxy discovery and bypass rules read from environment variables."""

    def setUp(self):
        # Records changes to env vars
        self.env = support.EnvironmentVarGuard()
        # Restore all proxy related env vars.  Registered as a cleanup so the
        # environment is put back even if the loop below fails part way.
        self.addCleanup(self.env.__exit__)
        # Delete all proxy related env vars
        for k in list(os.environ):
            if 'proxy' in k.lower():
                self.env.unset(k)

    def test_getproxies_environment_keep_no_proxies(self):
        self.env.set('NO_PROXY', 'localhost')
        proxies = urllib.request.getproxies_environment()
        # getproxies_environment use lowered case truncated (no '_proxy') keys
        self.assertEqual('localhost', proxies['no'])
        # List of no_proxies with space.
        self.env.set('NO_PROXY', 'localhost, anotherdomain.com, newdomain.com:1234')
        self.assertTrue(urllib.request.proxy_bypass_environment('anotherdomain.com'))
        self.assertTrue(urllib.request.proxy_bypass_environment('anotherdomain.com:8888'))
        self.assertTrue(urllib.request.proxy_bypass_environment('newdomain.com:1234'))

    def test_proxy_cgi_ignore(self):
        try:
            self.env.set('HTTP_PROXY', 'http://somewhere:3128')
            proxies = urllib.request.getproxies_environment()
            self.assertEqual('http://somewhere:3128', proxies['http'])
            # With REQUEST_METHOD set, the upper-case HTTP_PROXY is expected
            # to be dropped from the result.
            self.env.set('REQUEST_METHOD', 'GET')
            proxies = urllib.request.getproxies_environment()
            self.assertNotIn('http', proxies)
        finally:
            self.env.unset('REQUEST_METHOD')
            self.env.unset('HTTP_PROXY')

    def test_proxy_bypass_environment_host_match(self):
        bypass = urllib.request.proxy_bypass_environment
        self.env.set('NO_PROXY',
                     'localhost, anotherdomain.com, newdomain.com:1234, .d.o.t')
        self.assertTrue(bypass('localhost'))
        self.assertTrue(bypass('LocalHost'))                 # MixedCase
        self.assertTrue(bypass('LOCALHOST'))                 # UPPERCASE
        self.assertTrue(bypass('newdomain.com:1234'))
        self.assertTrue(bypass('foo.d.o.t'))                 # issue 29142
        self.assertTrue(bypass('anotherdomain.com:8888'))
        self.assertTrue(bypass('www.newdomain.com:1234'))
        self.assertFalse(bypass('prelocalhost'))
        self.assertFalse(bypass('newdomain.com'))            # no port
        self.assertFalse(bypass('newdomain.com:1235'))       # wrong port


class ProxyTests_withOrderedEnv(unittest.TestCase):
    """Test proxy environment handling where variable order is significant."""

    def setUp(self):
        # We need to test conditions, where variable order _is_ significant.
        # Put the real mapping back once the test is over.
        self.addCleanup(setattr, os, 'environ', os.environ)
        # Monkey patch os.environ, start with empty fake environment
        os.environ = collections.OrderedDict()

    def test_getproxies_environment_prefer_lowercase(self):
        # Test lowercase preference with removal
        os.environ['no_proxy'] = ''
        os.environ['No_Proxy'] = 'localhost'
        self.assertFalse(urllib.request.proxy_bypass_environment('localhost'))
        self.assertFalse(urllib.request.proxy_bypass_environment('arbitrary'))
        os.environ['http_proxy'] = ''
        os.environ['HTTP_PROXY'] = 'http://somewhere:3128'
        proxies = urllib.request.getproxies_environment()
        self.assertEqual({}, proxies)
        # Test lowercase preference of proxy bypass and correct matching including ports
        os.environ['no_proxy'] = 'localhost, noproxy.com, my.proxy:1234'
        os.environ['No_Proxy'] = 'xyz.com'
        self.assertTrue(urllib.request.proxy_bypass_environment('localhost'))
        self.assertTrue(urllib.request.proxy_bypass_environment('noproxy.com:5678'))
        self.assertTrue(urllib.request.proxy_bypass_environment('my.proxy:1234'))
        self.assertFalse(urllib.request.proxy_bypass_environment('my.proxy'))
        self.assertFalse(urllib.request.proxy_bypass_environment('arbitrary'))
        # Test lowercase preference with replacement
        os.environ['http_proxy'] = 'http://somewhere:3128'
        os.environ['Http_Proxy'] = 'http://somewhereelse:3128'
        proxies = urllib.request.getproxies_environment()
        self.assertEqual('http://somewhere:3128', proxies['http'])


class urlopen_HttpTests(unittest.TestCase, FakeHTTPMixin, FakeFTPMixin):
    """Test urlopen() opening a fake http connection."""

    def check_read(self, ver):
        """Open a canned 200 response using HTTP version *ver* (bytes)."""
        self.fakehttp(b"HTTP/" + ver + b" 200 OK\r\n\r\nHello!")
        try:
            fp = urlopen("http://python.org/")
            self.assertEqual(fp.readline(), b"Hello!")
            self.assertEqual(fp.readline(), b"")
            self.assertEqual(fp.geturl(), 'http://python.org/')
            self.assertEqual(fp.getcode(), 200)
        finally:
            self.unfakehttp()

    def test_url_fragment(self):
        # Issue #11703: geturl() omits fragments in the original URL.
        url = 'http://docs.python.org/library/urllib.html#OK'
        self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello!")
        try:
            fp = urllib.request.urlopen(url)
            self.assertEqual(fp.geturl(), url)
        finally:
            self.unfakehttp()

    def test_willclose(self):
        self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello!")
        try:
            resp = urlopen("http://www.python.org")
            self.assertTrue(resp.fp.will_close)
        finally:
            self.unfakehttp()

    def test_read_0_9(self):
        # "0.9" response accepted (but not "simple responses" without
        # a status line)
        self.check_read(b"0.9")

    def test_read_1_0(self):
        self.check_read(b"1.0")

    def test_read_1_1(self):
        self.check_read(b"1.1")

    def test_read_bogus(self):
        # urlopen() should raise OSError for many error codes.
        self.fakehttp(b'''HTTP/1.1 401 Authentication Required
Date: Wed, 02 Jan 2008 03:03:54 GMT
Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
Connection: close
Content-Type: text/html; charset=iso-8859-1
''')
        try:
            self.assertRaises(OSError, urlopen, "http://python.org/")
        finally:
            self.unfakehttp()

    def test_invalid_redirect(self):
        # urlopen() should raise OSError for many error codes.
        self.fakehttp(b'''HTTP/1.1 302 Found
Date: Wed, 02 Jan 2008 03:03:54 GMT
Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
Location: file://guidocomputer.athome.com:/python/license
Connection: close
Content-Type: text/html; charset=iso-8859-1
''')
        try:
            msg = "Redirection to url 'file:"
            with self.assertRaisesRegex(urllib.error.HTTPError, msg):
                urlopen("http://python.org/")
        finally:
            self.unfakehttp()

    def test_redirect_limit_independent(self):
        # Ticket #12923: make sure independent requests each use their
        # own retry limit.
        for _ in range(FancyURLopener().maxtries):
            self.fakehttp(b'''HTTP/1.1 302 Found
Location: file://guidocomputer.athome.com:/python/license
Connection: close
''')
            try:
                self.assertRaises(urllib.error.HTTPError, urlopen,
                                  "http://something")
            finally:
                self.unfakehttp()

    def test_empty_socket(self):
        # urlopen() raises OSError if the underlying socket does not send any
        # data. (#1680230)
        self.fakehttp(b'')
        try:
            self.assertRaises(OSError, urlopen, "http://something")
        finally:
            self.unfakehttp()

    def test_missing_localfile(self):
        # Test for #10836
        with self.assertRaises(urllib.error.URLError) as e:
            urlopen('file://localhost/a/file/which/doesnot/exists.py')
        self.assertTrue(e.exception.filename)
        self.assertTrue(e.exception.reason)

    def test_file_notexists(self):
        fd, tmp_file = tempfile.mkstemp()
        tmp_fileurl = 'file://localhost/' + tmp_file.replace(os.path.sep, '/')
        try:
            self.assertTrue(os.path.exists(tmp_file))
            with urlopen(tmp_fileurl) as fobj:
                self.assertTrue(fobj)
        finally:
            os.close(fd)
            os.unlink(tmp_file)
        # Once the file is gone the same URL must fail.
        self.assertFalse(os.path.exists(tmp_file))
        with self.assertRaises(urllib.error.URLError):
            urlopen(tmp_fileurl)

    def test_ftp_nohost(self):
        test_ftp_url = 'ftp:///path'
        with self.assertRaises(urllib.error.URLError) as e:
            urlopen(test_ftp_url)
        self.assertFalse(e.exception.filename)
        self.assertTrue(e.exception.reason)

    def test_ftp_nonexisting(self):
        with self.assertRaises(urllib.error.URLError) as e:
            urlopen('ftp://localhost/a/file/which/doesnot/exists.py')
        self.assertFalse(e.exception.filename)
        self.assertTrue(e.exception.reason)

    @patch.object(urllib.request, 'MAXFTPCACHE', 0)
    def test_ftp_cache_pruning(self):
        self.fakeftp()
        try:
            # Seed the cache with one entry while the cache limit is
            # patched to 0, then open an FTP URL.
            urllib.request.ftpcache['test'] = urllib.request.ftpwrapper(
                'user', 'pass', 'localhost', 21, [])
            urlopen('ftp://localhost')
        finally:
            self.unfakeftp()

    def test_userpass_inurl(self):
        self.fakehttp(b"HTTP/1.0 200 OK\r\n\r\nHello!")
        try:
            fp = urlopen("http://user:pass@python.org/")
            self.assertEqual(fp.readline(), b"Hello!")
            self.assertEqual(fp.readline(), b"")
            self.assertEqual(fp.geturl(), 'http://user:pass@python.org/')
            self.assertEqual(fp.getcode(), 200)
        finally:
            self.unfakehttp()

    def test_userpass_inurl_w_spaces(self):
        self.fakehttp(b"HTTP/1.0 200 OK\r\n\r\nHello!")
        try:
            userpass = "a b:c d"
            url = "http://{}@python.org/".format(userpass)
            # The fake connection class currently installed; its ``buf``
            # attribute receives the bytes of the outgoing request.
            fakehttp_wrapper = http.client.HTTPConnection
            authorization = ("Authorization: Basic %s\r\n" %
                             b64encode(userpass.encode("ASCII")).decode("ASCII"))
            fp = urlopen(url)
            # The authorization header must be in place
            self.assertIn(authorization, fakehttp_wrapper.buf.decode("UTF-8"))
            self.assertEqual(fp.readline(), b"Hello!")
            self.assertEqual(fp.readline(), b"")
            # the spaces are quoted in URL so no match
            self.assertNotEqual(fp.geturl(), url)
            self.assertEqual(fp.getcode(), 200)
        finally:
            self.unfakehttp()

    def test_URLopener_deprecation(self):
        with support.check_warnings(('', DeprecationWarning)):
            urllib.request.URLopener()

    @unittest.skipUnless(ssl, "ssl module required")
    def test_cafile_and_context(self):
        context = ssl.create_default_context()
        with support.check_warnings(('', DeprecationWarning)):
            with self.assertRaises(ValueError):
                urllib.request.urlopen(
                    "https://localhost", cafile="/nonexistent/path", context=context
                )


class urlopen_DataTests(unittest.TestCase):
    """Test urlopen() opening a data URL."""

    def setUp(self):
        # text containing URL special- and unicode-characters
        self.text = "test data URLs :;,%=& \u00f6 \u00c4 "
        # 2x1 pixel RGB PNG image with one black and one white pixel
        self.image = (
            b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\x02\x00\x00\x00'
            b'\x01\x08\x02\x00\x00\x00{@\xe8\xdd\x00\x00\x00\x01sRGB\x00\xae'
            b'\xce\x1c\xe9\x00\x00\x00\x0fIDAT\x08\xd7c```\xf8\xff\xff?\x00'
            b'\x06\x01\x02\xfe\no/\x1e\x00\x00\x00\x00IEND\xaeB`\x82')

        self.text_url = (
            "data:text/plain;charset=UTF-8,test%20data%20URLs%20%3A%3B%2C%25%3"
            "D%26%20%C3%B6%20%C3%84%20")
        self.text_url_base64 = (
            "data:text/plain;charset=ISO-8859-1;base64,dGVzdCBkYXRhIFVSTHMgOjs"
            "sJT0mIPYgxCA%3D")
        # base64 encoded data URL that contains ignorable spaces,
        # such as "\n", " ", "%0A", and "%20".
        self.image_url = (
            "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAIAAAABCAIAAAB7\n"
            "QOjdAAAAAXNSR0IArs4c6QAAAA9JREFUCNdj%0AYGBg%2BP//PwAGAQL%2BCm8 "
            "vHgAAAABJRU5ErkJggg%3D%3D%0A%20")

        # Every response opened here is closed again once the test is over.
        self.text_url_resp = urllib.request.urlopen(self.text_url)
        self.addCleanup(self.text_url_resp.close)
        self.text_url_base64_resp = urllib.request.urlopen(
            self.text_url_base64)
        self.addCleanup(self.text_url_base64_resp.close)
        self.image_url_resp = urllib.request.urlopen(self.image_url)
        self.addCleanup(self.image_url_resp.close)

    def test_interface(self):
        # Make sure object returned by urlopen() has the specified methods
        for attr in ("read", "readline", "readlines",
                     "close", "info", "geturl", "getcode", "__iter__"):
            self.assertTrue(hasattr(self.text_url_resp, attr),
                            "object returned by urlopen() lacks %s attribute" %
                            attr)

    def test_info(self):
        self.assertIsInstance(self.text_url_resp.info(), email.message.Message)
        self.assertEqual(self.text_url_base64_resp.info().get_params(),
                         [('text/plain', ''), ('charset', 'ISO-8859-1')])
        self.assertEqual(self.image_url_resp.info()['content-length'],
                         str(len(self.image)))
        # A data URL without a media type gets the default one.
        with urllib.request.urlopen("data:,") as empty_resp:
            self.assertEqual(empty_resp.info().get_params(),
                             [('text/plain', ''), ('charset', 'US-ASCII')])

    def test_geturl(self):
        self.assertEqual(self.text_url_resp.geturl(), self.text_url)
        self.assertEqual(self.text_url_base64_resp.geturl(),
                         self.text_url_base64)
        self.assertEqual(self.image_url_resp.geturl(), self.image_url)

    def test_read_text(self):
        self.assertEqual(self.text_url_resp.read().decode(
            dict(self.text_url_resp.info().get_params())['charset']), self.text)

    def test_read_text_base64(self):
        self.assertEqual(self.text_url_base64_resp.read().decode(
            dict(self.text_url_base64_resp.info().get_params())['charset']),
            self.text)

    def test_read_image(self):
        self.assertEqual(self.image_url_resp.read(), self.image)

    def test_missing_comma(self):
        self.assertRaises(ValueError, urllib.request.urlopen, 'data:text/plain')

    def test_invalid_base64_data(self):
        # missing padding character
        self.assertRaises(ValueError, urllib.request.urlopen, 'data:;base64,Cg=')


class urlretrieve_FileTests(unittest.TestCase):
    """Test urllib.request.urlretrieve() on local files"""

    def setUp(self):
        # Create a list of temporary files. Each item in the list is a file
        # name (absolute path or relative to the current working directory).
        # All files in this list will be deleted in the tearDown method. Note,
        # this only helps to make sure temporary files get deleted, but it
        # does nothing about trying to close files that may still be open. It
        # is the responsibility of the developer to properly close files even
        # when exceptional conditions occur.
        self.tempFiles = []

        # Create a temporary file.
        self.registerFileForCleanUp(support.TESTFN)
        self.text = b'testing urllib.urlretrieve'
        with open(support.TESTFN, 'wb') as FILE:
            FILE.write(self.text)

    def tearDown(self):
        # Delete the temporary files.  Removal is best effort: a file that a
        # test never created (or already removed) is not an error.
        for each in self.tempFiles:
            with contextlib.suppress(OSError):
                os.remove(each)

    def constructLocalFileUrl(self, filePath):
        """Return a file:// URL for *filePath*, made absolute first.

        Raises unittest.SkipTest when the absolute path cannot be encoded
        as UTF-8.
        """
        filePath = os.path.abspath(filePath)
        try:
            filePath.encode("utf-8")
        except UnicodeEncodeError:
            raise unittest.SkipTest("filePath is not encodable to utf8")
        return "file://%s" % urllib.request.pathname2url(filePath)

    def createNewTempFile(self, data=b""):
        """Creates a new temporary file containing the specified data,
        registers the file for deletion during the test fixture tear down, and
        returns the absolute path of the file."""

        newFd, newFilePath = tempfile.mkstemp()
        # Register before writing so the file is removed even if the write
        # fails.
        self.registerFileForCleanUp(newFilePath)
        with os.fdopen(newFd, "wb") as newFile:
            newFile.write(data)
        return newFilePath

    def registerFileForCleanUp(self, fileName):
        """Remember *fileName* so that tearDown() deletes it."""
        self.tempFiles.append(fileName)

    def test_basic(self):
        # Make sure that a local file just gets its own location returned and
        # a headers value is returned.
        result = urllib.request.urlretrieve("file:%s" % support.TESTFN)
        self.assertEqual(result[0], support.TESTFN)
        self.assertIsInstance(result[1], email.message.Message,
                              "did not get an email.message.Message instance "
                              "as second returned value")

    def test_copy(self):
        # Test that setting the filename argument works.
        second_temp = "%s.2" % support.TESTFN
        self.registerFileForCleanUp(second_temp)
        result = urllib.request.urlretrieve(self.constructLocalFileUrl(
            support.TESTFN), second_temp)
        self.assertEqual(second_temp, result[0])
        self.assertTrue(os.path.exists(second_temp), "copy of the file was not "
                                                     "made")
        with open(second_temp, 'rb') as FILE:
            text = FILE.read()
        self.assertEqual(self.text, text)

    def test_reporthook(self):
        # Make sure that the reporthook works.
        expected_block = 0

        def hooktester(block_count, block_read_size, file_size):
            # Block numbers must arrive in order, starting at 0.
            nonlocal expected_block
            self.assertIsInstance(block_count, int)
            self.assertIsInstance(block_read_size, int)
            self.assertIsInstance(file_size, int)
            self.assertEqual(block_count, expected_block)
            expected_block += 1

        second_temp = "%s.2" % support.TESTFN
        self.registerFileForCleanUp(second_temp)
        urllib.request.urlretrieve(
            self.constructLocalFileUrl(support.TESTFN),
            second_temp, hooktester)

    def test_reporthook_0_bytes(self):
        # Test on zero length file. Should call reporthook only 1 time.
        report = []

        def hooktester(block_count, block_read_size, file_size):
            report.append((block_count, block_read_size, file_size))

        srcFileName = self.createNewTempFile()
        urllib.request.urlretrieve(self.constructLocalFileUrl(srcFileName),
                                   support.TESTFN, hooktester)
        self.assertEqual(len(report), 1)
        self.assertEqual(report[0][2], 0)

    def test_reporthook_5_bytes(self):
        # Test on 5 byte file. Should call reporthook only 2 times (once when
        # the "network connection" is established and once when the block is
        # read).
        report = []

        def hooktester(block_count, block_read_size, file_size):
            report.append((block_count, block_read_size, file_size))

        srcFileName = self.createNewTempFile(b"x" * 5)
        urllib.request.urlretrieve(self.constructLocalFileUrl(srcFileName),
                                   support.TESTFN, hooktester)
        self.assertEqual(len(report), 2)
        self.assertEqual(report[0][2], 5)
        self.assertEqual(report[1][2], 5)
Georg Brandl5a650a22005-08-26 08:51:34 +0000674
675 def test_reporthook_8193_bytes(self):
676 # Test on 8193 byte file. Should call reporthook only 3 times (once
677 # when the "network connection" is established, once for the next 8192
678 # bytes, and once for the last byte).
679 report = []
Senthil Kumarane24f96a2012-03-13 19:29:33 -0700680 def hooktester(block_count, block_read_size, file_size, _report=report):
681 _report.append((block_count, block_read_size, file_size))
Guido van Rossum70d0dda2007-08-29 01:53:26 +0000682 srcFileName = self.createNewTempFile(b"x" * 8193)
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000683 urllib.request.urlretrieve(self.constructLocalFileUrl(srcFileName),
Benjamin Petersonee8712c2008-05-20 21:35:26 +0000684 support.TESTFN, hooktester)
Georg Brandl5a650a22005-08-26 08:51:34 +0000685 self.assertEqual(len(report), 3)
Gregory P. Smith6d9388f2012-11-10 15:12:55 -0800686 self.assertEqual(report[0][2], 8193)
687 self.assertEqual(report[0][1], 8192)
Senthil Kumarane24f96a2012-03-13 19:29:33 -0700688 self.assertEqual(report[1][1], 8192)
Gregory P. Smith6d9388f2012-11-10 15:12:55 -0800689 self.assertEqual(report[2][1], 8192)
Skip Montanaro080c9972001-01-28 21:12:22 +0000690
Senthil Kumarance260142011-11-01 01:35:17 +0800691
class urlretrieve_HttpTests(unittest.TestCase, FakeHTTPMixin):
    """Exercise urllib.request.urlretrieve() over fake http connections"""

    # Canned response shared by both tests: the Content-Length header
    # announces 100 bytes, but the body that follows is only "FF\n".
    _SHORT_RESPONSE = b'''HTTP/1.1 200 OK
Date: Wed, 02 Jan 2008 03:03:54 GMT
Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
Connection: close
Content-Length: 100
Content-Type: text/html; charset=iso-8859-1

FF
'''

    def test_short_content_raises_ContentTooShortError(self):
        # A body shorter than Content-Length must raise
        # ContentTooShortError when a reporthook is supplied.
        def ignore_progress(block_count, block_size, total_size):
            pass

        self.fakehttp(self._SHORT_RESPONSE)
        with self.assertRaises(urllib.error.ContentTooShortError):
            try:
                urllib.request.urlretrieve('http://example.com/',
                                           reporthook=ignore_progress)
            finally:
                # NOTE(review): fakehttp()/unfakehttp() come from
                # FakeHTTPMixin, defined outside this class; presumably
                # unfakehttp() restores whatever fakehttp() replaced.
                self.unfakehttp()

    def test_short_content_raises_ContentTooShortError_without_reporthook(self):
        # Same short response, but with no reporthook passed.
        self.fakehttp(self._SHORT_RESPONSE)
        with self.assertRaises(urllib.error.ContentTooShortError):
            try:
                urllib.request.urlretrieve('http://example.com/')
            finally:
                self.unfakehttp()
731
732
class QuotingTests(unittest.TestCase):
    r"""Tests for urllib.parse.quote() and urllib.parse.quote_plus()

    According to RFC 2396 (Uniform Resource Identifiers), to escape a
    character you write it as '%' + <2 character US-ASCII hex value>.
    The Python code of ``'%' + hex(ord(<character>))[2:]`` escapes a
    character properly. Case does not matter on the hex letters.

    The various character sets specified are:

    Reserved characters : ";/?:@&=+$,"
        Have special meaning in URIs and must be escaped if not being used for
        their special meaning
    Data characters : letters, digits, and "-_.!~*'()"
        Unreserved and do not need to be escaped; can be, though, if desired
    Control characters : 0x00 - 0x1F, 0x7F
        Have no use in URIs so must be escaped
    space : 0x20
        Must be escaped
    Delimiters : '<>#%"'
        Must be escaped
    Unwise : "{}|\^[]`"
        Must be escaped

    """

    def test_never_quote(self):
        # Make sure quote() and quote_plus() do not quote letters, digits,
        # and "_.-"
        do_not_quote = ''.join(["ABCDEFGHIJKLMNOPQRSTUVWXYZ",
                                "abcdefghijklmnopqrstuvwxyz",
                                "0123456789",
                                "_.-"])
        result = urllib.parse.quote(do_not_quote)
        self.assertEqual(do_not_quote, result,
                         "using quote(): %r != %r" % (do_not_quote, result))
        result = urllib.parse.quote_plus(do_not_quote)
        self.assertEqual(do_not_quote, result,
                         "using quote_plus(): %r != %r" %
                         (do_not_quote, result))

    def test_default_safe(self):
        # Test '/' is default value for 'safe' parameter
        self.assertEqual(urllib.parse.quote.__defaults__[0], '/')

    def test_safe(self):
        # Test setting 'safe' parameter does what it should do
        quote_by_default = "<>"
        result = urllib.parse.quote(quote_by_default, safe=quote_by_default)
        self.assertEqual(quote_by_default, result,
                         "using quote(): %r != %r" %
                         (quote_by_default, result))
        result = urllib.parse.quote_plus(quote_by_default,
                                         safe=quote_by_default)
        self.assertEqual(quote_by_default, result,
                         "using quote_plus(): %r != %r" %
                         (quote_by_default, result))
        # Safe expressed as bytes rather than str
        result = urllib.parse.quote(quote_by_default, safe=b"<>")
        self.assertEqual(quote_by_default, result,
                         "using quote(): %r != %r" %
                         (quote_by_default, result))
        # "Safe" non-ASCII characters should have no effect
        # (Since URIs are not allowed to have non-ASCII characters)
        result = urllib.parse.quote("a\xfcb", encoding="latin-1", safe="\xfc")
        expect = urllib.parse.quote("a\xfcb", encoding="latin-1", safe="")
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" %
                         (expect, result))
        # Same as above, but using a bytes rather than str
        result = urllib.parse.quote("a\xfcb", encoding="latin-1",
                                    safe=b"\xfc")
        expect = urllib.parse.quote("a\xfcb", encoding="latin-1", safe="")
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" %
                         (expect, result))

    def test_default_quoting(self):
        # Make sure all characters that should be quoted are by default sans
        # space (separate test for that).
        should_quote = [chr(num) for num in range(32)]  # For 0x00 - 0x1F
        should_quote.append(r'<>#%"{}|\^[]`')
        should_quote.append(chr(127))  # For 0x7F
        for char in ''.join(should_quote):
            escaped = hexescape(char)
            # %r keeps control characters readable in the failure message.
            result = urllib.parse.quote(char)
            self.assertEqual(escaped, result,
                             "using quote(): "
                             "%r should be escaped to %s, not %s" %
                             (char, escaped, result))
            result = urllib.parse.quote_plus(char)
            self.assertEqual(escaped, result,
                             "using quote_plus(): "
                             "%r should be escaped to %s, not %s" %
                             (char, escaped, result))
        # Characters needing escapes mixed with characters that do not.
        partial_quote = "ab[]cd"
        expected = "ab%5B%5Dcd"
        result = urllib.parse.quote(partial_quote)
        self.assertEqual(expected, result,
                         "using quote(): %r != %r" % (expected, result))
        result = urllib.parse.quote_plus(partial_quote)
        self.assertEqual(expected, result,
                         "using quote_plus(): %r != %r" % (expected, result))

    def test_quoting_space(self):
        # Make sure quote() and quote_plus() handle spaces as specified in
        # their unique way: quote() percent-escapes them, quote_plus() turns
        # them into '+'.
        result = urllib.parse.quote(' ')
        self.assertEqual(result, hexescape(' '),
                         "using quote(): %r != %r" % (result, hexescape(' ')))
        result = urllib.parse.quote_plus(' ')
        self.assertEqual(result, '+',
                         "using quote_plus(): %r != +" % result)
        given = "a b cd e f"
        expect = given.replace(' ', hexescape(' '))
        result = urllib.parse.quote(given)
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        expect = given.replace(' ', '+')
        result = urllib.parse.quote_plus(given)
        self.assertEqual(expect, result,
                         "using quote_plus(): %r != %r" % (expect, result))

    def test_quoting_plus(self):
        # A literal '+' is escaped by quote_plus() unless it is listed as
        # safe; spaces always become '+'.
        self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma'),
                         'alpha%2Bbeta+gamma')
        self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma', '+'),
                         'alpha+beta+gamma')
        # Test with bytes
        self.assertEqual(urllib.parse.quote_plus(b'alpha+beta gamma'),
                         'alpha%2Bbeta+gamma')
        # Test with safe bytes
        self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma', b'+'),
                         'alpha+beta+gamma')

    def test_quote_bytes(self):
        # Bytes should quote directly to percent-encoded values
        given = b"\xa2\xd8ab\xff"
        expect = "%A2%D8ab%FF"
        result = urllib.parse.quote(given)
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        # Encoding argument should raise type error on bytes input
        self.assertRaises(TypeError, urllib.parse.quote, given,
                          encoding="latin-1")
        # quote_from_bytes should work the same
        result = urllib.parse.quote_from_bytes(given)
        self.assertEqual(expect, result,
                         "using quote_from_bytes(): %r != %r"
                         % (expect, result))

    def test_quote_with_unicode(self):
        # Characters in Latin-1 range, encoded by default in UTF-8
        given = "\xa2\xd8ab\xff"
        expect = "%C2%A2%C3%98ab%C3%BF"
        result = urllib.parse.quote(given)
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        # Characters in Latin-1 range, encoded with None (default)
        result = urllib.parse.quote(given, encoding=None, errors=None)
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        # Characters in Latin-1 range, encoded with Latin-1
        given = "\xa2\xd8ab\xff"
        expect = "%A2%D8ab%FF"
        result = urllib.parse.quote(given, encoding="latin-1")
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        # Characters in BMP, encoded by default in UTF-8
        given = "\u6f22\u5b57"              # "Kanji"
        expect = "%E6%BC%A2%E5%AD%97"
        result = urllib.parse.quote(given)
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        # Characters in BMP, encoded with Latin-1
        given = "\u6f22\u5b57"
        self.assertRaises(UnicodeEncodeError, urllib.parse.quote, given,
                          encoding="latin-1")
        # Characters in BMP, encoded with Latin-1, with replace error handling
        given = "\u6f22\u5b57"
        expect = "%3F%3F"                   # "??"
        result = urllib.parse.quote(given, encoding="latin-1",
                                    errors="replace")
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        # Characters in BMP, Latin-1, with xmlcharrefreplace error handling
        given = "\u6f22\u5b57"
        expect = "%26%2328450%3B%26%2323383%3B"     # "&#28450;&#23383;"
        result = urllib.parse.quote(given, encoding="latin-1",
                                    errors="xmlcharrefreplace")
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))

    def test_quote_plus_with_unicode(self):
        # Encoding (latin-1) test for quote_plus
        given = "\xa2\xd8 \xff"
        expect = "%A2%D8+%FF"
        result = urllib.parse.quote_plus(given, encoding="latin-1")
        self.assertEqual(expect, result,
                         "using quote_plus(): %r != %r" % (expect, result))
        # Errors test for quote_plus
        given = "ab\u6f22\u5b57 cd"
        expect = "ab%3F%3F+cd"
        result = urllib.parse.quote_plus(given, encoding="latin-1",
                                         errors="replace")
        self.assertEqual(expect, result,
                         "using quote_plus(): %r != %r" % (expect, result))
936
Senthil Kumarand496c4c2010-07-30 19:34:36 +0000937
class UnquotingTests(unittest.TestCase):
    """Tests for unquote(), unquote_plus() and unquote_to_bytes()

    See the doc string for QuotingTests for details on quoting and such.

    """

    def test_unquoting(self):
        # Make sure unquoting of all ASCII values works
        escape_list = []
        for num in range(128):
            given = hexescape(chr(num))
            expect = chr(num)
            result = urllib.parse.unquote(given)
            self.assertEqual(expect, result,
                             "using unquote(): %r != %r" % (expect, result))
            result = urllib.parse.unquote_plus(given)
            self.assertEqual(expect, result,
                             "using unquote_plus(): %r != %r" %
                             (expect, result))
            escape_list.append(given)
        escape_string = ''.join(escape_list)
        result = urllib.parse.unquote(escape_string)
        # Exactly one '%' may remain: the one decoded from the escape of
        # '%' itself.
        self.assertEqual(result.count('%'), 1,
                         "using unquote(): not all characters unescaped: "
                         "%s" % result)
        # Arguments that are not str must be rejected.
        self.assertRaises((TypeError, AttributeError),
                          urllib.parse.unquote, None)
        self.assertRaises((TypeError, AttributeError),
                          urllib.parse.unquote, ())
        with support.check_warnings(('', BytesWarning), quiet=True):
            self.assertRaises((TypeError, AttributeError),
                              urllib.parse.unquote, b'')

    def test_unquoting_badpercent(self):
        # Test unquoting on bad percent-escapes: they must be passed
        # through unchanged.
        bad_escapes = ('%xab', '%x', '%')
        for given in bad_escapes:
            result = urllib.parse.unquote(given)
            self.assertEqual(given, result, "using unquote(): %r != %r"
                             % (given, result))
        # unquote_to_bytes
        for given in bad_escapes:
            expect = bytes(given, 'ascii')
            result = urllib.parse.unquote_to_bytes(given)
            self.assertEqual(expect, result,
                             "using unquote_to_bytes(): %r != %r"
                             % (expect, result))
        self.assertRaises((TypeError, AttributeError),
                          urllib.parse.unquote_to_bytes, None)
        self.assertRaises((TypeError, AttributeError),
                          urllib.parse.unquote_to_bytes, ())

    def test_unquoting_mixed_case(self):
        # Test unquoting on mixed-case hex digits in the percent-escapes
        given = '%Ab%eA'
        expect = b'\xab\xea'
        result = urllib.parse.unquote_to_bytes(given)
        self.assertEqual(expect, result,
                         "using unquote_to_bytes(): %r != %r"
                         % (expect, result))

    def test_unquoting_parts(self):
        # Make sure unquoting works when have non-quoted characters
        # interspersed
        given = 'ab%sd' % hexescape('c')
        expect = "abcd"
        result = urllib.parse.unquote(given)
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))
        result = urllib.parse.unquote_plus(given)
        self.assertEqual(expect, result,
                         "using unquote_plus(): %r != %r" % (expect, result))

    def test_unquoting_plus(self):
        # Test difference between unquote() and unquote_plus(): only the
        # latter turns '+' into a space.
        given = "are+there+spaces..."
        expect = given
        result = urllib.parse.unquote(given)
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))
        expect = given.replace('+', ' ')
        result = urllib.parse.unquote_plus(given)
        self.assertEqual(expect, result,
                         "using unquote_plus(): %r != %r" % (expect, result))

    def test_unquote_to_bytes(self):
        # Percent-escapes in a str decode to the corresponding raw bytes.
        given = 'br%C3%BCckner_sapporo_20050930.doc'
        expect = b'br\xc3\xbcckner_sapporo_20050930.doc'
        result = urllib.parse.unquote_to_bytes(given)
        self.assertEqual(expect, result,
                         "using unquote_to_bytes(): %r != %r"
                         % (expect, result))
        # Test on a string with unescaped non-ASCII characters
        # (Technically an invalid URI; expect those characters to be UTF-8
        # encoded).
        result = urllib.parse.unquote_to_bytes("\u6f22%C3%BC")
        expect = b'\xe6\xbc\xa2\xc3\xbc'  # UTF-8 for "\u6f22\u00fc"
        self.assertEqual(expect, result,
                         "using unquote_to_bytes(): %r != %r"
                         % (expect, result))
        # Test with a bytes as input
        given = b'%A2%D8ab%FF'
        expect = b'\xa2\xd8ab\xff'
        result = urllib.parse.unquote_to_bytes(given)
        self.assertEqual(expect, result,
                         "using unquote_to_bytes(): %r != %r"
                         % (expect, result))
        # Test with a bytes as input, with unescaped non-ASCII bytes
        # (Technically an invalid URI; expect those bytes to be preserved)
        given = b'%A2\xd8ab%FF'
        expect = b'\xa2\xd8ab\xff'
        result = urllib.parse.unquote_to_bytes(given)
        self.assertEqual(expect, result,
                         "using unquote_to_bytes(): %r != %r"
                         % (expect, result))

    def test_unquote_with_unicode(self):
        # Characters in the Latin-1 range, encoded with UTF-8
        given = 'br%C3%BCckner_sapporo_20050930.doc'
        expect = 'br\u00fcckner_sapporo_20050930.doc'
        result = urllib.parse.unquote(given)
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))
        # Characters in the Latin-1 range, encoded with None (default)
        result = urllib.parse.unquote(given, encoding=None, errors=None)
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))

        # Characters in the Latin-1 range, encoded with Latin-1
        result = urllib.parse.unquote('br%FCckner_sapporo_20050930.doc',
                                      encoding="latin-1")
        expect = 'br\u00fcckner_sapporo_20050930.doc'
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))

        # Characters in BMP, encoded with UTF-8
        given = "%E6%BC%A2%E5%AD%97"
        expect = "\u6f22\u5b57"             # "Kanji"
        result = urllib.parse.unquote(given)
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))

        # Decode with UTF-8, invalid sequence
        given = "%F3%B1"
        expect = "\ufffd"                   # Replacement character
        result = urllib.parse.unquote(given)
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))

        # Decode with UTF-8, invalid sequence, replace errors
        result = urllib.parse.unquote(given, errors="replace")
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))

        # Decode with UTF-8, invalid sequence, ignoring errors
        given = "%F3%B1"
        expect = ""
        result = urllib.parse.unquote(given, errors="ignore")
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))

        # A mix of non-ASCII and percent-encoded characters, UTF-8
        result = urllib.parse.unquote("\u6f22%C3%BC")
        expect = '\u6f22\u00fc'
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))

        # A mix of non-ASCII and percent-encoded characters, Latin-1
        # (Note, the string contains non-Latin-1-representable characters)
        result = urllib.parse.unquote("\u6f22%FC", encoding="latin-1")
        expect = '\u6f22\u00fc'
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))
Raymond Hettinger4b0f20d2005-10-15 16:41:53 +00001127
class urlencode_Tests(unittest.TestCase):
    """Tests for urlencode()"""

    def help_inputtype(self, given, test_type):
        """Helper method for testing different input types.

        'given' must lead to only the pairs:
            * 1st, 1
            * 2nd, 2
            * 3rd, 3

        'test_type' is a short description of the input type; it is only
        used in failure messages.

        Test cannot assume anything about order.  Docs make no guarantee and
        have possible dictionary input.

        """
        expect_somewhere = ["1st=1", "2nd=2", "3rd=3"]
        result = urllib.parse.urlencode(given)
        for expected in expect_somewhere:
            self.assertIn(expected, result,
                          "testing %s: %s not found in %s" %
                          (test_type, expected, result))
        self.assertEqual(result.count('&'), 2,
                         "testing %s: expected 2 '&'s; got %s" %
                         (test_type, result.count('&')))
        # The first '&' must sit between a value digit and a key that
        # starts with a digit.
        amp_location = result.index('&')
        on_amp_left = result[amp_location - 1]
        on_amp_right = result[amp_location + 1]
        self.assertTrue(on_amp_left.isdigit() and on_amp_right.isdigit(),
                        "testing %s: '&' not located in proper place in %s" %
                        (test_type, result))
        # 5 chars per "key=value" thing, plus the two ampersands.
        self.assertEqual(len(result), (5 * 3) + 2,
                         "testing %s: "
                         "unexpected number of characters: %s != %s" %
                         (test_type, len(result), (5 * 3) + 2))

    def test_using_mapping(self):
        # Test passing in a mapping object as an argument.
        self.help_inputtype({"1st": '1', "2nd": '2', "3rd": '3'},
                            "using dict as input type")

    def test_using_sequence(self):
        # Test passing in a sequence of two-item sequences as an argument.
        self.help_inputtype([('1st', '1'), ('2nd', '2'), ('3rd', '3')],
                            "using sequence of two-item tuples as input")

    def test_quoting(self):
        # Make sure keys and values are quoted using quote_plus()
        given = {"&": "="}
        expect = "%s=%s" % (hexescape('&'), hexescape('='))
        result = urllib.parse.urlencode(given)
        self.assertEqual(expect, result)
        given = {"key name": "A bunch of pluses"}
        expect = "key+name=A+bunch+of+pluses"
        result = urllib.parse.urlencode(given)
        self.assertEqual(expect, result)

    def test_doseq(self):
        # Test that passing True for 'doseq' parameter works correctly
        given = {'sequence': ['1', '2', '3']}
        # Without doseq the list is encoded as the quoted str() of the list.
        expect = "sequence=%s" % urllib.parse.quote_plus(str(['1', '2', '3']))
        result = urllib.parse.urlencode(given)
        self.assertEqual(expect, result)
        # With doseq each element gets its own "sequence=<value>" pair.
        result = urllib.parse.urlencode(given, True)
        for value in given["sequence"]:
            expect = "sequence=%s" % value
            self.assertIn(expect, result)
        self.assertEqual(result.count('&'), 2,
                         "Expected 2 '&'s, got %s" % result.count('&'))

    def test_empty_sequence(self):
        # Empty mappings and sequences encode to the empty string.
        self.assertEqual("", urllib.parse.urlencode({}))
        self.assertEqual("", urllib.parse.urlencode([]))

    def test_nonstring_values(self):
        # Non-string values are encoded via their str() form.
        self.assertEqual("a=1", urllib.parse.urlencode({"a": 1}))
        self.assertEqual("a=None", urllib.parse.urlencode({"a": None}))

    def test_nonstring_seq_values(self):
        # With doseq, non-string elements of a sequence value are encoded
        # via their str() form as well.
        self.assertEqual("a=1&a=2",
                         urllib.parse.urlencode({"a": [1, 2]}, True))
        self.assertEqual("a=None&a=a",
                         urllib.parse.urlencode({"a": [None, "a"]}, True))
        # A mapping used as a value is iterated, which yields its keys.
        data = collections.OrderedDict([("a", 1), ("b", 1)])
        self.assertEqual("a=a&a=b",
                         urllib.parse.urlencode({"a": data}, True))

    def test_urlencode_encoding(self):
        # ASCII encoding. Expect %3F with errors="replace"
        given = (('\u00a0', '\u00c1'),)
        expect = '%3F=%3F'
        result = urllib.parse.urlencode(given, encoding="ASCII",
                                        errors="replace")
        self.assertEqual(expect, result)

        # Default is UTF-8 encoding.
        given = (('\u00a0', '\u00c1'),)
        expect = '%C2%A0=%C3%81'
        result = urllib.parse.urlencode(given)
        self.assertEqual(expect, result)

        # Latin-1 encoding.
        given = (('\u00a0', '\u00c1'),)
        expect = '%A0=%C1'
        result = urllib.parse.urlencode(given, encoding="latin-1")
        self.assertEqual(expect, result)

    def test_urlencode_encoding_doseq(self):
        # ASCII Encoding. Expect %3F with errors="replace"
        given = (('\u00a0', '\u00c1'),)
        expect = '%3F=%3F'
        result = urllib.parse.urlencode(given, doseq=True,
                                        encoding="ASCII", errors="replace")
        self.assertEqual(expect, result)

        # ASCII Encoding. On a sequence of values.
        given = (("\u00a0", (1, "\u00c1")),)
        expect = '%3F=1&%3F=%3F'
        result = urllib.parse.urlencode(given, True,
                                        encoding="ASCII", errors="replace")
        self.assertEqual(expect, result)

        # Utf-8
        given = (("\u00a0", "\u00c1"),)
        expect = '%C2%A0=%C3%81'
        result = urllib.parse.urlencode(given, True)
        self.assertEqual(expect, result)

        given = (("\u00a0", (42, "\u00c1")),)
        expect = '%C2%A0=42&%C2%A0=%C3%81'
        result = urllib.parse.urlencode(given, True)
        self.assertEqual(expect, result)

        # latin-1
        given = (("\u00a0", "\u00c1"),)
        expect = '%A0=%C1'
        result = urllib.parse.urlencode(given, True, encoding="latin-1")
        self.assertEqual(expect, result)

        given = (("\u00a0", (42, "\u00c1")),)
        expect = '%A0=42&%A0=%C1'
        result = urllib.parse.urlencode(given, True, encoding="latin-1")
        self.assertEqual(expect, result)

    def test_urlencode_bytes(self):
        # Bytes keys and values are percent-encoded byte for byte.
        given = ((b'\xa0\x24', b'\xc1\x24'),)
        expect = '%A0%24=%C1%24'
        result = urllib.parse.urlencode(given)
        self.assertEqual(expect, result)
        result = urllib.parse.urlencode(given, True)
        self.assertEqual(expect, result)

        # Sequence of values
        given = ((b'\xa0\x24', (42, b'\xc1\x24')),)
        expect = '%A0%24=42&%A0%24=%C1%24'
        result = urllib.parse.urlencode(given, True)
        self.assertEqual(expect, result)

    def test_urlencode_encoding_safe_parameter(self):

        # Send '$' (\x24) as safe character
        # Default utf-8 encoding

        given = ((b'\xa0\x24', b'\xc1\x24'),)
        result = urllib.parse.urlencode(given, safe=":$")
        expect = '%A0$=%C1$'
        self.assertEqual(expect, result)

        given = ((b'\xa0\x24', b'\xc1\x24'),)
        result = urllib.parse.urlencode(given, doseq=True, safe=":$")
        expect = '%A0$=%C1$'
        self.assertEqual(expect, result)

        # Safe parameter in sequence
        given = ((b'\xa0\x24', (b'\xc1\x24', 0xd, 42)),)
        expect = '%A0$=%C1$&%A0$=13&%A0$=42'
        result = urllib.parse.urlencode(given, True, safe=":$")
        self.assertEqual(expect, result)

        # Test all above in latin-1 encoding

        given = ((b'\xa0\x24', b'\xc1\x24'),)
        result = urllib.parse.urlencode(given, safe=":$",
                                        encoding="latin-1")
        expect = '%A0$=%C1$'
        self.assertEqual(expect, result)

        given = ((b'\xa0\x24', b'\xc1\x24'),)
        expect = '%A0$=%C1$'
        result = urllib.parse.urlencode(given, doseq=True, safe=":$",
                                        encoding="latin-1")
        # Check this result too, like every other case in this test.
        self.assertEqual(expect, result)

        given = ((b'\xa0\x24', (b'\xc1\x24', 0xd, 42)),)
        expect = '%A0$=%C1$&%A0$=13&%A0$=42'
        result = urllib.parse.urlencode(given, True, safe=":$",
                                        encoding="latin-1")
        self.assertEqual(expect, result)
1322
class Pathname_Tests(unittest.TestCase):
    """Test pathname2url() and url2pathname()"""

    def test_basic(self):
        # Make sure simple tests pass
        expected_path = os.path.join("parts", "of", "a", "path")
        expected_url = "parts/of/a/path"
        result = urllib.request.pathname2url(expected_path)
        self.assertEqual(expected_url, result,
                         "pathname2url() failed; %s != %s" %
                         (result, expected_url))
        result = urllib.request.url2pathname(expected_url)
        self.assertEqual(expected_path, result,
                         "url2pathname() failed; %s != %s" %
                         (result, expected_path))

    def test_quoting(self):
        # Test automatic quoting and unquoting works for pathname2url() and
        # url2pathname() respectively
        given = os.path.join("needs", "quot=ing", "here")
        expect = "needs/%s/here" % urllib.parse.quote("quot=ing")
        result = urllib.request.pathname2url(given)
        self.assertEqual(expect, result,
                         "pathname2url() failed; %s != %s" %
                         (expect, result))
        # Converting the URL back must give the original path again.
        expect = given
        result = urllib.request.url2pathname(result)
        self.assertEqual(expect, result,
                         "url2pathname() failed; %s != %s" %
                         (expect, result))
        given = os.path.join("make sure", "using_quote")
        expect = "%s/using_quote" % urllib.parse.quote("make sure")
        result = urllib.request.pathname2url(given)
        self.assertEqual(expect, result,
                         "pathname2url() failed; %s != %s" %
                         (expect, result))
        # A '+' in the URL is expected to come through unchanged.
        given = "make+sure/using_unquote"
        expect = os.path.join("make+sure", "using_unquote")
        result = urllib.request.url2pathname(given)
        self.assertEqual(expect, result,
                         "url2pathname() failed; %s != %s" %
                         (expect, result))

    @unittest.skipUnless(sys.platform == 'win32',
                         'test specific to the Windows behaviour of '
                         'urllib.request.url2pathname().')
    def test_ntpath(self):
        # Every spelling of the drive root must map to the same path.
        given = ('/C:/', '///C:/', '/C|//')
        expect = 'C:\\'
        for url in given:
            # subTest reports each failing URL instead of stopping at
            # the first one.
            with self.subTest(url=url):
                result = urllib.request.url2pathname(url)
                self.assertEqual(expect, result,
                                 'urllib.request.url2pathname() failed; '
                                 '%s != %s' % (expect, result))
        given = '///C|/path'
        expect = 'C:\\path'
        result = urllib.request.url2pathname(given)
        self.assertEqual(expect, result,
                         'urllib.request.url2pathname() failed; %s != %s' %
                         (expect, result))
1382
class Utility_Tests(unittest.TestCase):
    """Tests for the assorted utility functions that urllib provides."""

    def test_thishost(self):
        """urllib.request.thishost() must hand back a tuple."""
        result = urllib.request.thishost()
        self.assertIsInstance(result, tuple)
1389
Senthil Kumaran690ce9b2009-05-05 18:41:13 +00001390
class URLopener_Tests(unittest.TestCase):
    """Tests for the open() method of the URLopener class."""

    def test_quoted_open(self):
        """open() quotes a space in the URL but leaves safe characters alone."""

        class DummyURLopener(urllib.request.URLopener):
            # Handler for the made-up "spam" scheme: it simply echoes the
            # URL that open() passes in, so the test can inspect it.
            def open_spam(self, url):
                return url

        expected_warning = (
            'DummyURLopener style of invoking requests is deprecated.',
            DeprecationWarning)
        with support.check_warnings(expected_warning):
            opened = DummyURLopener().open('spam://example/ /')
            self.assertEqual(opened, '//example/%20/')

            # test the safe characters are not quoted by urlopen
            opened = DummyURLopener().open(
                "spam://c:|windows%/:=&?~#+!$,;'@()*[]|/path/")
            self.assertEqual(opened,
                             "//c:|windows%/:=&?~#+!$,;'@()*[]|/path/")
Senthil Kumaran734f0592010-02-20 22:19:04 +00001408
# The FTP wrapper tests below are commented out.
# It is unclear why they keep failing on Windows and SPARC.
# Everywhere else they work fine, but on those machines they sometimes
# fail in one of the tests and sometimes in another. On my Linux machine
# the tests pass.
# If anybody has one of the problematic environments, please help!
# -- Facundo
1416#
1417# def server(evt):
Georg Brandlf78e02b2008-06-10 17:40:04 +00001418# import socket, time
Guido van Rossume7ba4952007-06-06 23:52:48 +00001419# serv = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
1420# serv.settimeout(3)
1421# serv.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
1422# serv.bind(("", 9093))
Charles-François Natali6e204602014-07-23 19:28:13 +01001423# serv.listen()
Guido van Rossume7ba4952007-06-06 23:52:48 +00001424# try:
1425# conn, addr = serv.accept()
1426# conn.send("1 Hola mundo\n")
1427# cantdata = 0
1428# while cantdata < 13:
1429# data = conn.recv(13-cantdata)
1430# cantdata += len(data)
1431# time.sleep(.3)
1432# conn.send("2 No more lines\n")
1433# conn.close()
1434# except socket.timeout:
1435# pass
1436# finally:
1437# serv.close()
1438# evt.set()
1439#
1440# class FTPWrapperTests(unittest.TestCase):
1441#
1442# def setUp(self):
Georg Brandlf78e02b2008-06-10 17:40:04 +00001443# import ftplib, time, threading
Guido van Rossume7ba4952007-06-06 23:52:48 +00001444# ftplib.FTP.port = 9093
1445# self.evt = threading.Event()
1446# threading.Thread(target=server, args=(self.evt,)).start()
1447# time.sleep(.1)
1448#
1449# def tearDown(self):
1450# self.evt.wait()
1451#
1452# def testBasic(self):
1453# # connects
1454# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [])
Georg Brandlf78e02b2008-06-10 17:40:04 +00001455# ftp.close()
Guido van Rossume7ba4952007-06-06 23:52:48 +00001456#
1457# def testTimeoutNone(self):
Georg Brandlf78e02b2008-06-10 17:40:04 +00001458# # global default timeout is ignored
1459# import socket
Serhiy Storchaka25d8aea2014-02-08 14:50:08 +02001460# self.assertIsNone(socket.getdefaulttimeout())
Guido van Rossume7ba4952007-06-06 23:52:48 +00001461# socket.setdefaulttimeout(30)
1462# try:
1463# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [])
1464# finally:
Georg Brandlf78e02b2008-06-10 17:40:04 +00001465# socket.setdefaulttimeout(None)
Guido van Rossume7ba4952007-06-06 23:52:48 +00001466# self.assertEqual(ftp.ftp.sock.gettimeout(), 30)
Georg Brandlf78e02b2008-06-10 17:40:04 +00001467# ftp.close()
Guido van Rossume7ba4952007-06-06 23:52:48 +00001468#
Georg Brandlf78e02b2008-06-10 17:40:04 +00001469# def testTimeoutDefault(self):
1470# # global default timeout is used
1471# import socket
Serhiy Storchaka25d8aea2014-02-08 14:50:08 +02001472# self.assertIsNone(socket.getdefaulttimeout())
Georg Brandlf78e02b2008-06-10 17:40:04 +00001473# socket.setdefaulttimeout(30)
1474# try:
1475# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [])
1476# finally:
1477# socket.setdefaulttimeout(None)
1478# self.assertEqual(ftp.ftp.sock.gettimeout(), 30)
1479# ftp.close()
1480#
1481# def testTimeoutValue(self):
1482# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [],
1483# timeout=30)
1484# self.assertEqual(ftp.ftp.sock.gettimeout(), 30)
1485# ftp.close()
Guido van Rossume7ba4952007-06-06 23:52:48 +00001486
Senthil Kumaran8b081b72013-04-10 20:53:12 -07001487
class RequestTests(unittest.TestCase):
    """Unit tests covering urllib.request.Request and its HTTP method."""

    URL = "http://www.python.org"

    def test_default_values(self):
        """No method argument: GET without data, POST with a data argument."""
        without_data = urllib.request.Request(self.URL)
        self.assertEqual(without_data.get_method(), 'GET')
        with_data = urllib.request.Request(self.URL, {})
        self.assertEqual(with_data.get_method(), 'POST')

    def test_with_method_arg(self):
        """An explicit method wins, and can later be changed via .method."""
        make_request = urllib.request.Request

        # method='HEAD' is honoured both without and with a data argument.
        for positional in ((self.URL,), (self.URL, {})):
            req = make_request(*positional, method='HEAD')
            self.assertEqual(req.method, 'HEAD')
            self.assertEqual(req.get_method(), 'HEAD')

        # Assigning to the attribute afterwards changes get_method() too.
        req = make_request(self.URL, method='GET')
        self.assertEqual(req.get_method(), 'GET')
        req.method = 'HEAD'
        self.assertEqual(req.get_method(), 'HEAD')
Skip Montanaro080c9972001-01-28 21:12:22 +00001510
1511
class URL2PathNameTests(unittest.TestCase):
    """Tests for url2pathname() as imported from nturl2path."""

    def test_converting_drive_letter(self):
        # Both '|' and ':' are accepted after the drive letter.
        self.assertEqual(url2pathname("///C|"), 'C:')
        self.assertEqual(url2pathname("///C:"), 'C:')
        self.assertEqual(url2pathname("///C|/"), 'C:\\')

    def test_converting_when_no_drive_letter(self):
        # cannot end a raw string in \
        self.assertEqual(url2pathname("///C/test/"), r'\\\C\test' '\\')
        self.assertEqual(url2pathname("////C/test/"), r'\\C\test' '\\')

    def test_simple_compare(self):
        self.assertEqual(url2pathname("///C|/foo/bar/spam.foo"),
                         r'C:\foo\bar\spam.foo')

    def test_non_ascii_drive_letter(self):
        # IOError is only a legacy alias of OSError; use the real name.
        self.assertRaises(OSError, url2pathname, "///\u00e8|/")

    def test_roundtrip_url2pathname(self):
        list_of_paths = ['C:',
                         r'\\\C\test\\',
                         r'C:\foo\bar\spam.foo'
                         ]
        for path in list_of_paths:
            # subTest names the failing path and lets the remaining
            # paths run even if one of them fails.
            with self.subTest(path=path):
                self.assertEqual(url2pathname(pathname2url(path)), path)
Senthil Kumaran277e9092013-04-10 20:51:19 -07001538
class PathName2URLTests(unittest.TestCase):
    """Tests for pathname2url() as imported from nturl2path."""

    def test_converting_drive_letter(self):
        self.assertEqual(pathname2url("C:"), '///C:')
        self.assertEqual(pathname2url("C:\\"), '///C:')

    def test_converting_when_no_drive_letter(self):
        # The trailing backslash is a separate literal because a raw
        # string cannot end in a backslash.
        self.assertEqual(pathname2url(r"\\\folder\test" "\\"),
                         '/////folder/test/')
        self.assertEqual(pathname2url(r"\\folder\test" "\\"),
                         '////folder/test/')
        self.assertEqual(pathname2url(r"\folder\test" "\\"),
                         '/folder/test/')

    def test_simple_compare(self):
        self.assertEqual(pathname2url(r'C:\foo\bar\spam.foo'),
                         "///C:/foo/bar/spam.foo")

    def test_long_drive_letter(self):
        # IOError is only a legacy alias of OSError; use the real name.
        self.assertRaises(OSError, pathname2url, "XX:\\")

    def test_roundtrip_pathname2url(self):
        list_of_paths = ['///C:',
                         '/////folder/test/',
                         '///C:/foo/bar/spam.foo']
        for path in list_of_paths:
            # subTest names the failing URL and lets the remaining
            # ones run even if one of them fails.
            with self.subTest(path=path):
                self.assertEqual(pathname2url(url2pathname(path)), path)
Brett Cannon74bfd702003-04-25 09:39:47 +00001566
# Run the tests in this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()