blob: f41fa2a950686c00029cf951d95a9fe1c1ef039d [file] [log] [blame]
Raymond Hettinger15f44ab2016-08-30 10:47:49 -07001"""Regression tests for what was in Python 2's "urllib" module"""
Brett Cannon74bfd702003-04-25 09:39:47 +00002
Jeremy Hylton1afc1692008-06-18 20:49:58 +00003import urllib.parse
4import urllib.request
guido@google.coma119df92011-03-29 11:41:02 -07005import urllib.error
Georg Brandl24420152008-05-26 16:32:26 +00006import http.client
Barry Warsaw820c1202008-06-12 04:06:45 +00007import email.message
Jeremy Hylton66dc8c52007-08-04 03:42:26 +00008import io
Brett Cannon74bfd702003-04-25 09:39:47 +00009import unittest
Benjamin Peterson3c2dca62014-06-07 15:08:04 -070010from unittest.mock import patch
Benjamin Petersonee8712c2008-05-20 21:35:26 +000011from test import support
Hai Shi3ddc6342020-06-30 21:46:06 +080012from test.support import os_helper
13from test.support import warnings_helper
Brett Cannon74bfd702003-04-25 09:39:47 +000014import os
Antoine Pitrou07df6552014-11-02 17:23:14 +010015try:
16 import ssl
17except ImportError:
18 ssl = None
Senthil Kumaran2d2ea1b2011-04-14 13:16:30 +080019import sys
Georg Brandl5a650a22005-08-26 08:51:34 +000020import tempfile
Senthil Kumaran277e9092013-04-10 20:51:19 -070021from nturl2path import url2pathname, pathname2url
Jeremy Hylton6102e292000-08-31 15:48:10 +000022
Senthil Kumaranc5c5a142012-01-14 19:09:04 +080023from base64 import b64encode
Georg Brandl2daf6ae2012-02-20 19:54:16 +010024import collections
Senthil Kumaranc5c5a142012-01-14 19:09:04 +080025
Senthil Kumaran8b081b72013-04-10 20:53:12 -070026
def hexescape(char):
    """Return *char* percent-escaped as RFC 2396 specifies.

    The result is "%" followed by the character's code point written in
    upper-case hexadecimal, zero-padded to at least two digits.
    """
    return "%%%02X" % ord(char)
Jeremy Hylton6102e292000-08-31 15:48:10 +000033
# Shortcut for testing FancyURLopener: the opener created by the first
# proxy-less urlopen() call is cached here and reused by later calls.
_urlopener = None


def urlopen(url, data=None, proxies=None):
    """urlopen(url [, data]) -> open file-like object

    When *proxies* is given, a fresh urllib.request.FancyURLopener is built
    for this call only.  Otherwise the module-level cached opener is used,
    and created on first use.  *data* is passed on to the opener's open()
    only when it is not None.
    """
    global _urlopener
    if proxies is not None:
        opener = urllib.request.FancyURLopener(proxies=proxies)
    else:
        if not _urlopener:
            _urlopener = FancyURLopener()
        opener = _urlopener
    open_args = (url,) if data is None else (url, data)
    return opener.open(*open_args)
52
Senthil Kumarance260142011-11-01 01:35:17 +080053
def FancyURLopener():
    """Return a new urllib.request.FancyURLopener.

    The instance is created inside warnings_helper.check_warnings() with a
    filter for the DeprecationWarning that constructing it is expected to
    emit.
    """
    expected_warning = (
        'FancyURLopener style of invoking requests is deprecated.',
        DeprecationWarning,
    )
    with warnings_helper.check_warnings(expected_warning):
        opener = urllib.request.FancyURLopener()
    return opener
59
60
def fakehttp(fakedata, mock_close=False):
    """Build an http.client.HTTPConnection subclass that replays *fakedata*.

    connect() on the returned class installs an in-memory fake socket
    pre-loaded with *fakedata* instead of opening a network connection.
    The data handed to the socket's sendall() is stored on the class as
    ``buf`` and the most recently created socket as ``fakesock``.  When
    *mock_close* is true the class also overrides close() with a no-op.
    """
    class FakeSocket(io.BytesIO):
        # Starts at one reference; every makefile() call adds another and
        # the buffer is really closed only when the count drops to zero.
        io_refs = 1

        def sendall(self, data):
            # Keep the outgoing bytes so that tests can inspect them.
            FakeHTTPConnection.buf = data

        def makefile(self, *args, **kwds):
            self.io_refs += 1
            return self

        def read(self, amt=None):
            return b"" if self.closed else super().read(amt)

        def readline(self, length=None):
            return b"" if self.closed else super().readline(length)

        def close(self):
            self.io_refs -= 1
            if not self.io_refs:
                super().close()

    class FakeHTTPConnection(http.client.HTTPConnection):

        # buffer to store data for verification in urlopen tests.
        buf = None

        def connect(self):
            fake_sock = FakeSocket(self.fakedata)
            self.sock = fake_sock
            type(self).fakesock = fake_sock

        if mock_close:
            # bpo-36918: HTTPConnection destructor calls close() which calls
            # flush(). Problem: flush() calls self.fp.flush() which raises
            # "ValueError: I/O operation on closed file" which is logged as an
            # "Exception ignored in". Override close() to silence this error.
            def close(self):
                pass

    FakeHTTPConnection.fakedata = fakedata
    return FakeHTTPConnection
106
107
class FakeHTTPMixin:
    """Mixin that temporarily replaces http.client.HTTPConnection."""

    def fakehttp(self, fakedata, mock_close=False):
        # Build the replacement first, then remember the class currently
        # installed so that unfakehttp() can put it back.
        replacement = fakehttp(fakedata, mock_close=mock_close)
        self._connection_class, http.client.HTTPConnection = (
            http.client.HTTPConnection, replacement)

    def unfakehttp(self):
        # Reinstall whatever class fakehttp() saved.
        http.client.HTTPConnection = self._connection_class
116
117
class FakeFTPMixin:
    """Mixin that temporarily replaces urllib.request.ftpwrapper with a stub."""

    def fakeftp(self):
        class FakeFtpWrapper:
            # Accepts the arguments listed below and ignores all of them.
            def __init__(self, user, passwd, host, port, dirs, timeout=None,
                         persistent=True):
                pass

            def retrfile(self, file, type):
                # An empty stream paired with 0.
                empty_body = io.BytesIO()
                return empty_body, 0

            def close(self):
                pass

        # Remember the class currently installed so unfakeftp() can
        # restore it, then install the stub.
        self._ftpwrapper_class, urllib.request.ftpwrapper = (
            urllib.request.ftpwrapper, FakeFtpWrapper)

    def unfakeftp(self):
        # Reinstall whatever class fakeftp() saved.
        urllib.request.ftpwrapper = self._ftpwrapper_class
136
137
class urlopen_FileTests(unittest.TestCase):
    """Test urlopen() opening a temporary file.

    Try to test as much functionality as possible so as to cut down on reliance
    on connecting to the Net for testing.

    """

    def setUp(self):
        self.text = bytes("test_urllib: %s\n" % self.__class__.__name__,
                          "ascii")
        self.pathname = os_helper.TESTFN
        # Register the removal before the file is created: tearDown() is not
        # run when setUp() raises, so without this a failure below would
        # leave the temp file behind.  Cleanups run after tearDown(), i.e.
        # once the object returned by urlopen() has been closed.
        self.addCleanup(os_helper.unlink, self.pathname)
        # Create a temp file to use for testing
        with open(self.pathname, 'wb') as f:
            f.write(self.text)
        self.quoted_pathname = urllib.parse.quote(self.pathname)
        self.returned_obj = urlopen("file:%s" % self.quoted_pathname)

    def tearDown(self):
        """Shut down the open object"""
        self.returned_obj.close()

    def test_interface(self):
        # Make sure object returned by urlopen() has the specified methods
        for attr in ("read", "readline", "readlines", "fileno",
                     "close", "info", "geturl", "getcode", "__iter__"):
            self.assertTrue(hasattr(self.returned_obj, attr),
                         "object returned by urlopen() lacks %s attribute" %
                         attr)

    def test_read(self):
        self.assertEqual(self.text, self.returned_obj.read())

    def test_readline(self):
        self.assertEqual(self.text, self.returned_obj.readline())
        self.assertEqual(b'', self.returned_obj.readline(),
                         "calling readline() after exhausting the file did not"
                         " return an empty string")

    def test_readlines(self):
        lines_list = self.returned_obj.readlines()
        self.assertEqual(len(lines_list), 1,
                         "readlines() returned the wrong number of lines")
        self.assertEqual(lines_list[0], self.text,
                         "readlines() returned improper text")

    def test_fileno(self):
        file_num = self.returned_obj.fileno()
        self.assertIsInstance(file_num, int, "fileno() did not return an int")
        self.assertEqual(os.read(file_num, len(self.text)), self.text,
                         "Reading on the file descriptor returned by fileno() "
                         "did not return the expected text")

    def test_close(self):
        # Test close() by calling it here and then having it be called again
        # by the tearDown() method for the test
        self.returned_obj.close()

    def test_headers(self):
        self.assertIsInstance(self.returned_obj.headers, email.message.Message)

    def test_url(self):
        self.assertEqual(self.returned_obj.url, self.quoted_pathname)

    def test_status(self):
        self.assertIsNone(self.returned_obj.status)

    def test_info(self):
        self.assertIsInstance(self.returned_obj.info(), email.message.Message)

    def test_geturl(self):
        self.assertEqual(self.returned_obj.geturl(), self.quoted_pathname)

    def test_getcode(self):
        self.assertIsNone(self.returned_obj.getcode())

    def test_iter(self):
        # Test iterator
        # Don't need to count number of iterations since test would fail the
        # instant it returned anything beyond the first line from the
        # comparison.
        # Use the iterator in the usual implicit way to test for ticket #4608.
        for line in self.returned_obj:
            self.assertEqual(line, self.text)

    def test_relativelocalfile(self):
        # A relative path without a scheme is rejected by urlopen().
        self.assertRaises(ValueError, urllib.request.urlopen,
                          './' + self.pathname)
229
Senthil Kumaranefbd4ea2017-04-01 23:47:35 -0700230
class ProxyTests(unittest.TestCase):
    """Test proxy lookup and proxy bypass driven by environment variables."""

    def setUp(self):
        # Records changes to env vars
        self.env = os_helper.EnvironmentVarGuard()
        # Delete all proxy related env vars
        proxy_vars = [name for name in os.environ if 'proxy' in name.lower()]
        for name in proxy_vars:
            self.env.unset(name)

    def tearDown(self):
        # Restore all proxy related env vars
        self.env.__exit__()
        del self.env

    def test_getproxies_environment_keep_no_proxies(self):
        bypass = urllib.request.proxy_bypass_environment
        self.env.set('NO_PROXY', 'localhost')
        # getproxies_environment use lowered case truncated (no '_proxy') keys
        self.assertEqual('localhost',
                         urllib.request.getproxies_environment()['no'])
        # List of no_proxies with space.
        self.env.set('NO_PROXY', 'localhost, anotherdomain.com, newdomain.com:1234')
        for host in ('anotherdomain.com',
                     'anotherdomain.com:8888',
                     'newdomain.com:1234'):
            self.assertTrue(bypass(host))

    def test_proxy_cgi_ignore(self):
        getproxies = urllib.request.getproxies_environment
        try:
            self.env.set('HTTP_PROXY', 'http://somewhere:3128')
            self.assertEqual('http://somewhere:3128', getproxies()['http'])
            # Once REQUEST_METHOD is set, 'http' must no longer be reported.
            self.env.set('REQUEST_METHOD', 'GET')
            self.assertNotIn('http', getproxies())
        finally:
            for name in ('REQUEST_METHOD', 'HTTP_PROXY'):
                self.env.unset(name)

    def test_proxy_bypass_environment_host_match(self):
        bypass = urllib.request.proxy_bypass_environment
        self.env.set('NO_PROXY',
                     'localhost, anotherdomain.com, newdomain.com:1234, .d.o.t')
        bypassed_hosts = (
            'localhost',
            'LocalHost',                # MixedCase
            'LOCALHOST',                # UPPERCASE
            '.localhost',
            'newdomain.com:1234',
            '.newdomain.com:1234',
            'foo.d.o.t',                # issue 29142
            'd.o.t',
            'anotherdomain.com:8888',
            '.anotherdomain.com:8888',
            'www.newdomain.com:1234',
        )
        proxied_hosts = (
            'prelocalhost',
            'newdomain.com',            # no port
            'newdomain.com:1235',       # wrong port
        )
        for host in bypassed_hosts:
            self.assertTrue(bypass(host))
        for host in proxied_hosts:
            self.assertFalse(bypass(host))

    def test_proxy_bypass_environment_always_match(self):
        bypass = urllib.request.proxy_bypass_environment
        self.env.set('NO_PROXY', '*')
        for host in ('newdomain.com', 'newdomain.com:1234'):
            self.assertTrue(bypass(host))
        self.env.set('NO_PROXY', '*, anotherdomain.com')
        self.assertTrue(bypass('anotherdomain.com'))
        for host in ('newdomain.com', 'newdomain.com:1234'):
            self.assertFalse(bypass(host))

    def test_proxy_bypass_environment_newline(self):
        bypass = urllib.request.proxy_bypass_environment
        self.env.set('NO_PROXY',
                     'localhost, anotherdomain.com, newdomain.com:1234')
        # A trailing newline must keep an otherwise listed host from matching.
        for host in ('localhost\n',
                     'anotherdomain.com:8888\n',
                     'newdomain.com:1234\n'):
            self.assertFalse(bypass(host))
305
Senthil Kumaranefbd4ea2017-04-01 23:47:35 -0700306
class ProxyTests_withOrderedEnv(unittest.TestCase):
    """Test proxy environment handling where variable order is significant."""

    def setUp(self):
        # We need to test conditions, where variable order _is_ significant,
        # so monkey patch os.environ and start with an empty fake environment.
        self._saved_env, os.environ = os.environ, collections.OrderedDict()

    def tearDown(self):
        os.environ = self._saved_env

    def test_getproxies_environment_prefer_lowercase(self):
        bypass = urllib.request.proxy_bypass_environment
        getproxies = urllib.request.getproxies_environment
        fake_env = os.environ

        # Test lowercase preference with removal
        fake_env['no_proxy'] = ''
        fake_env['No_Proxy'] = 'localhost'
        for host in ('localhost', 'arbitrary'):
            self.assertFalse(bypass(host))
        fake_env['http_proxy'] = ''
        fake_env['HTTP_PROXY'] = 'http://somewhere:3128'
        self.assertEqual({}, getproxies())

        # Test lowercase preference of proxy bypass and correct matching including ports
        fake_env['no_proxy'] = 'localhost, noproxy.com, my.proxy:1234'
        fake_env['No_Proxy'] = 'xyz.com'
        for host in ('localhost', 'noproxy.com:5678', 'my.proxy:1234'):
            self.assertTrue(bypass(host))
        for host in ('my.proxy', 'arbitrary'):
            self.assertFalse(bypass(host))

        # Test lowercase preference with replacement
        fake_env['http_proxy'] = 'http://somewhere:3128'
        fake_env['Http_Proxy'] = 'http://somewhereelse:3128'
        self.assertEqual('http://somewhere:3128', getproxies()['http'])
Benjamin Peterson9bc93512008-09-22 22:10:59 +0000341
Senthil Kumaranefbd4ea2017-04-01 23:47:35 -0700342
class urlopen_HttpTests(unittest.TestCase, FakeHTTPMixin, FakeFTPMixin):
    """Test urlopen() opening a fake http connection.

    Tests that need an HTTP response install a canned one with fakehttp()
    and put the previous connection class back with unfakehttp() in a
    ``finally`` block, so every fakehttp() call is paired with exactly one
    unfakehttp() call before the next one is made.
    """

    def check_read(self, ver):
        """Serve a "200 OK" response for HTTP version *ver* and check it.

        *ver* is a bytes object such as b"1.1"; it is spliced into the
        status line of the canned response.
        """
        self.fakehttp(b"HTTP/" + ver + b" 200 OK\r\n\r\nHello!")
        try:
            fp = urlopen("http://python.org/")
            self.assertEqual(fp.readline(), b"Hello!")
            self.assertEqual(fp.readline(), b"")
            self.assertEqual(fp.geturl(), 'http://python.org/')
            self.assertEqual(fp.getcode(), 200)
        finally:
            self.unfakehttp()

    def test_url_fragment(self):
        # Issue #11703: geturl() omits fragments in the original URL.
        url = 'http://docs.python.org/library/urllib.html#OK'
        self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello!")
        try:
            fp = urllib.request.urlopen(url)
            self.assertEqual(fp.geturl(), url)
        finally:
            self.unfakehttp()

    def test_willclose(self):
        # The response wrapped by the returned object must be flagged
        # will_close.
        self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello!")
        try:
            resp = urlopen("http://www.python.org")
            self.assertTrue(resp.fp.will_close)
        finally:
            self.unfakehttp()

    @unittest.skipUnless(ssl, "ssl module required")
    def test_url_path_with_control_char_rejected(self):
        # Covers code points 0x00-0x20 and 0x7f placed in the URL path.
        for char_no in list(range(0, 0x21)) + [0x7f]:
            char = chr(char_no)
            schemeless_url = f"//localhost:7777/test{char}/"
            self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello.")
            try:
                # We explicitly test urllib.request.urlopen() instead of the top
                # level 'def urlopen()' function defined in this... (quite ugly)
                # test suite.  They use different url opening codepaths.  Plain
                # urlopen uses FancyURLOpener which goes via a codepath that
                # calls urllib.parse.quote() on the URL which makes all of the
                # above attempts at injection within the url _path_ safe.
                # Backslashes in the repr are doubled because the repr is
                # embedded in a regular expression below.
                escaped_char_repr = repr(char).replace('\\', r'\\')
                InvalidURL = http.client.InvalidURL
                with self.assertRaisesRegex(
                    InvalidURL, f"contain control.*{escaped_char_repr}"):
                    urllib.request.urlopen(f"http:{schemeless_url}")
                with self.assertRaisesRegex(
                    InvalidURL, f"contain control.*{escaped_char_repr}"):
                    urllib.request.urlopen(f"https:{schemeless_url}")
                # This code path quotes the URL so there is no injection.
                resp = urlopen(f"http:{schemeless_url}")
                self.assertNotIn(char, resp.geturl())
            finally:
                self.unfakehttp()

    @unittest.skipUnless(ssl, "ssl module required")
    def test_url_path_with_newline_header_injection_rejected(self):
        self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello.")
        host = "localhost:7777?a=1 HTTP/1.1\r\nX-injected: header\r\nTEST: 123"
        schemeless_url = "//" + host + ":8080/test/?test=a"
        try:
            # We explicitly test urllib.request.urlopen() instead of the top
            # level 'def urlopen()' function defined in this... (quite ugly)
            # test suite.  They use different url opening codepaths.  Plain
            # urlopen uses FancyURLOpener which goes via a codepath that
            # calls urllib.parse.quote() on the URL which makes all of the
            # above attempts at injection within the url _path_ safe.
            InvalidURL = http.client.InvalidURL
            with self.assertRaisesRegex(
                InvalidURL, r"contain control.*\\r.*(found at least . .)"):
                urllib.request.urlopen(f"http:{schemeless_url}")
            with self.assertRaisesRegex(InvalidURL, r"contain control.*\\n"):
                urllib.request.urlopen(f"https:{schemeless_url}")
            # This code path quotes the URL so there is no injection.
            resp = urlopen(f"http:{schemeless_url}")
            self.assertNotIn(' ', resp.geturl())
            self.assertNotIn('\r', resp.geturl())
            self.assertNotIn('\n', resp.geturl())
        finally:
            self.unfakehttp()

    @unittest.skipUnless(ssl, "ssl module required")
    def test_url_host_with_control_char_rejected(self):
        # Same code points as the path test, but placed in the host part;
        # here the module-level urlopen() helper itself must raise.
        for char_no in list(range(0, 0x21)) + [0x7f]:
            char = chr(char_no)
            schemeless_url = f"//localhost{char}/test/"
            self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello.")
            try:
                escaped_char_repr = repr(char).replace('\\', r'\\')
                InvalidURL = http.client.InvalidURL
                with self.assertRaisesRegex(
                    InvalidURL, f"contain control.*{escaped_char_repr}"):
                    urlopen(f"http:{schemeless_url}")
                with self.assertRaisesRegex(InvalidURL, f"contain control.*{escaped_char_repr}"):
                    urlopen(f"https:{schemeless_url}")
            finally:
                self.unfakehttp()

    @unittest.skipUnless(ssl, "ssl module required")
    def test_url_host_with_newline_header_injection_rejected(self):
        self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello.")
        host = "localhost\r\nX-injected: header\r\n"
        schemeless_url = "//" + host + ":8080/test/?test=a"
        try:
            InvalidURL = http.client.InvalidURL
            with self.assertRaisesRegex(
                InvalidURL, r"contain control.*\\r"):
                urlopen(f"http:{schemeless_url}")
            with self.assertRaisesRegex(InvalidURL, r"contain control.*\\n"):
                urlopen(f"https:{schemeless_url}")
        finally:
            self.unfakehttp()

    def test_read_0_9(self):
        # "0.9" response accepted (but not "simple responses" without
        # a status line)
        self.check_read(b"0.9")

    def test_read_1_0(self):
        self.check_read(b"1.0")

    def test_read_1_1(self):
        self.check_read(b"1.1")

    def test_read_bogus(self):
        # urlopen() should raise OSError for many error codes.
        # The continuation lines of the canned response start at column 0
        # because they are part of the bytes literal.
        self.fakehttp(b'''HTTP/1.1 401 Authentication Required
Date: Wed, 02 Jan 2008 03:03:54 GMT
Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
Connection: close
Content-Type: text/html; charset=iso-8859-1
''', mock_close=True)
        try:
            self.assertRaises(OSError, urlopen, "http://python.org/")
        finally:
            self.unfakehttp()

    def test_invalid_redirect(self):
        # urlopen() should raise HTTPError when a 302 response redirects
        # to a file: URL.
        self.fakehttp(b'''HTTP/1.1 302 Found
Date: Wed, 02 Jan 2008 03:03:54 GMT
Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
Location: file://guidocomputer.athome.com:/python/license
Connection: close
Content-Type: text/html; charset=iso-8859-1
''', mock_close=True)
        try:
            msg = "Redirection to url 'file:"
            with self.assertRaisesRegex(urllib.error.HTTPError, msg):
                urlopen("http://python.org/")
        finally:
            self.unfakehttp()

    def test_redirect_limit_independent(self):
        # Ticket #12923: make sure independent requests each use their
        # own retry limit.
        # The request is repeated once per allowed try (maxtries); every
        # repetition must fail with HTTPError.
        for i in range(FancyURLopener().maxtries):
            self.fakehttp(b'''HTTP/1.1 302 Found
Location: file://guidocomputer.athome.com:/python/license
Connection: close
''', mock_close=True)
            try:
                self.assertRaises(urllib.error.HTTPError, urlopen,
                    "http://something")
            finally:
                self.unfakehttp()

    def test_empty_socket(self):
        # urlopen() raises OSError if the underlying socket does not send any
        # data. (#1680230)
        self.fakehttp(b'')
        try:
            self.assertRaises(OSError, urlopen, "http://something")
        finally:
            self.unfakehttp()

    def test_missing_localfile(self):
        # Test for #10836
        with self.assertRaises(urllib.error.URLError) as e:
            urlopen('file://localhost/a/file/which/doesnot/exists.py')
        self.assertTrue(e.exception.filename)
        self.assertTrue(e.exception.reason)

    def test_file_notexists(self):
        # The same file: URL must open while the file exists and raise
        # URLError once the file has been removed.
        fd, tmp_file = tempfile.mkstemp()
        tmp_fileurl = 'file://localhost/' + tmp_file.replace(os.path.sep, '/')
        try:
            self.assertTrue(os.path.exists(tmp_file))
            with urlopen(tmp_fileurl) as fobj:
                self.assertTrue(fobj)
        finally:
            os.close(fd)
            os.unlink(tmp_file)
        self.assertFalse(os.path.exists(tmp_file))
        with self.assertRaises(urllib.error.URLError):
            urlopen(tmp_fileurl)

    def test_ftp_nohost(self):
        test_ftp_url = 'ftp:///path'
        with self.assertRaises(urllib.error.URLError) as e:
            urlopen(test_ftp_url)
        self.assertFalse(e.exception.filename)
        self.assertTrue(e.exception.reason)

    def test_ftp_nonexisting(self):
        with self.assertRaises(urllib.error.URLError) as e:
            urlopen('ftp://localhost/a/file/which/doesnot/exists.py')
        self.assertFalse(e.exception.filename)
        self.assertTrue(e.exception.reason)

    @patch.object(urllib.request, 'MAXFTPCACHE', 0)
    def test_ftp_cache_pruning(self):
        # With MAXFTPCACHE patched to 0 and one entry already present in
        # ftpcache, opening an ftp URL through the fake wrapper must not
        # raise.
        self.fakeftp()
        try:
            urllib.request.ftpcache['test'] = urllib.request.ftpwrapper('user', 'pass', 'localhost', 21, [])
            urlopen('ftp://localhost')
        finally:
            self.unfakeftp()

    def test_userpass_inurl(self):
        self.fakehttp(b"HTTP/1.0 200 OK\r\n\r\nHello!")
        try:
            fp = urlopen("http://user:pass@python.org/")
            self.assertEqual(fp.readline(), b"Hello!")
            self.assertEqual(fp.readline(), b"")
            self.assertEqual(fp.geturl(), 'http://user:pass@python.org/')
            self.assertEqual(fp.getcode(), 200)
        finally:
            self.unfakehttp()

    def test_userpass_inurl_w_spaces(self):
        self.fakehttp(b"HTTP/1.0 200 OK\r\n\r\nHello!")
        try:
            userpass = "a b:c d"
            url = "http://{}@python.org/".format(userpass)
            # While the fake is installed, http.client.HTTPConnection is the
            # fake class, whose ``buf`` attribute holds the data passed to
            # the fake socket's sendall().
            fakehttp_wrapper = http.client.HTTPConnection
            authorization = ("Authorization: Basic %s\r\n" %
                             b64encode(userpass.encode("ASCII")).decode("ASCII"))
            fp = urlopen(url)
            # The authorization header must be in place
            self.assertIn(authorization, fakehttp_wrapper.buf.decode("UTF-8"))
            self.assertEqual(fp.readline(), b"Hello!")
            self.assertEqual(fp.readline(), b"")
            # the spaces are quoted in URL so no match
            self.assertNotEqual(fp.geturl(), url)
            self.assertEqual(fp.getcode(), 200)
        finally:
            self.unfakehttp()

    def test_URLopener_deprecation(self):
        # Constructing URLopener is expected to emit a DeprecationWarning.
        with warnings_helper.check_warnings(('',DeprecationWarning)):
            urllib.request.URLopener()

    @unittest.skipUnless(ssl, "ssl module required")
    def test_cafile_and_context(self):
        # Passing both cafile and context to urlopen() must raise ValueError.
        context = ssl.create_default_context()
        with warnings_helper.check_warnings(('', DeprecationWarning)):
            with self.assertRaises(ValueError):
                urllib.request.urlopen(
                    "https://localhost", cafile="/nonexistent/path", context=context
                )
Senthil Kumarana5c85b32014-09-19 15:23:30 +0800608
Senthil Kumaranefbd4ea2017-04-01 23:47:35 -0700609
class urlopen_DataTests(unittest.TestCase):
    """Test urlopen() opening a data URL."""

    def setUp(self):
        # clear _opener global variable
        self.addCleanup(urllib.request.urlcleanup)

        # text containing URL special- and unicode-characters
        self.text = "test data URLs :;,%=& \u00f6 \u00c4 "
        # 2x1 pixel RGB PNG image with one black and one white pixel
        self.image = (
            b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\x02\x00\x00\x00'
            b'\x01\x08\x02\x00\x00\x00{@\xe8\xdd\x00\x00\x00\x01sRGB\x00\xae'
            b'\xce\x1c\xe9\x00\x00\x00\x0fIDAT\x08\xd7c```\xf8\xff\xff?\x00'
            b'\x06\x01\x02\xfe\no/\x1e\x00\x00\x00\x00IEND\xaeB`\x82')

        self.text_url = (
            "data:text/plain;charset=UTF-8,test%20data%20URLs%20%3A%3B%2C%25%3"
            "D%26%20%C3%B6%20%C3%84%20")
        self.text_url_base64 = (
            "data:text/plain;charset=ISO-8859-1;base64,dGVzdCBkYXRhIFVSTHMgOjs"
            "sJT0mIPYgxCA%3D")
        # base64 encoded data URL that contains ignorable spaces,
        # such as "\n", " ", "%0A", and "%20".
        # The URL must start with the "data:" scheme and media type; the
        # payload is the base64 form of self.image (its first 30 bytes
        # encode to the 40 characters on the first line).
        self.image_url = (
            "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAIAAAABCAIAAAB7\n"
            "QOjdAAAAAXNSR0IArs4c6QAAAA9JREFUCNdj%0AYGBg%2BP//PwAGAQL%2BCm8 "
            "vHgAAAABJRU5ErkJggg%3D%3D%0A%20")

        self.text_url_resp = urllib.request.urlopen(self.text_url)
        self.text_url_base64_resp = urllib.request.urlopen(
            self.text_url_base64)
        self.image_url_resp = urllib.request.urlopen(self.image_url)

    def test_interface(self):
        # Make sure object returned by urlopen() has the specified methods
        for attr in ("read", "readline", "readlines",
                     "close", "info", "geturl", "getcode", "__iter__"):
            self.assertTrue(hasattr(self.text_url_resp, attr),
                            "object returned by urlopen() lacks %s attribute" %
                            attr)

    def test_info(self):
        """info() exposes the media type, charset and content length."""
        self.assertIsInstance(self.text_url_resp.info(), email.message.Message)
        self.assertEqual(self.text_url_base64_resp.info().get_params(),
                         [('text/plain', ''), ('charset', 'ISO-8859-1')])
        self.assertEqual(self.image_url_resp.info()['content-length'],
                         str(len(self.image)))
        self.assertEqual(urllib.request.urlopen("data:,").info().get_params(),
                         [('text/plain', ''), ('charset', 'US-ASCII')])

    def test_geturl(self):
        """geturl() returns the URL exactly as it was passed in."""
        self.assertEqual(self.text_url_resp.geturl(), self.text_url)
        self.assertEqual(self.text_url_base64_resp.geturl(),
                         self.text_url_base64)
        self.assertEqual(self.image_url_resp.geturl(), self.image_url)

    def test_read_text(self):
        """Percent-encoded text decodes with the advertised charset."""
        charset = dict(self.text_url_resp.info().get_params())['charset']
        self.assertEqual(self.text_url_resp.read().decode(charset), self.text)

    def test_read_text_base64(self):
        """Base64-encoded text decodes with the advertised charset."""
        charset = dict(
            self.text_url_base64_resp.info().get_params())['charset']
        self.assertEqual(self.text_url_base64_resp.read().decode(charset),
                         self.text)

    def test_read_image(self):
        """The base64 payload decodes to the original image bytes."""
        self.assertEqual(self.image_url_resp.read(), self.image)

    def test_missing_comma(self):
        """A data URL without the ',' separator is rejected."""
        self.assertRaises(ValueError, urllib.request.urlopen,
                          'data:text/plain')

    def test_invalid_base64_data(self):
        # missing padding character
        self.assertRaises(ValueError, urllib.request.urlopen,
                          'data:;base64,Cg=')
685
Senthil Kumaranefbd4ea2017-04-01 23:47:35 -0700686
Brett Cannon19691362003-04-29 05:08:06 +0000687class urlretrieve_FileTests(unittest.TestCase):
Brett Cannon74bfd702003-04-25 09:39:47 +0000688 """Test urllib.urlretrieve() on local files"""
Skip Montanaro080c9972001-01-28 21:12:22 +0000689
Brett Cannon19691362003-04-29 05:08:06 +0000690 def setUp(self):
Victor Stinner7cb92042019-07-02 14:50:19 +0200691 # clear _opener global variable
692 self.addCleanup(urllib.request.urlcleanup)
693
Georg Brandl5a650a22005-08-26 08:51:34 +0000694 # Create a list of temporary files. Each item in the list is a file
695 # name (absolute path or relative to the current working directory).
696 # All files in this list will be deleted in the tearDown method. Note,
697 # this only helps to makes sure temporary files get deleted, but it
698 # does nothing about trying to close files that may still be open. It
699 # is the responsibility of the developer to properly close files even
700 # when exceptional conditions occur.
701 self.tempFiles = []
702
Brett Cannon19691362003-04-29 05:08:06 +0000703 # Create a temporary file.
Hai Shi3ddc6342020-06-30 21:46:06 +0800704 self.registerFileForCleanUp(os_helper.TESTFN)
Guido van Rossuma0982942007-07-10 08:30:03 +0000705 self.text = b'testing urllib.urlretrieve'
Georg Brandl5a650a22005-08-26 08:51:34 +0000706 try:
Hai Shi3ddc6342020-06-30 21:46:06 +0800707 FILE = open(os_helper.TESTFN, 'wb')
Georg Brandl5a650a22005-08-26 08:51:34 +0000708 FILE.write(self.text)
709 FILE.close()
710 finally:
711 try: FILE.close()
712 except: pass
Brett Cannon19691362003-04-29 05:08:06 +0000713
714 def tearDown(self):
Georg Brandl5a650a22005-08-26 08:51:34 +0000715 # Delete the temporary files.
716 for each in self.tempFiles:
717 try: os.remove(each)
718 except: pass
719
720 def constructLocalFileUrl(self, filePath):
Victor Stinner6c6f8512010-08-07 10:09:35 +0000721 filePath = os.path.abspath(filePath)
722 try:
Marc-André Lemburg8f36af72011-02-25 15:42:01 +0000723 filePath.encode("utf-8")
Victor Stinner6c6f8512010-08-07 10:09:35 +0000724 except UnicodeEncodeError:
725 raise unittest.SkipTest("filePath is not encodable to utf8")
726 return "file://%s" % urllib.request.pathname2url(filePath)
Georg Brandl5a650a22005-08-26 08:51:34 +0000727
Guido van Rossum70d0dda2007-08-29 01:53:26 +0000728 def createNewTempFile(self, data=b""):
Georg Brandl5a650a22005-08-26 08:51:34 +0000729 """Creates a new temporary file containing the specified data,
730 registers the file for deletion during the test fixture tear down, and
731 returns the absolute path of the file."""
732
733 newFd, newFilePath = tempfile.mkstemp()
734 try:
735 self.registerFileForCleanUp(newFilePath)
736 newFile = os.fdopen(newFd, "wb")
737 newFile.write(data)
738 newFile.close()
739 finally:
740 try: newFile.close()
741 except: pass
742 return newFilePath
743
744 def registerFileForCleanUp(self, fileName):
745 self.tempFiles.append(fileName)
Brett Cannon19691362003-04-29 05:08:06 +0000746
747 def test_basic(self):
748 # Make sure that a local file just gets its own location returned and
749 # a headers value is returned.
Hai Shi3ddc6342020-06-30 21:46:06 +0800750 result = urllib.request.urlretrieve("file:%s" % os_helper.TESTFN)
751 self.assertEqual(result[0], os_helper.TESTFN)
Ezio Melottie9615932010-01-24 19:26:24 +0000752 self.assertIsInstance(result[1], email.message.Message,
Martin Panter7462b6492015-11-02 03:37:02 +0000753 "did not get an email.message.Message instance "
Ezio Melottie9615932010-01-24 19:26:24 +0000754 "as second returned value")
Brett Cannon19691362003-04-29 05:08:06 +0000755
756 def test_copy(self):
757 # Test that setting the filename argument works.
Hai Shi3ddc6342020-06-30 21:46:06 +0800758 second_temp = "%s.2" % os_helper.TESTFN
Georg Brandl5a650a22005-08-26 08:51:34 +0000759 self.registerFileForCleanUp(second_temp)
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000760 result = urllib.request.urlretrieve(self.constructLocalFileUrl(
Hai Shi3ddc6342020-06-30 21:46:06 +0800761 os_helper.TESTFN), second_temp)
Brett Cannon19691362003-04-29 05:08:06 +0000762 self.assertEqual(second_temp, result[0])
Benjamin Petersonc9c0f202009-06-30 23:06:06 +0000763 self.assertTrue(os.path.exists(second_temp), "copy of the file was not "
Brett Cannon19691362003-04-29 05:08:06 +0000764 "made")
Alex Martelli01c77c62006-08-24 02:58:11 +0000765 FILE = open(second_temp, 'rb')
Brett Cannon19691362003-04-29 05:08:06 +0000766 try:
767 text = FILE.read()
Brett Cannon19691362003-04-29 05:08:06 +0000768 FILE.close()
Georg Brandl5a650a22005-08-26 08:51:34 +0000769 finally:
770 try: FILE.close()
771 except: pass
Brett Cannon19691362003-04-29 05:08:06 +0000772 self.assertEqual(self.text, text)
773
774 def test_reporthook(self):
775 # Make sure that the reporthook works.
Senthil Kumarane24f96a2012-03-13 19:29:33 -0700776 def hooktester(block_count, block_read_size, file_size, count_holder=[0]):
777 self.assertIsInstance(block_count, int)
778 self.assertIsInstance(block_read_size, int)
779 self.assertIsInstance(file_size, int)
780 self.assertEqual(block_count, count_holder[0])
Brett Cannon19691362003-04-29 05:08:06 +0000781 count_holder[0] = count_holder[0] + 1
Hai Shi3ddc6342020-06-30 21:46:06 +0800782 second_temp = "%s.2" % os_helper.TESTFN
Georg Brandl5a650a22005-08-26 08:51:34 +0000783 self.registerFileForCleanUp(second_temp)
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000784 urllib.request.urlretrieve(
Hai Shi3ddc6342020-06-30 21:46:06 +0800785 self.constructLocalFileUrl(os_helper.TESTFN),
Georg Brandl5a650a22005-08-26 08:51:34 +0000786 second_temp, hooktester)
787
788 def test_reporthook_0_bytes(self):
789 # Test on zero length file. Should call reporthook only 1 time.
790 report = []
Senthil Kumarane24f96a2012-03-13 19:29:33 -0700791 def hooktester(block_count, block_read_size, file_size, _report=report):
792 _report.append((block_count, block_read_size, file_size))
Georg Brandl5a650a22005-08-26 08:51:34 +0000793 srcFileName = self.createNewTempFile()
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000794 urllib.request.urlretrieve(self.constructLocalFileUrl(srcFileName),
Hai Shi3ddc6342020-06-30 21:46:06 +0800795 os_helper.TESTFN, hooktester)
Georg Brandl5a650a22005-08-26 08:51:34 +0000796 self.assertEqual(len(report), 1)
797 self.assertEqual(report[0][2], 0)
798
799 def test_reporthook_5_bytes(self):
800 # Test on 5 byte file. Should call reporthook only 2 times (once when
801 # the "network connection" is established and once when the block is
Senthil Kumarane24f96a2012-03-13 19:29:33 -0700802 # read).
Georg Brandl5a650a22005-08-26 08:51:34 +0000803 report = []
Senthil Kumarane24f96a2012-03-13 19:29:33 -0700804 def hooktester(block_count, block_read_size, file_size, _report=report):
805 _report.append((block_count, block_read_size, file_size))
Guido van Rossum70d0dda2007-08-29 01:53:26 +0000806 srcFileName = self.createNewTempFile(b"x" * 5)
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000807 urllib.request.urlretrieve(self.constructLocalFileUrl(srcFileName),
Hai Shi3ddc6342020-06-30 21:46:06 +0800808 os_helper.TESTFN, hooktester)
Georg Brandl5a650a22005-08-26 08:51:34 +0000809 self.assertEqual(len(report), 2)
Gregory P. Smith6d9388f2012-11-10 15:12:55 -0800810 self.assertEqual(report[0][2], 5)
811 self.assertEqual(report[1][2], 5)
Georg Brandl5a650a22005-08-26 08:51:34 +0000812
813 def test_reporthook_8193_bytes(self):
814 # Test on 8193 byte file. Should call reporthook only 3 times (once
815 # when the "network connection" is established, once for the next 8192
816 # bytes, and once for the last byte).
817 report = []
Senthil Kumarane24f96a2012-03-13 19:29:33 -0700818 def hooktester(block_count, block_read_size, file_size, _report=report):
819 _report.append((block_count, block_read_size, file_size))
Guido van Rossum70d0dda2007-08-29 01:53:26 +0000820 srcFileName = self.createNewTempFile(b"x" * 8193)
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000821 urllib.request.urlretrieve(self.constructLocalFileUrl(srcFileName),
Hai Shi3ddc6342020-06-30 21:46:06 +0800822 os_helper.TESTFN, hooktester)
Georg Brandl5a650a22005-08-26 08:51:34 +0000823 self.assertEqual(len(report), 3)
Gregory P. Smith6d9388f2012-11-10 15:12:55 -0800824 self.assertEqual(report[0][2], 8193)
825 self.assertEqual(report[0][1], 8192)
Senthil Kumarane24f96a2012-03-13 19:29:33 -0700826 self.assertEqual(report[1][1], 8192)
Gregory P. Smith6d9388f2012-11-10 15:12:55 -0800827 self.assertEqual(report[2][1], 8192)
Skip Montanaro080c9972001-01-28 21:12:22 +0000828
Senthil Kumarance260142011-11-01 01:35:17 +0800829
class urlretrieve_HttpTests(unittest.TestCase, FakeHTTPMixin):
    """Test urllib.urlretrieve() using fake http connections"""

    # Canned reply whose Content-Length header (100) is larger than the
    # body that actually follows the blank line.
    _SHORT_REPLY = (
        b'HTTP/1.1 200 OK\n'
        b'Date: Wed, 02 Jan 2008 03:03:54 GMT\n'
        b'Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 '
        b'OpenSSL/0.9.7e\n'
        b'Connection: close\n'
        b'Content-Length: 100\n'
        b'Content-Type: text/html; charset=iso-8859-1\n'
        b'\n'
        b'FF\n'
    )

    def test_short_content_raises_ContentTooShortError(self):
        """A truncated body is reported even when a reporthook is given."""
        self.addCleanup(urllib.request.urlcleanup)
        self.fakehttp(self._SHORT_REPLY)

        def ignore_progress(block_num, block_size, total_size):
            return None

        with self.assertRaises(urllib.error.ContentTooShortError):
            try:
                urllib.request.urlretrieve(support.TEST_HTTP_URL,
                                           reporthook=ignore_progress)
            finally:
                self.unfakehttp()

    def test_short_content_raises_ContentTooShortError_without_reporthook(self):
        """A truncated body is reported when no reporthook is given."""
        self.addCleanup(urllib.request.urlcleanup)
        self.fakehttp(self._SHORT_REPLY)

        with self.assertRaises(urllib.error.ContentTooShortError):
            try:
                urllib.request.urlretrieve(support.TEST_HTTP_URL)
            finally:
                self.unfakehttp()
873
874
class QuotingTests(unittest.TestCase):
    r"""Tests for urllib.quote() and urllib.quote_plus()

    According to RFC 3986 (Uniform Resource Identifiers), to escape a
    character you write it as '%' + <2 character US-ASCII hex value>.
    The Python code of ``'%' + hex(ord(<character>))[2:]`` escapes a
    character properly. Case does not matter on the hex letters.

    The various character sets specified are:

    Reserved characters : ";/?:@&=+$,"
        Have special meaning in URIs and must be escaped if not being used for
        their special meaning
    Data characters : letters, digits, and "-_.!~*'()"
        Unreserved and do not need to be escaped; can be, though, if desired
    Control characters : 0x00 - 0x1F, 0x7F
        Have no use in URIs so must be escaped
    space : 0x20
        Must be escaped
    Delimiters : '<>#%"'
        Must be escaped
    Unwise : "{}|\^[]`"
        Must be escaped

    """

    def _assert_same(self, first, second, func_name):
        """assertEqual() with the "using <func>(): a != b" failure message."""
        self.assertEqual(first, second,
                         "using %s(): %r != %r" % (func_name, first, second))

    def test_never_quote(self):
        # Make sure quote() does not quote letters, digits, and "_.-~"
        do_not_quote = ''.join(["ABCDEFGHIJKLMNOPQRSTUVWXYZ",
                                "abcdefghijklmnopqrstuvwxyz",
                                "0123456789",
                                "_.-~"])
        self._assert_same(do_not_quote, urllib.parse.quote(do_not_quote),
                          "quote")
        self._assert_same(do_not_quote, urllib.parse.quote_plus(do_not_quote),
                          "quote_plus")

    def test_default_safe(self):
        # Test '/' is default value for 'safe' parameter
        self.assertEqual(urllib.parse.quote.__defaults__[0], '/')

    def test_safe(self):
        # Test setting 'safe' parameter does what it should do
        quote_by_default = "<>"
        result = urllib.parse.quote(quote_by_default, safe=quote_by_default)
        self._assert_same(quote_by_default, result, "quote")
        result = urllib.parse.quote_plus(quote_by_default,
                                         safe=quote_by_default)
        self._assert_same(quote_by_default, result, "quote_plus")
        # Safe expressed as bytes rather than str
        result = urllib.parse.quote(quote_by_default, safe=b"<>")
        self._assert_same(quote_by_default, result, "quote")
        # "Safe" non-ASCII characters should have no effect
        # (Since URIs are not allowed to have non-ASCII characters)
        result = urllib.parse.quote("a\xfcb", encoding="latin-1", safe="\xfc")
        expect = urllib.parse.quote("a\xfcb", encoding="latin-1", safe="")
        self._assert_same(expect, result, "quote")
        # Same as above, but using a bytes rather than str
        result = urllib.parse.quote("a\xfcb", encoding="latin-1", safe=b"\xfc")
        expect = urllib.parse.quote("a\xfcb", encoding="latin-1", safe="")
        self._assert_same(expect, result, "quote")

    def test_default_quoting(self):
        # Make sure all characters that should be quoted are by default sans
        # space (separate test for that).
        should_quote = (''.join(map(chr, range(32)))  # For 0x00 - 0x1F
                        + r'<>#%"{}|\^[]`'
                        + chr(127))  # For 0x7F
        for char in should_quote:
            escaped = hexescape(char)
            result = urllib.parse.quote(char)
            self.assertEqual(escaped, result,
                             "using quote(): "
                             "%s should be escaped to %s, not %s" %
                             (char, escaped, result))
            result = urllib.parse.quote_plus(char)
            self.assertEqual(escaped, result,
                             "using quote_plus(): "
                             "%s should be escaped to %s, not %s" %
                             (char, escaped, result))
        partial_quote = "ab[]cd"
        expected = "ab%5B%5Dcd"
        self._assert_same(expected, urllib.parse.quote(partial_quote),
                          "quote")
        self._assert_same(expected, urllib.parse.quote_plus(partial_quote),
                          "quote_plus")

    def test_quoting_space(self):
        # Make sure quote() and quote_plus() handle spaces as specified in
        # their unique way
        self._assert_same(urllib.parse.quote(' '), hexescape(' '), "quote")
        result = urllib.parse.quote_plus(' ')
        self.assertEqual(result, '+',
                         "using quote_plus(): %r != +" % result)
        given = "a b cd e f"
        expect = given.replace(' ', hexescape(' '))
        self._assert_same(expect, urllib.parse.quote(given), "quote")
        expect = given.replace(' ', '+')
        self._assert_same(expect, urllib.parse.quote_plus(given),
                          "quote_plus")

    def test_quoting_plus(self):
        """quote_plus() escapes a literal '+' unless it is marked safe."""
        self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma'),
                         'alpha%2Bbeta+gamma')
        self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma', '+'),
                         'alpha+beta+gamma')
        # Test with bytes
        self.assertEqual(urllib.parse.quote_plus(b'alpha+beta gamma'),
                         'alpha%2Bbeta+gamma')
        # Test with safe bytes
        self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma', b'+'),
                         'alpha+beta+gamma')

    def test_quote_bytes(self):
        # Bytes should quote directly to percent-encoded values
        given = b"\xa2\xd8ab\xff"
        expect = "%A2%D8ab%FF"
        self._assert_same(expect, urllib.parse.quote(given), "quote")
        # Encoding argument should raise type error on bytes input
        self.assertRaises(TypeError, urllib.parse.quote, given,
                          encoding="latin-1")
        # quote_from_bytes should work the same
        self._assert_same(expect, urllib.parse.quote_from_bytes(given),
                          "quote_from_bytes")

    def test_quote_with_unicode(self):
        # Characters in Latin-1 range, encoded by default in UTF-8
        given = "\xa2\xd8ab\xff"
        expect = "%C2%A2%C3%98ab%C3%BF"
        self._assert_same(expect, urllib.parse.quote(given), "quote")
        # Characters in Latin-1 range, encoded with None (default)
        result = urllib.parse.quote(given, encoding=None, errors=None)
        self._assert_same(expect, result, "quote")
        # Characters in Latin-1 range, encoded with Latin-1
        given = "\xa2\xd8ab\xff"
        expect = "%A2%D8ab%FF"
        result = urllib.parse.quote(given, encoding="latin-1")
        self._assert_same(expect, result, "quote")
        # Characters in BMP, encoded by default in UTF-8
        given = "\u6f22\u5b57"              # "Kanji"
        expect = "%E6%BC%A2%E5%AD%97"
        self._assert_same(expect, urllib.parse.quote(given), "quote")
        # Characters in BMP, encoded with Latin-1
        given = "\u6f22\u5b57"
        self.assertRaises(UnicodeEncodeError, urllib.parse.quote, given,
                          encoding="latin-1")
        # Characters in BMP, encoded with Latin-1, with replace error handling
        given = "\u6f22\u5b57"
        expect = "%3F%3F"                   # "??"
        result = urllib.parse.quote(given, encoding="latin-1",
                                    errors="replace")
        self._assert_same(expect, result, "quote")
        # Characters in BMP, Latin-1, with xmlcharref error handling
        given = "\u6f22\u5b57"
        expect = "%26%2328450%3B%26%2323383%3B"     # "&#28450;&#23383;"
        result = urllib.parse.quote(given, encoding="latin-1",
                                    errors="xmlcharrefreplace")
        self._assert_same(expect, result, "quote")

    def test_quote_plus_with_unicode(self):
        # Encoding (latin-1) test for quote_plus
        given = "\xa2\xd8 \xff"
        expect = "%A2%D8+%FF"
        result = urllib.parse.quote_plus(given, encoding="latin-1")
        self._assert_same(expect, result, "quote_plus")
        # Errors test for quote_plus
        given = "ab\u6f22\u5b57 cd"
        expect = "ab%3F%3F+cd"
        result = urllib.parse.quote_plus(given, encoding="latin-1",
                                         errors="replace")
        self._assert_same(expect, result, "quote_plus")
1078
Senthil Kumarand496c4c2010-07-30 19:34:36 +00001079
Brett Cannon74bfd702003-04-25 09:39:47 +00001080class UnquotingTests(unittest.TestCase):
1081 """Tests for unquote() and unquote_plus()
Tim Petersc2659cf2003-05-12 20:19:37 +00001082
    See the docstring of QuotingTests for details on quoting and such.
1084
1085 """
1086
def test_unquoting(self):
    """Each escaped ASCII character is restored by unquote*()."""
    # Make sure unquoting of all ASCII values works
    unquoters = (("unquote", urllib.parse.unquote),
                 ("unquote_plus", urllib.parse.unquote_plus))
    escaped_chars = []
    for code in range(128):
        char = chr(code)
        escaped = hexescape(char)
        for func_name, unquoter in unquoters:
            decoded = unquoter(escaped)
            self.assertEqual(char, decoded,
                             "using %s(): %r != %r" %
                             (func_name, char, decoded))
        escaped_chars.append(escaped)
    # Unquote all the escapes at once: only the unescaped '%' itself may
    # remain in the output.
    decoded_all = urllib.parse.unquote(''.join(escaped_chars))
    self.assertEqual(decoded_all.count('%'), 1,
                     "using unquote(): not all characters escaped: "
                     "%s" % decoded_all)
    for bad_input in (None, ()):
        self.assertRaises((TypeError, AttributeError),
                          urllib.parse.unquote, bad_input)
Brett Cannon74bfd702003-04-25 09:39:47 +00001109
Guido van Rossum52dbbb92008-08-18 21:44:30 +00001110 def test_unquoting_badpercent(self):
1111 # Test unquoting on bad percent-escapes
1112 given = '%xab'
1113 expect = given
1114 result = urllib.parse.unquote(given)
1115 self.assertEqual(expect, result, "using unquote(): %r != %r"
1116 % (expect, result))
1117 given = '%x'
1118 expect = given
1119 result = urllib.parse.unquote(given)
1120 self.assertEqual(expect, result, "using unquote(): %r != %r"
1121 % (expect, result))
1122 given = '%'
1123 expect = given
1124 result = urllib.parse.unquote(given)
1125 self.assertEqual(expect, result, "using unquote(): %r != %r"
1126 % (expect, result))
1127 # unquote_to_bytes
1128 given = '%xab'
1129 expect = bytes(given, 'ascii')
1130 result = urllib.parse.unquote_to_bytes(given)
1131 self.assertEqual(expect, result, "using unquote_to_bytes(): %r != %r"
1132 % (expect, result))
1133 given = '%x'
1134 expect = bytes(given, 'ascii')
1135 result = urllib.parse.unquote_to_bytes(given)
1136 self.assertEqual(expect, result, "using unquote_to_bytes(): %r != %r"
1137 % (expect, result))
1138 given = '%'
1139 expect = bytes(given, 'ascii')
1140 result = urllib.parse.unquote_to_bytes(given)
1141 self.assertEqual(expect, result, "using unquote_to_bytes(): %r != %r"
1142 % (expect, result))
Georg Brandl604ef372010-07-31 08:20:02 +00001143 self.assertRaises((TypeError, AttributeError), urllib.parse.unquote_to_bytes, None)
1144 self.assertRaises((TypeError, AttributeError), urllib.parse.unquote_to_bytes, ())
Senthil Kumaran79e17f62010-07-19 18:17:19 +00001145
Guido van Rossum52dbbb92008-08-18 21:44:30 +00001146 def test_unquoting_mixed_case(self):
1147 # Test unquoting on mixed-case hex digits in the percent-escapes
1148 given = '%Ab%eA'
1149 expect = b'\xab\xea'
1150 result = urllib.parse.unquote_to_bytes(given)
1151 self.assertEqual(expect, result,
1152 "using unquote_to_bytes(): %r != %r"
1153 % (expect, result))
1154
def test_unquoting_parts(self):
    """An escape embedded in plain text is decoded in place."""
    # Make sure unquoting works when have non-quoted characters
    # interspersed
    given = 'ab%sd' % hexescape('c')
    expect = "abcd"
    result = urllib.parse.unquote(given)
    # The failure message names the function actually under test.
    self.assertEqual(expect, result,
                     "using unquote(): %r != %r" % (expect, result))
    result = urllib.parse.unquote_plus(given)
    self.assertEqual(expect, result,
                     "using unquote_plus(): %r != %r" % (expect, result))
Tim Petersc2659cf2003-05-12 20:19:37 +00001166
Brett Cannon74bfd702003-04-25 09:39:47 +00001167 def test_unquoting_plus(self):
1168 # Test difference between unquote() and unquote_plus()
1169 given = "are+there+spaces..."
1170 expect = given
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001171 result = urllib.parse.unquote(given)
Brett Cannon74bfd702003-04-25 09:39:47 +00001172 self.assertEqual(expect, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +00001173 "using unquote(): %r != %r" % (expect, result))
Brett Cannon74bfd702003-04-25 09:39:47 +00001174 expect = given.replace('+', ' ')
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001175 result = urllib.parse.unquote_plus(given)
Brett Cannon74bfd702003-04-25 09:39:47 +00001176 self.assertEqual(expect, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +00001177 "using unquote_plus(): %r != %r" % (expect, result))
1178
1179 def test_unquote_to_bytes(self):
1180 given = 'br%C3%BCckner_sapporo_20050930.doc'
1181 expect = b'br\xc3\xbcckner_sapporo_20050930.doc'
1182 result = urllib.parse.unquote_to_bytes(given)
1183 self.assertEqual(expect, result,
1184 "using unquote_to_bytes(): %r != %r"
1185 % (expect, result))
1186 # Test on a string with unescaped non-ASCII characters
1187 # (Technically an invalid URI; expect those characters to be UTF-8
1188 # encoded).
1189 result = urllib.parse.unquote_to_bytes("\u6f22%C3%BC")
1190 expect = b'\xe6\xbc\xa2\xc3\xbc' # UTF-8 for "\u6f22\u00fc"
1191 self.assertEqual(expect, result,
1192 "using unquote_to_bytes(): %r != %r"
1193 % (expect, result))
1194 # Test with a bytes as input
1195 given = b'%A2%D8ab%FF'
1196 expect = b'\xa2\xd8ab\xff'
1197 result = urllib.parse.unquote_to_bytes(given)
1198 self.assertEqual(expect, result,
1199 "using unquote_to_bytes(): %r != %r"
1200 % (expect, result))
1201 # Test with a bytes as input, with unescaped non-ASCII bytes
1202 # (Technically an invalid URI; expect those bytes to be preserved)
1203 given = b'%A2\xd8ab%FF'
1204 expect = b'\xa2\xd8ab\xff'
1205 result = urllib.parse.unquote_to_bytes(given)
1206 self.assertEqual(expect, result,
1207 "using unquote_to_bytes(): %r != %r"
1208 % (expect, result))
Brett Cannon74bfd702003-04-25 09:39:47 +00001209
Raymond Hettinger4b0f20d2005-10-15 16:41:53 +00001210 def test_unquote_with_unicode(self):
Guido van Rossum52dbbb92008-08-18 21:44:30 +00001211 # Characters in the Latin-1 range, encoded with UTF-8
1212 given = 'br%C3%BCckner_sapporo_20050930.doc'
1213 expect = 'br\u00fcckner_sapporo_20050930.doc'
1214 result = urllib.parse.unquote(given)
1215 self.assertEqual(expect, result,
1216 "using unquote(): %r != %r" % (expect, result))
1217 # Characters in the Latin-1 range, encoded with None (default)
1218 result = urllib.parse.unquote(given, encoding=None, errors=None)
1219 self.assertEqual(expect, result,
1220 "using unquote(): %r != %r" % (expect, result))
1221
1222 # Characters in the Latin-1 range, encoded with Latin-1
1223 result = urllib.parse.unquote('br%FCckner_sapporo_20050930.doc',
1224 encoding="latin-1")
1225 expect = 'br\u00fcckner_sapporo_20050930.doc'
1226 self.assertEqual(expect, result,
1227 "using unquote(): %r != %r" % (expect, result))
1228
1229 # Characters in BMP, encoded with UTF-8
1230 given = "%E6%BC%A2%E5%AD%97"
1231 expect = "\u6f22\u5b57" # "Kanji"
1232 result = urllib.parse.unquote(given)
1233 self.assertEqual(expect, result,
1234 "using unquote(): %r != %r" % (expect, result))
1235
1236 # Decode with UTF-8, invalid sequence
1237 given = "%F3%B1"
1238 expect = "\ufffd" # Replacement character
1239 result = urllib.parse.unquote(given)
1240 self.assertEqual(expect, result,
1241 "using unquote(): %r != %r" % (expect, result))
1242
1243 # Decode with UTF-8, invalid sequence, replace errors
1244 result = urllib.parse.unquote(given, errors="replace")
1245 self.assertEqual(expect, result,
1246 "using unquote(): %r != %r" % (expect, result))
1247
1248 # Decode with UTF-8, invalid sequence, ignoring errors
1249 given = "%F3%B1"
1250 expect = ""
1251 result = urllib.parse.unquote(given, errors="ignore")
1252 self.assertEqual(expect, result,
1253 "using unquote(): %r != %r" % (expect, result))
1254
1255 # A mix of non-ASCII and percent-encoded characters, UTF-8
1256 result = urllib.parse.unquote("\u6f22%C3%BC")
1257 expect = '\u6f22\u00fc'
1258 self.assertEqual(expect, result,
1259 "using unquote(): %r != %r" % (expect, result))
1260
1261 # A mix of non-ASCII and percent-encoded characters, Latin-1
1262 # (Note, the string contains non-Latin-1-representable characters)
1263 result = urllib.parse.unquote("\u6f22%FC", encoding="latin-1")
1264 expect = '\u6f22\u00fc'
1265 self.assertEqual(expect, result,
1266 "using unquote(): %r != %r" % (expect, result))
Raymond Hettinger4b0f20d2005-10-15 16:41:53 +00001267
Stein Karlsenaad2ee02019-10-14 12:36:29 +02001268 def test_unquoting_with_bytes_input(self):
1269 # ASCII characters decoded to a string
1270 given = b'blueberryjam'
1271 expect = 'blueberryjam'
1272 result = urllib.parse.unquote(given)
1273 self.assertEqual(expect, result,
1274 "using unquote(): %r != %r" % (expect, result))
1275
1276 # A mix of non-ASCII hex-encoded characters and ASCII characters
1277 given = b'bl\xc3\xa5b\xc3\xa6rsyltet\xc3\xb8y'
1278 expect = 'bl\u00e5b\u00e6rsyltet\u00f8y'
1279 result = urllib.parse.unquote(given)
1280 self.assertEqual(expect, result,
1281 "using unquote(): %r != %r" % (expect, result))
1282
1283 # A mix of non-ASCII percent-encoded characters and ASCII characters
1284 given = b'bl%c3%a5b%c3%a6rsyltet%c3%b8j'
1285 expect = 'bl\u00e5b\u00e6rsyltet\u00f8j'
1286 result = urllib.parse.unquote(given)
1287 self.assertEqual(expect, result,
1288 "using unquote(): %r != %r" % (expect, result))
1289
1290
Brett Cannon74bfd702003-04-25 09:39:47 +00001291class urlencode_Tests(unittest.TestCase):
1292 """Tests for urlencode()"""
1293
1294 def help_inputtype(self, given, test_type):
1295 """Helper method for testing different input types.
Tim Petersc2659cf2003-05-12 20:19:37 +00001296
Brett Cannon74bfd702003-04-25 09:39:47 +00001297 'given' must lead to only the pairs:
1298 * 1st, 1
1299 * 2nd, 2
1300 * 3rd, 3
Tim Petersc2659cf2003-05-12 20:19:37 +00001301
Brett Cannon74bfd702003-04-25 09:39:47 +00001302 Test cannot assume anything about order. Docs make no guarantee and
1303 have possible dictionary input.
Tim Petersc2659cf2003-05-12 20:19:37 +00001304
Brett Cannon74bfd702003-04-25 09:39:47 +00001305 """
1306 expect_somewhere = ["1st=1", "2nd=2", "3rd=3"]
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001307 result = urllib.parse.urlencode(given)
Brett Cannon74bfd702003-04-25 09:39:47 +00001308 for expected in expect_somewhere:
Ezio Melottib58e0bd2010-01-23 15:40:09 +00001309 self.assertIn(expected, result,
Brett Cannon74bfd702003-04-25 09:39:47 +00001310 "testing %s: %s not found in %s" %
1311 (test_type, expected, result))
1312 self.assertEqual(result.count('&'), 2,
1313 "testing %s: expected 2 '&'s; got %s" %
1314 (test_type, result.count('&')))
1315 amp_location = result.index('&')
1316 on_amp_left = result[amp_location - 1]
1317 on_amp_right = result[amp_location + 1]
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00001318 self.assertTrue(on_amp_left.isdigit() and on_amp_right.isdigit(),
Brett Cannon74bfd702003-04-25 09:39:47 +00001319 "testing %s: '&' not located in proper place in %s" %
1320 (test_type, result))
1321 self.assertEqual(len(result), (5 * 3) + 2, #5 chars per thing and amps
1322 "testing %s: "
1323 "unexpected number of characters: %s != %s" %
1324 (test_type, len(result), (5 * 3) + 2))
1325
1326 def test_using_mapping(self):
1327 # Test passing in a mapping object as an argument.
1328 self.help_inputtype({"1st":'1', "2nd":'2', "3rd":'3'},
1329 "using dict as input type")
1330
1331 def test_using_sequence(self):
1332 # Test passing in a sequence of two-item sequences as an argument.
1333 self.help_inputtype([('1st', '1'), ('2nd', '2'), ('3rd', '3')],
1334 "using sequence of two-item tuples as input")
1335
1336 def test_quoting(self):
1337 # Make sure keys and values are quoted using quote_plus()
1338 given = {"&":"="}
1339 expect = "%s=%s" % (hexescape('&'), hexescape('='))
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001340 result = urllib.parse.urlencode(given)
Brett Cannon74bfd702003-04-25 09:39:47 +00001341 self.assertEqual(expect, result)
1342 given = {"key name":"A bunch of pluses"}
1343 expect = "key+name=A+bunch+of+pluses"
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001344 result = urllib.parse.urlencode(given)
Brett Cannon74bfd702003-04-25 09:39:47 +00001345 self.assertEqual(expect, result)
1346
1347 def test_doseq(self):
1348 # Test that passing True for 'doseq' parameter works correctly
1349 given = {'sequence':['1', '2', '3']}
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001350 expect = "sequence=%s" % urllib.parse.quote_plus(str(['1', '2', '3']))
1351 result = urllib.parse.urlencode(given)
Brett Cannon74bfd702003-04-25 09:39:47 +00001352 self.assertEqual(expect, result)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001353 result = urllib.parse.urlencode(given, True)
Brett Cannon74bfd702003-04-25 09:39:47 +00001354 for value in given["sequence"]:
1355 expect = "sequence=%s" % value
Ezio Melottib58e0bd2010-01-23 15:40:09 +00001356 self.assertIn(expect, result)
Brett Cannon74bfd702003-04-25 09:39:47 +00001357 self.assertEqual(result.count('&'), 2,
1358 "Expected 2 '&'s, got %s" % result.count('&'))
1359
Jeremy Hylton1ef7c6b2009-03-26 16:57:30 +00001360 def test_empty_sequence(self):
1361 self.assertEqual("", urllib.parse.urlencode({}))
1362 self.assertEqual("", urllib.parse.urlencode([]))
1363
1364 def test_nonstring_values(self):
1365 self.assertEqual("a=1", urllib.parse.urlencode({"a": 1}))
1366 self.assertEqual("a=None", urllib.parse.urlencode({"a": None}))
1367
1368 def test_nonstring_seq_values(self):
1369 self.assertEqual("a=1&a=2", urllib.parse.urlencode({"a": [1, 2]}, True))
1370 self.assertEqual("a=None&a=a",
1371 urllib.parse.urlencode({"a": [None, "a"]}, True))
Georg Brandl2daf6ae2012-02-20 19:54:16 +01001372 data = collections.OrderedDict([("a", 1), ("b", 1)])
Jeremy Hylton1ef7c6b2009-03-26 16:57:30 +00001373 self.assertEqual("a=a&a=b",
Georg Brandl2daf6ae2012-02-20 19:54:16 +01001374 urllib.parse.urlencode({"a": data}, True))
Jeremy Hylton1ef7c6b2009-03-26 16:57:30 +00001375
Senthil Kumarandf022da2010-07-03 17:48:22 +00001376 def test_urlencode_encoding(self):
1377 # ASCII encoding. Expect %3F with errors="replace'
1378 given = (('\u00a0', '\u00c1'),)
1379 expect = '%3F=%3F'
1380 result = urllib.parse.urlencode(given, encoding="ASCII", errors="replace")
1381 self.assertEqual(expect, result)
1382
1383 # Default is UTF-8 encoding.
1384 given = (('\u00a0', '\u00c1'),)
1385 expect = '%C2%A0=%C3%81'
1386 result = urllib.parse.urlencode(given)
1387 self.assertEqual(expect, result)
1388
1389 # Latin-1 encoding.
1390 given = (('\u00a0', '\u00c1'),)
1391 expect = '%A0=%C1'
1392 result = urllib.parse.urlencode(given, encoding="latin-1")
1393 self.assertEqual(expect, result)
1394
1395 def test_urlencode_encoding_doseq(self):
1396 # ASCII Encoding. Expect %3F with errors="replace'
1397 given = (('\u00a0', '\u00c1'),)
1398 expect = '%3F=%3F'
1399 result = urllib.parse.urlencode(given, doseq=True,
1400 encoding="ASCII", errors="replace")
1401 self.assertEqual(expect, result)
1402
1403 # ASCII Encoding. On a sequence of values.
1404 given = (("\u00a0", (1, "\u00c1")),)
1405 expect = '%3F=1&%3F=%3F'
1406 result = urllib.parse.urlencode(given, True,
1407 encoding="ASCII", errors="replace")
1408 self.assertEqual(expect, result)
1409
1410 # Utf-8
1411 given = (("\u00a0", "\u00c1"),)
1412 expect = '%C2%A0=%C3%81'
1413 result = urllib.parse.urlencode(given, True)
1414 self.assertEqual(expect, result)
1415
1416 given = (("\u00a0", (42, "\u00c1")),)
1417 expect = '%C2%A0=42&%C2%A0=%C3%81'
1418 result = urllib.parse.urlencode(given, True)
1419 self.assertEqual(expect, result)
1420
1421 # latin-1
1422 given = (("\u00a0", "\u00c1"),)
1423 expect = '%A0=%C1'
1424 result = urllib.parse.urlencode(given, True, encoding="latin-1")
1425 self.assertEqual(expect, result)
1426
1427 given = (("\u00a0", (42, "\u00c1")),)
1428 expect = '%A0=42&%A0=%C1'
1429 result = urllib.parse.urlencode(given, True, encoding="latin-1")
1430 self.assertEqual(expect, result)
1431
1432 def test_urlencode_bytes(self):
1433 given = ((b'\xa0\x24', b'\xc1\x24'),)
1434 expect = '%A0%24=%C1%24'
1435 result = urllib.parse.urlencode(given)
1436 self.assertEqual(expect, result)
1437 result = urllib.parse.urlencode(given, True)
1438 self.assertEqual(expect, result)
1439
1440 # Sequence of values
1441 given = ((b'\xa0\x24', (42, b'\xc1\x24')),)
1442 expect = '%A0%24=42&%A0%24=%C1%24'
1443 result = urllib.parse.urlencode(given, True)
1444 self.assertEqual(expect, result)
1445
1446 def test_urlencode_encoding_safe_parameter(self):
1447
1448 # Send '$' (\x24) as safe character
1449 # Default utf-8 encoding
1450
1451 given = ((b'\xa0\x24', b'\xc1\x24'),)
1452 result = urllib.parse.urlencode(given, safe=":$")
1453 expect = '%A0$=%C1$'
1454 self.assertEqual(expect, result)
1455
1456 given = ((b'\xa0\x24', b'\xc1\x24'),)
1457 result = urllib.parse.urlencode(given, doseq=True, safe=":$")
1458 expect = '%A0$=%C1$'
1459 self.assertEqual(expect, result)
1460
1461 # Safe parameter in sequence
1462 given = ((b'\xa0\x24', (b'\xc1\x24', 0xd, 42)),)
1463 expect = '%A0$=%C1$&%A0$=13&%A0$=42'
1464 result = urllib.parse.urlencode(given, True, safe=":$")
1465 self.assertEqual(expect, result)
1466
1467 # Test all above in latin-1 encoding
1468
1469 given = ((b'\xa0\x24', b'\xc1\x24'),)
1470 result = urllib.parse.urlencode(given, safe=":$",
1471 encoding="latin-1")
1472 expect = '%A0$=%C1$'
1473 self.assertEqual(expect, result)
1474
1475 given = ((b'\xa0\x24', b'\xc1\x24'),)
1476 expect = '%A0$=%C1$'
1477 result = urllib.parse.urlencode(given, doseq=True, safe=":$",
1478 encoding="latin-1")
1479
1480 given = ((b'\xa0\x24', (b'\xc1\x24', 0xd, 42)),)
1481 expect = '%A0$=%C1$&%A0$=13&%A0$=42'
1482 result = urllib.parse.urlencode(given, True, safe=":$",
1483 encoding="latin-1")
1484 self.assertEqual(expect, result)
1485
class Pathname_Tests(unittest.TestCase):
    """Test pathname2url() and url2pathname()"""

    def test_basic(self):
        # Make sure simple tests pass
        expected_path = os.path.join("parts", "of", "a", "path")
        expected_url = "parts/of/a/path"
        result = urllib.request.pathname2url(expected_path)
        self.assertEqual(expected_url, result,
                         "pathname2url() failed; %s != %s" %
                         (expected_url, result))
        result = urllib.request.url2pathname(expected_url)
        self.assertEqual(expected_path, result,
                         "url2pathname() failed; %s != %s" %
                         (expected_path, result))

    def test_quoting(self):
        # Test automatic quoting and unquoting works for pathname2url() and
        # url2pathname() respectively
        given = os.path.join("needs", "quot=ing", "here")
        expect = "needs/%s/here" % urllib.parse.quote("quot=ing")
        result = urllib.request.pathname2url(given)
        self.assertEqual(expect, result,
                         "pathname2url() failed; %s != %s" %
                         (expect, result))
        # Converting the quoted URL back must give the original path.
        expect = given
        result = urllib.request.url2pathname(result)
        self.assertEqual(expect, result,
                         "url2pathname() failed; %s != %s" %
                         (expect, result))
        given = os.path.join("make sure", "using_quote")
        expect = "%s/using_quote" % urllib.parse.quote("make sure")
        result = urllib.request.pathname2url(given)
        self.assertEqual(expect, result,
                         "pathname2url() failed; %s != %s" %
                         (expect, result))
        # A literal '+' in the URL is expected to stay a '+' in the path.
        given = "make+sure/using_unquote"
        expect = os.path.join("make+sure", "using_unquote")
        result = urllib.request.url2pathname(given)
        self.assertEqual(expect, result,
                         "url2pathname() failed; %s != %s" %
                         (expect, result))

    @unittest.skipUnless(sys.platform == 'win32',
                         'test specific to the urllib.request.url2pathname '
                         'function.')
    def test_ntpath(self):
        # Several URL spellings of a drive root must all map to 'C:\'.
        given = ('/C:/', '///C:/', '/C|//')
        expect = 'C:\\'
        for url in given:
            result = urllib.request.url2pathname(url)
            self.assertEqual(expect, result,
                             'urllib.request.url2pathname() failed; %s != %s' %
                             (expect, result))
        given = '///C|/path'
        expect = 'C:\\path'
        result = urllib.request.url2pathname(given)
        self.assertEqual(expect, result,
                         'urllib.request.url2pathname() failed; %s != %s' %
                         (expect, result))
1545
class Utility_Tests(unittest.TestCase):
    """Checks for the small helper functions exposed by urllib."""

    def test_thishost(self):
        """urllib.request.thishost() must hand back a tuple"""
        host_info = urllib.request.thishost()
        self.assertIsInstance(host_info, tuple)
1552
Senthil Kumaran690ce9b2009-05-05 18:41:13 +00001553
class URLopener_Tests(FakeHTTPMixin, unittest.TestCase):
    """Exercise open() and retrieve() on the URLopener class."""

    def test_quoted_open(self):
        class DummyURLopener(urllib.request.URLopener):
            # open() dispatches "spam:" URLs here; echo what it passed in.
            def open_spam(self, url):
                return url

        expected_warning = (
            'DummyURLopener style of invoking requests is deprecated.',
            DeprecationWarning,
        )
        with warnings_helper.check_warnings(expected_warning):
            opened = DummyURLopener().open('spam://example/ /')
            self.assertEqual(opened, '//example/%20/')

            # test the safe characters are not quoted by urlopen
            opened = DummyURLopener().open(
                "spam://c:|windows%/:=&?~#+!$,;'@()*[]|/path/")
            self.assertEqual(
                opened, "//c:|windows%/:=&?~#+!$,;'@()*[]|/path/")

    @warnings_helper.ignore_warnings(category=DeprecationWarning)
    def test_urlopener_retrieve_file(self):
        with os_helper.temp_dir() as tmpdir:
            handle, tmpfile = tempfile.mkstemp(dir=tmpdir)
            os.close(handle)
            fileurl = "file:" + urllib.request.pathname2url(tmpfile)
            retrieved = urllib.request.URLopener().retrieve(fileurl)
            filename = retrieved[0]
            # Some buildbots have TEMP folder that uses a lowercase drive
            # letter.
            self.assertEqual(os.path.normcase(filename),
                             os.path.normcase(tmpfile))

    @warnings_helper.ignore_warnings(category=DeprecationWarning)
    def test_urlopener_retrieve_remote(self):
        url = "http://www.python.org/file.txt"
        self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello!")
        self.addCleanup(self.unfakehttp)
        retrieved = urllib.request.URLopener().retrieve(url)
        extension = os.path.splitext(retrieved[0])[1]
        self.assertEqual(extension, ".txt")

    @warnings_helper.ignore_warnings(category=DeprecationWarning)
    def test_local_file_open(self):
        # bpo-35907, CVE-2019-9948: urllib must reject local_file:// scheme
        class DummyURLopener(urllib.request.URLopener):
            def open_local_file(self, url):
                return url

        # Each entry builds its callable lazily, so a fresh opener is
        # constructed immediately before the call that uses it.
        entry_points = (
            lambda: urllib.request.urlopen,
            lambda: urllib.request.URLopener().open,
            lambda: urllib.request.URLopener().retrieve,
            lambda: DummyURLopener().open,
            lambda: DummyURLopener().retrieve,
        )
        for url in ('local_file://example', 'local-file://example'):
            for make_callable in entry_points:
                self.assertRaises(OSError, make_callable(), url)
1602
Xtreakc661b302019-05-19 19:10:06 +05301603
class RequestTests(unittest.TestCase):
    """Checks on how urllib.request.Request picks its HTTP method."""

    def test_default_values(self):
        make_request = urllib.request.Request
        # No data -> GET; any data object (even an empty dict) -> POST.
        without_data = make_request("http://www.python.org")
        self.assertEqual(without_data.get_method(), 'GET')
        with_data = make_request("http://www.python.org", {})
        self.assertEqual(with_data.get_method(), 'POST')

    def test_with_method_arg(self):
        make_request = urllib.request.Request
        # An explicit method wins, with or without data.
        for positional in (("http://www.python.org",),
                           ("http://www.python.org", {})):
            head_request = make_request(*positional, method='HEAD')
            self.assertEqual(head_request.method, 'HEAD')
            self.assertEqual(head_request.get_method(), 'HEAD')
        # Reassigning the attribute afterwards changes what get_method()
        # reports.
        mutable_request = make_request("http://www.python.org", method='GET')
        self.assertEqual(mutable_request.get_method(), 'GET')
        mutable_request.method = 'HEAD'
        self.assertEqual(mutable_request.get_method(), 'HEAD')
Skip Montanaro080c9972001-01-28 21:12:22 +00001626
1627
class URL2PathNameTests(unittest.TestCase):

    def test_converting_drive_letter(self):
        # Both '|' and ':' are accepted after the drive letter.
        conversions = (
            ("///C|", 'C:'),
            ("///C:", 'C:'),
            ("///C|/", 'C:\\'),
        )
        for url, wanted in conversions:
            self.assertEqual(url2pathname(url), wanted)

    def test_converting_when_no_drive_letter(self):
        # cannot end a raw string in \
        conversions = (
            ("///C/test/", r'\\\C\test' '\\'),
            ("////C/test/", r'\\C\test' '\\'),
        )
        for url, wanted in conversions:
            self.assertEqual(url2pathname(url), wanted)

    def test_simple_compare(self):
        converted = url2pathname("///C|/foo/bar/spam.foo")
        self.assertEqual(converted, r'C:\foo\bar\spam.foo')

    def test_non_ascii_drive_letter(self):
        with self.assertRaises(IOError):
            url2pathname("///\u00e8|/")

    def test_roundtrip_url2pathname(self):
        # path -> URL -> path must reproduce the starting path.
        starting_paths = (
            'C:',
            r'\\\C\test\\',
            r'C:\foo\bar\spam.foo',
        )
        for original in starting_paths:
            self.assertEqual(url2pathname(pathname2url(original)), original)
Senthil Kumaran277e9092013-04-10 20:51:19 -07001654
class PathName2URLTests(unittest.TestCase):

    def test_converting_drive_letter(self):
        # A bare drive and a drive root both map to the same URL.
        for path in ("C:", "C:\\"):
            self.assertEqual(pathname2url(path), '///C:')

    def test_converting_when_no_drive_letter(self):
        # A raw string cannot end in a backslash, hence the concatenation.
        conversions = (
            (r"\\\folder\test" "\\", '/////folder/test/'),
            (r"\\folder\test" "\\", '////folder/test/'),
            (r"\folder\test" "\\", '/folder/test/'),
        )
        for path, wanted in conversions:
            self.assertEqual(pathname2url(path), wanted)

    def test_simple_compare(self):
        converted = pathname2url(r'C:\foo\bar\spam.foo')
        self.assertEqual(converted, "///C:/foo/bar/spam.foo")

    def test_long_drive_letter(self):
        with self.assertRaises(IOError):
            pathname2url("XX:\\")

    def test_roundtrip_pathname2url(self):
        # URL -> path -> URL must reproduce the starting URL.
        starting_urls = (
            '///C:',
            '/////folder/test/',
            '///C:/foo/bar/spam.foo',
        )
        for original in starting_urls:
            self.assertEqual(pathname2url(url2pathname(original)), original)
Brett Cannon74bfd702003-04-25 09:39:47 +00001682
if __name__ == "__main__":
    # Run this module's tests when it is executed directly as a script.
    unittest.main()