blob: 68bb49efb281070070ff80f818710a8547699da1 [file] [log] [blame]
Raymond Hettinger15f44ab2016-08-30 10:47:49 -07001"""Regression tests for what was in Python 2's "urllib" module"""
Brett Cannon74bfd702003-04-25 09:39:47 +00002
Jeremy Hylton1afc1692008-06-18 20:49:58 +00003import urllib.parse
4import urllib.request
guido@google.coma119df92011-03-29 11:41:02 -07005import urllib.error
Georg Brandl24420152008-05-26 16:32:26 +00006import http.client
Barry Warsaw820c1202008-06-12 04:06:45 +00007import email.message
Jeremy Hylton66dc8c52007-08-04 03:42:26 +00008import io
Brett Cannon74bfd702003-04-25 09:39:47 +00009import unittest
Benjamin Peterson3c2dca62014-06-07 15:08:04 -070010from unittest.mock import patch
Benjamin Petersonee8712c2008-05-20 21:35:26 +000011from test import support
Brett Cannon74bfd702003-04-25 09:39:47 +000012import os
Antoine Pitrou07df6552014-11-02 17:23:14 +010013try:
14 import ssl
15except ImportError:
16 ssl = None
Senthil Kumaran2d2ea1b2011-04-14 13:16:30 +080017import sys
Georg Brandl5a650a22005-08-26 08:51:34 +000018import tempfile
Senthil Kumaran277e9092013-04-10 20:51:19 -070019from nturl2path import url2pathname, pathname2url
Jeremy Hylton6102e292000-08-31 15:48:10 +000020
Senthil Kumaranc5c5a142012-01-14 19:09:04 +080021from base64 import b64encode
Georg Brandl2daf6ae2012-02-20 19:54:16 +010022import collections
Senthil Kumaranc5c5a142012-01-14 19:09:04 +080023
Senthil Kumaran8b081b72013-04-10 20:53:12 -070024
def hexescape(char):
    """Return *char* percent-escaped the way RFC 2396 specifies.

    The character's code point is written as upper-case hexadecimal,
    zero-padded to at least two digits, and prefixed with "%".
    """
    return "%%%02X" % ord(char)
Jeremy Hylton6102e292000-08-31 15:48:10 +000031
# Shortcut for testing FancyURLopener: the opener created on the first
# proxy-less urlopen() call is cached here and reused by later calls.
_urlopener = None


def urlopen(url, data=None, proxies=None):
    """urlopen(url [, data]) -> open file-like object

    When *proxies* is given, a fresh urllib.request.FancyURLopener is built
    for this call only.  Otherwise the module-level cached opener is used
    (and created on first use).  *data* is forwarded to the opener's open()
    only when it is not None.
    """
    global _urlopener
    if proxies is not None:
        opener = urllib.request.FancyURLopener(proxies=proxies)
    else:
        if not _urlopener:
            _urlopener = FancyURLopener()
        opener = _urlopener
    # Leave the data argument out entirely when there is no payload.
    open_args = (url,) if data is None else (url, data)
    return opener.open(*open_args)
50
Senthil Kumarance260142011-11-01 01:35:17 +080051
def FancyURLopener():
    """Return a new urllib.request.FancyURLopener instance.

    The opener is constructed inside support.check_warnings(), which is
    given the deprecation message and DeprecationWarning as the filter.
    NOTE(review): presumably this captures the expected warning so callers
    need not handle it -- confirm against test.support.
    """
    expected_warning = (
        'FancyURLopener style of invoking requests is deprecated.',
        DeprecationWarning,
    )
    with support.check_warnings(expected_warning):
        opener = urllib.request.FancyURLopener()
    return opener
57
58
def fakehttp(fakedata, mock_close=False):
    """Build an HTTPConnection subclass that replays *fakedata*.

    The returned class never touches the network: connect() installs an
    in-memory socket pre-loaded with *fakedata*, and whatever is sent
    through that socket is stored on the class as ``buf``.  With
    *mock_close* true the connection's close() becomes a no-op.
    """

    class FakeSocket(io.BytesIO):
        # Starts at one and grows with every makefile() call; the
        # underlying buffer is only closed once this drops back to zero.
        io_refs = 1

        def sendall(self, data):
            # Keep the last chunk sent so tests can inspect the request.
            FakeHTTPConnection.buf = data

        def makefile(self, *args, **kwds):
            self.io_refs += 1
            return self

        def read(self, amt=None):
            # A closed fake socket reports EOF instead of raising.
            return b"" if self.closed else super().read(amt)

        def readline(self, length=None):
            return b"" if self.closed else super().readline(length)

        def close(self):
            self.io_refs -= 1
            if not self.io_refs:
                super().close()

    class FakeHTTPConnection(http.client.HTTPConnection):

        # buffer to store data for verification in urlopen tests.
        buf = None

        def connect(self):
            sock = FakeSocket(self.fakedata)
            self.sock = sock
            # Also publish the socket on the class for later inspection.
            type(self).fakesock = sock

        if mock_close:
            # bpo-36918: HTTPConnection destructor calls close() which calls
            # flush(). Problem: flush() calls self.fp.flush() which raises
            # "ValueError: I/O operation on closed file" which is logged as an
            # "Exception ignored in". Override close() to silence this error.
            def close(self):
                pass

    FakeHTTPConnection.fakedata = fakedata
    return FakeHTTPConnection
104
105
class FakeHTTPMixin:
    """Mixin that temporarily replaces http.client.HTTPConnection."""

    def fakehttp(self, fakedata, mock_close=False):
        """Install a fake connection class that replays *fakedata*.

        The class being replaced is remembered in ``_connection_class`` so
        that unfakehttp() can put it back.
        """
        replacement = fakehttp(fakedata, mock_close=mock_close)
        # The right-hand side is evaluated first, so the class that is
        # currently installed is the one that gets saved.
        self._connection_class, http.client.HTTPConnection = (
            http.client.HTTPConnection, replacement)

    def unfakehttp(self):
        """Restore the connection class saved by fakehttp()."""
        http.client.HTTPConnection = self._connection_class
114
115
class FakeFTPMixin:
    """Mixin that temporarily replaces urllib.request.ftpwrapper."""

    def fakeftp(self):
        """Install a do-nothing stand-in for urllib.request.ftpwrapper.

        The class being replaced is remembered in ``_ftpwrapper_class`` so
        that unfakeftp() can put it back.
        """

        class FakeFtpWrapper:
            # Accepts the same arguments the tests pass to the real wrapper
            # and ignores all of them.
            def __init__(self, user, passwd, host, port, dirs, timeout=None,
                         persistent=True):
                pass

            def retrfile(self, file, type):
                # An empty in-memory file paired with 0.
                empty_file = io.BytesIO()
                return empty_file, 0

            def close(self):
                pass

        self._ftpwrapper_class, urllib.request.ftpwrapper = (
            urllib.request.ftpwrapper, FakeFtpWrapper)

    def unfakeftp(self):
        """Restore the wrapper class saved by fakeftp()."""
        urllib.request.ftpwrapper = self._ftpwrapper_class
134
135
class urlopen_FileTests(unittest.TestCase):
    """Test urlopen() opening a temporary file.

    Try to test as much functionality as possible so as to cut down on reliance
    on connecting to the Net for testing.

    """

    def setUp(self):
        """Write a one-line temp file and open it through a file: URL."""
        # Create a temp file to use for testing
        self.text = bytes("test_urllib: %s\n" % self.__class__.__name__,
                          "ascii")
        self.pathname = support.TESTFN
        with open(self.pathname, 'wb') as f:
            f.write(self.text)
        # Register the removal before calling urlopen(): unittest skips
        # tearDown() when setUp() raises but still runs cleanups, so the
        # file cannot be left behind.  Cleanups run after tearDown(), i.e.
        # only once the response object has been closed.
        self.addCleanup(os.remove, self.pathname)
        self.quoted_pathname = urllib.parse.quote(self.pathname)
        self.returned_obj = urlopen("file:%s" % self.quoted_pathname)

    def tearDown(self):
        """Shut down the open object"""
        # The temp file itself is removed by the cleanup set up in setUp().
        self.returned_obj.close()

    def test_interface(self):
        # Make sure object returned by urlopen() has the specified methods
        for attr in ("read", "readline", "readlines", "fileno",
                     "close", "info", "geturl", "getcode", "__iter__"):
            self.assertTrue(hasattr(self.returned_obj, attr),
                            "object returned by urlopen() lacks %s attribute" %
                            attr)

    def test_read(self):
        self.assertEqual(self.text, self.returned_obj.read())

    def test_readline(self):
        self.assertEqual(self.text, self.returned_obj.readline())
        self.assertEqual(b'', self.returned_obj.readline(),
                         "calling readline() after exhausting the file did not"
                         " return an empty string")

    def test_readlines(self):
        lines_list = self.returned_obj.readlines()
        self.assertEqual(len(lines_list), 1,
                         "readlines() returned the wrong number of lines")
        self.assertEqual(lines_list[0], self.text,
                         "readlines() returned improper text")

    def test_fileno(self):
        file_num = self.returned_obj.fileno()
        self.assertIsInstance(file_num, int, "fileno() did not return an int")
        self.assertEqual(os.read(file_num, len(self.text)), self.text,
                         "Reading on the file descriptor returned by fileno() "
                         "did not return the expected text")

    def test_close(self):
        # Test close() by calling it here and then having it be called again
        # by the tearDown() method for the test
        self.returned_obj.close()

    def test_headers(self):
        self.assertIsInstance(self.returned_obj.headers, email.message.Message)

    def test_url(self):
        self.assertEqual(self.returned_obj.url, self.quoted_pathname)

    def test_status(self):
        self.assertIsNone(self.returned_obj.status)

    def test_info(self):
        self.assertIsInstance(self.returned_obj.info(), email.message.Message)

    def test_geturl(self):
        self.assertEqual(self.returned_obj.geturl(), self.quoted_pathname)

    def test_getcode(self):
        self.assertIsNone(self.returned_obj.getcode())

    def test_iter(self):
        # Test iterator
        # Don't need to count number of iterations since test would fail the
        # instant it returned anything beyond the first line from the
        # comparison.
        # Use the iterator in the usual implicit way to test for ticket #4608.
        for line in self.returned_obj:
            self.assertEqual(line, self.text)

    def test_relativelocalfile(self):
        # A relative path without a scheme is rejected with ValueError.
        self.assertRaises(ValueError, urllib.request.urlopen,
                          './' + self.pathname)
227
Senthil Kumaranefbd4ea2017-04-01 23:47:35 -0700228
class ProxyTests(unittest.TestCase):
    """Tests for getproxies_environment() and proxy_bypass_environment()."""

    def setUp(self):
        # Records changes to env vars
        self.env = support.EnvironmentVarGuard()
        # Delete all proxy related env vars.  The names are collected first
        # so the environment is not modified while it is being iterated.
        proxy_names = [name for name in os.environ if 'proxy' in name.lower()]
        for name in proxy_names:
            self.env.unset(name)

    def tearDown(self):
        # Restore all proxy related env vars
        self.env.__exit__()
        del self.env

    def test_getproxies_environment_keep_no_proxies(self):
        bypass = urllib.request.proxy_bypass_environment
        self.env.set('NO_PROXY', 'localhost')
        # getproxies_environment use lowered case truncated (no '_proxy') keys
        proxies = urllib.request.getproxies_environment()
        self.assertEqual('localhost', proxies['no'])
        # List of no_proxies with space.
        self.env.set('NO_PROXY', 'localhost, anotherdomain.com, newdomain.com:1234')
        self.assertTrue(bypass('anotherdomain.com'))
        self.assertTrue(bypass('anotherdomain.com:8888'))
        self.assertTrue(bypass('newdomain.com:1234'))

    def test_proxy_cgi_ignore(self):
        getproxies = urllib.request.getproxies_environment
        try:
            self.env.set('HTTP_PROXY', 'http://somewhere:3128')
            self.assertEqual('http://somewhere:3128', getproxies()['http'])
            # Once REQUEST_METHOD is set, the 'http' entry must be gone.
            self.env.set('REQUEST_METHOD', 'GET')
            self.assertNotIn('http', getproxies())
        finally:
            self.env.unset('REQUEST_METHOD')
            self.env.unset('HTTP_PROXY')

    def test_proxy_bypass_environment_host_match(self):
        self.env.set('NO_PROXY',
                     'localhost, anotherdomain.com, newdomain.com:1234, .d.o.t')
        bypass = urllib.request.proxy_bypass_environment
        # Hosts that must bypass the proxy.
        self.assertTrue(bypass('localhost'))
        self.assertTrue(bypass('LocalHost'))                 # MixedCase
        self.assertTrue(bypass('LOCALHOST'))                 # UPPERCASE
        self.assertTrue(bypass('.localhost'))
        self.assertTrue(bypass('newdomain.com:1234'))
        self.assertTrue(bypass('.newdomain.com:1234'))
        self.assertTrue(bypass('foo.d.o.t'))                 # issue 29142
        self.assertTrue(bypass('d.o.t'))
        self.assertTrue(bypass('anotherdomain.com:8888'))
        self.assertTrue(bypass('.anotherdomain.com:8888'))
        self.assertTrue(bypass('www.newdomain.com:1234'))
        # Hosts that must not bypass the proxy.
        self.assertFalse(bypass('prelocalhost'))
        self.assertFalse(bypass('newdomain.com'))            # no port
        self.assertFalse(bypass('newdomain.com:1235'))       # wrong port

    def test_proxy_bypass_environment_always_match(self):
        bypass = urllib.request.proxy_bypass_environment
        # A lone '*' bypasses every host.
        self.env.set('NO_PROXY', '*')
        self.assertTrue(bypass('newdomain.com'))
        self.assertTrue(bypass('newdomain.com:1234'))
        # '*' listed together with other entries is not a wildcard.
        self.env.set('NO_PROXY', '*, anotherdomain.com')
        self.assertTrue(bypass('anotherdomain.com'))
        self.assertFalse(bypass('newdomain.com'))
        self.assertFalse(bypass('newdomain.com:1234'))

    def test_proxy_bypass_environment_newline(self):
        self.env.set('NO_PROXY',
                     'localhost, anotherdomain.com, newdomain.com:1234')
        bypass = urllib.request.proxy_bypass_environment
        # A trailing newline must prevent a match.
        self.assertFalse(bypass('localhost\n'))
        self.assertFalse(bypass('anotherdomain.com:8888\n'))
        self.assertFalse(bypass('newdomain.com:1234\n'))
303
Senthil Kumaranefbd4ea2017-04-01 23:47:35 -0700304
class ProxyTests_withOrderedEnv(unittest.TestCase):
    """Proxy environment tests that depend on the order of the variables."""

    def setUp(self):
        # We need to test conditions, where variable order _is_ significant.
        # Monkey patch os.environ, start with empty fake environment; the
        # real mapping is kept so tearDown() can put it back.
        self._saved_env, os.environ = os.environ, collections.OrderedDict()

    def tearDown(self):
        os.environ = self._saved_env

    def test_getproxies_environment_prefer_lowercase(self):
        env = os.environ
        bypass = urllib.request.proxy_bypass_environment
        getproxies = urllib.request.getproxies_environment

        # Test lowercase preference with removal
        env['no_proxy'] = ''
        env['No_Proxy'] = 'localhost'
        self.assertFalse(bypass('localhost'))
        self.assertFalse(bypass('arbitrary'))
        env['http_proxy'] = ''
        env['HTTP_PROXY'] = 'http://somewhere:3128'
        self.assertEqual({}, getproxies())

        # Test lowercase preference of proxy bypass and correct matching including ports
        env['no_proxy'] = 'localhost, noproxy.com, my.proxy:1234'
        env['No_Proxy'] = 'xyz.com'
        self.assertTrue(bypass('localhost'))
        self.assertTrue(bypass('noproxy.com:5678'))
        self.assertTrue(bypass('my.proxy:1234'))
        self.assertFalse(bypass('my.proxy'))
        self.assertFalse(bypass('arbitrary'))

        # Test lowercase preference with replacement
        env['http_proxy'] = 'http://somewhere:3128'
        env['Http_Proxy'] = 'http://somewhereelse:3128'
        self.assertEqual('http://somewhere:3128', getproxies()['http'])
Benjamin Peterson9bc93512008-09-22 22:10:59 +0000339
Senthil Kumaranefbd4ea2017-04-01 23:47:35 -0700340
class urlopen_HttpTests(unittest.TestCase, FakeHTTPMixin, FakeFTPMixin):
    """Test urlopen() opening a fake http connection."""

    def check_read(self, ver):
        """Open a canned 200 response whose status line uses version *ver*."""
        canned = b"HTTP/" + ver + b" 200 OK\r\n\r\nHello!"
        self.fakehttp(canned)
        try:
            resp = urlopen("http://python.org/")
            self.assertEqual(resp.readline(), b"Hello!")
            self.assertEqual(resp.readline(), b"")
            self.assertEqual(resp.geturl(), 'http://python.org/')
            self.assertEqual(resp.getcode(), 200)
        finally:
            self.unfakehttp()

    def test_url_fragment(self):
        # Issue #11703: geturl() omits fragments in the original URL.
        url = 'http://docs.python.org/library/urllib.html#OK'
        self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello!")
        try:
            resp = urllib.request.urlopen(url)
            self.assertEqual(resp.geturl(), url)
        finally:
            self.unfakehttp()

    def test_willclose(self):
        self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello!")
        try:
            response = urlopen("http://www.python.org")
            self.assertTrue(response.fp.will_close)
        finally:
            self.unfakehttp()

    @unittest.skipUnless(ssl, "ssl module required")
    def test_url_path_with_control_char_rejected(self):
        InvalidURL = http.client.InvalidURL
        for char_no in [*range(0x21), 0x7f]:
            char = chr(char_no)
            schemeless_url = f"//localhost:7777/test{char}/"
            # repr() of the character with its backslashes doubled, so the
            # regex below matches the repr literally.
            escaped_char_repr = repr(char).replace('\\', r'\\')
            expected = f"contain control.*{escaped_char_repr}"
            self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello.")
            try:
                # We explicitly test urllib.request.urlopen() instead of the top
                # level 'def urlopen()' function defined in this... (quite ugly)
                # test suite.  They use different url opening codepaths.  Plain
                # urlopen uses FancyURLOpener which goes via a codepath that
                # calls urllib.parse.quote() on the URL which makes all of the
                # above attempts at injection within the url _path_ safe.
                with self.assertRaisesRegex(InvalidURL, expected):
                    urllib.request.urlopen(f"http:{schemeless_url}")
                with self.assertRaisesRegex(InvalidURL, expected):
                    urllib.request.urlopen(f"https:{schemeless_url}")
                # This code path quotes the URL so there is no injection.
                resp = urlopen(f"http:{schemeless_url}")
                self.assertNotIn(char, resp.geturl())
            finally:
                self.unfakehttp()

    @unittest.skipUnless(ssl, "ssl module required")
    def test_url_path_with_newline_header_injection_rejected(self):
        self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello.")
        host = "localhost:7777?a=1 HTTP/1.1\r\nX-injected: header\r\nTEST: 123"
        schemeless_url = "//" + host + ":8080/test/?test=a"
        try:
            # We explicitly test urllib.request.urlopen() instead of the top
            # level 'def urlopen()' function defined in this... (quite ugly)
            # test suite.  They use different url opening codepaths.  Plain
            # urlopen uses FancyURLOpener which goes via a codepath that
            # calls urllib.parse.quote() on the URL which makes all of the
            # above attempts at injection within the url _path_ safe.
            InvalidURL = http.client.InvalidURL
            with self.assertRaisesRegex(
                    InvalidURL,
                    r"contain control.*\\r.*(found at least . .)"):
                urllib.request.urlopen(f"http:{schemeless_url}")
            with self.assertRaisesRegex(InvalidURL, r"contain control.*\\n"):
                urllib.request.urlopen(f"https:{schemeless_url}")
            # This code path quotes the URL so there is no injection.
            resp = urlopen(f"http:{schemeless_url}")
            for forbidden in (' ', '\r', '\n'):
                self.assertNotIn(forbidden, resp.geturl())
        finally:
            self.unfakehttp()

    @unittest.skipUnless(ssl, "ssl module required")
    def test_url_host_with_control_char_rejected(self):
        InvalidURL = http.client.InvalidURL
        for char_no in [*range(0x21), 0x7f]:
            char = chr(char_no)
            schemeless_url = f"//localhost{char}/test/"
            # Same literal-repr regex construction as in the path variant.
            escaped_char_repr = repr(char).replace('\\', r'\\')
            expected = f"contain control.*{escaped_char_repr}"
            self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello.")
            try:
                with self.assertRaisesRegex(InvalidURL, expected):
                    urlopen(f"http:{schemeless_url}")
                with self.assertRaisesRegex(InvalidURL, expected):
                    urlopen(f"https:{schemeless_url}")
            finally:
                self.unfakehttp()

    @unittest.skipUnless(ssl, "ssl module required")
    def test_url_host_with_newline_header_injection_rejected(self):
        self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello.")
        host = "localhost\r\nX-injected: header\r\n"
        schemeless_url = "//" + host + ":8080/test/?test=a"
        try:
            InvalidURL = http.client.InvalidURL
            with self.assertRaisesRegex(InvalidURL, r"contain control.*\\r"):
                urlopen(f"http:{schemeless_url}")
            with self.assertRaisesRegex(InvalidURL, r"contain control.*\\n"):
                urlopen(f"https:{schemeless_url}")
        finally:
            self.unfakehttp()

    def test_read_0_9(self):
        # "0.9" response accepted (but not "simple responses" without
        # a status line)
        self.check_read(b"0.9")

    def test_read_1_0(self):
        self.check_read(b"1.0")

    def test_read_1_1(self):
        self.check_read(b"1.1")

    def test_read_bogus(self):
        # urlopen() should raise OSError for many error codes.
        self.fakehttp(b'''HTTP/1.1 401 Authentication Required
Date: Wed, 02 Jan 2008 03:03:54 GMT
Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
Connection: close
Content-Type: text/html; charset=iso-8859-1
''', mock_close=True)
        try:
            with self.assertRaises(OSError):
                urlopen("http://python.org/")
        finally:
            self.unfakehttp()

    def test_invalid_redirect(self):
        # urlopen() should raise OSError for many error codes.
        self.fakehttp(b'''HTTP/1.1 302 Found
Date: Wed, 02 Jan 2008 03:03:54 GMT
Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
Location: file://guidocomputer.athome.com:/python/license
Connection: close
Content-Type: text/html; charset=iso-8859-1
''', mock_close=True)
        try:
            msg = "Redirection to url 'file:"
            with self.assertRaisesRegex(urllib.error.HTTPError, msg):
                urlopen("http://python.org/")
        finally:
            self.unfakehttp()

    def test_redirect_limit_independent(self):
        # Ticket #12923: make sure independent requests each use their
        # own retry limit.
        attempts = FancyURLopener().maxtries
        for _ in range(attempts):
            self.fakehttp(b'''HTTP/1.1 302 Found
Location: file://guidocomputer.athome.com:/python/license
Connection: close
''', mock_close=True)
            try:
                with self.assertRaises(urllib.error.HTTPError):
                    urlopen("http://something")
            finally:
                self.unfakehttp()

    def test_empty_socket(self):
        # urlopen() raises OSError if the underlying socket does not send any
        # data. (#1680230)
        self.fakehttp(b'')
        try:
            with self.assertRaises(OSError):
                urlopen("http://something")
        finally:
            self.unfakehttp()

    def test_missing_localfile(self):
        # Test for #10836
        with self.assertRaises(urllib.error.URLError) as caught:
            urlopen('file://localhost/a/file/which/doesnot/exists.py')
        error = caught.exception
        self.assertTrue(error.filename)
        self.assertTrue(error.reason)

    def test_file_notexists(self):
        fd, tmp_file = tempfile.mkstemp()
        tmp_fileurl = 'file://localhost/' + tmp_file.replace(os.path.sep, '/')
        try:
            # While the file exists it can be opened...
            self.assertTrue(os.path.exists(tmp_file))
            with urlopen(tmp_fileurl) as fobj:
                self.assertTrue(fobj)
        finally:
            os.close(fd)
            os.unlink(tmp_file)
        # ...and once it is gone the same URL raises URLError.
        self.assertFalse(os.path.exists(tmp_file))
        with self.assertRaises(urllib.error.URLError):
            urlopen(tmp_fileurl)

    def test_ftp_nohost(self):
        test_ftp_url = 'ftp:///path'
        with self.assertRaises(urllib.error.URLError) as caught:
            urlopen(test_ftp_url)
        error = caught.exception
        self.assertFalse(error.filename)
        self.assertTrue(error.reason)

    def test_ftp_nonexisting(self):
        with self.assertRaises(urllib.error.URLError) as caught:
            urlopen('ftp://localhost/a/file/which/doesnot/exists.py')
        error = caught.exception
        self.assertFalse(error.filename)
        self.assertTrue(error.reason)

    @patch.object(urllib.request, 'MAXFTPCACHE', 0)
    def test_ftp_cache_pruning(self):
        self.fakeftp()
        try:
            # Seed the cache with one entry while the limit is patched to 0,
            # then open an FTP URL through the fake wrapper.
            stale = urllib.request.ftpwrapper(
                'user', 'pass', 'localhost', 21, [])
            urllib.request.ftpcache['test'] = stale
            urlopen('ftp://localhost')
        finally:
            self.unfakeftp()

    def test_userpass_inurl(self):
        self.fakehttp(b"HTTP/1.0 200 OK\r\n\r\nHello!")
        try:
            resp = urlopen("http://user:pass@python.org/")
            self.assertEqual(resp.readline(), b"Hello!")
            self.assertEqual(resp.readline(), b"")
            self.assertEqual(resp.geturl(), 'http://user:pass@python.org/')
            self.assertEqual(resp.getcode(), 200)
        finally:
            self.unfakehttp()

    def test_userpass_inurl_w_spaces(self):
        self.fakehttp(b"HTTP/1.0 200 OK\r\n\r\nHello!")
        try:
            userpass = "a b:c d"
            url = f"http://{userpass}@python.org/"
            # The fake connection class installed above; the bytes sent
            # through it are stored on the class as ``buf``.
            fakehttp_wrapper = http.client.HTTPConnection
            token = b64encode(userpass.encode("ASCII")).decode("ASCII")
            authorization = f"Authorization: Basic {token}\r\n"
            resp = urlopen(url)
            # The authorization header must be in place
            sent = fakehttp_wrapper.buf.decode("UTF-8")
            self.assertIn(authorization, sent)
            self.assertEqual(resp.readline(), b"Hello!")
            self.assertEqual(resp.readline(), b"")
            # the spaces are quoted in URL so no match
            self.assertNotEqual(resp.geturl(), url)
            self.assertEqual(resp.getcode(), 200)
        finally:
            self.unfakehttp()

    def test_URLopener_deprecation(self):
        with support.check_warnings(('', DeprecationWarning)):
            urllib.request.URLopener()

    @unittest.skipUnless(ssl, "ssl module required")
    def test_cafile_and_context(self):
        # Passing both cafile and context must raise ValueError.
        context = ssl.create_default_context()
        with support.check_warnings(('', DeprecationWarning)), \
                self.assertRaises(ValueError):
            urllib.request.urlopen(
                "https://localhost", cafile="/nonexistent/path", context=context
            )
Senthil Kumarana5c85b32014-09-19 15:23:30 +0800606
Senthil Kumaranefbd4ea2017-04-01 23:47:35 -0700607
class urlopen_DataTests(unittest.TestCase):
    """Test urlopen() opening a data URL."""

    def setUp(self):
        # clear _opener global variable
        self.addCleanup(urllib.request.urlcleanup)

        # text containing URL special- and unicode-characters
        self.text = "test data URLs :;,%=& \u00f6 \u00c4 "
        # 2x1 pixel RGB PNG image with one black and one white pixel
        self.image = (
            b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\x02\x00\x00\x00'
            b'\x01\x08\x02\x00\x00\x00{@\xe8\xdd\x00\x00\x00\x01sRGB\x00\xae'
            b'\xce\x1c\xe9\x00\x00\x00\x0fIDAT\x08\xd7c```\xf8\xff\xff?\x00'
            b'\x06\x01\x02\xfe\no/\x1e\x00\x00\x00\x00IEND\xaeB`\x82')

        self.text_url = (
            "data:text/plain;charset=UTF-8,test%20data%20URLs%20%3A%3B%2C%25%3"
            "D%26%20%C3%B6%20%C3%84%20")
        self.text_url_base64 = (
            "data:text/plain;charset=ISO-8859-1;base64,dGVzdCBkYXRhIFVSTHMgOjs"
            "sJT0mIPYgxCA%3D")
        # base64 encoded data URL that contains ignorable spaces,
        # such as "\n", " ", "%0A", and "%20".
        self.image_url = (
            "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAIAAAABCAIAAAB7\n"
            "QOjdAAAAAXNSR0IArs4c6QAAAA9JREFUCNdj%0AYGBg%2BP//PwAGAQL%2BCm8 "
            "vHgAAAABJRU5ErkJggg%3D%3D%0A%20")

        # Every response is opened through _open() so that it is closed
        # again when the test finishes, whether it passed or failed.
        self.text_url_resp = self._open(self.text_url)
        self.text_url_base64_resp = self._open(self.text_url_base64)
        self.image_url_resp = self._open(self.image_url)

    def _open(self, url):
        """Open *url* with urlopen() and close the response at cleanup."""
        response = urllib.request.urlopen(url)
        self.addCleanup(response.close)
        return response

    def test_interface(self):
        # Make sure object returned by urlopen() has the specified methods
        for attr in ("read", "readline", "readlines",
                     "close", "info", "geturl", "getcode", "__iter__"):
            self.assertTrue(hasattr(self.text_url_resp, attr),
                            "object returned by urlopen() lacks %s attribute" %
                            attr)

    def test_info(self):
        self.assertIsInstance(self.text_url_resp.info(), email.message.Message)
        self.assertEqual(self.text_url_base64_resp.info().get_params(),
                         [('text/plain', ''), ('charset', 'ISO-8859-1')])
        self.assertEqual(self.image_url_resp.info()['content-length'],
                         str(len(self.image)))
        # A data URL without a media type reports the default one.
        self.assertEqual(self._open("data:,").info().get_params(),
                         [('text/plain', ''), ('charset', 'US-ASCII')])

    def test_geturl(self):
        self.assertEqual(self.text_url_resp.geturl(), self.text_url)
        self.assertEqual(self.text_url_base64_resp.geturl(),
                         self.text_url_base64)
        self.assertEqual(self.image_url_resp.geturl(), self.image_url)

    def test_read_text(self):
        self.assertEqual(self.text_url_resp.read().decode(
            dict(self.text_url_resp.info().get_params())['charset']), self.text)

    def test_read_text_base64(self):
        self.assertEqual(self.text_url_base64_resp.read().decode(
            dict(self.text_url_base64_resp.info().get_params())['charset']),
            self.text)

    def test_read_image(self):
        self.assertEqual(self.image_url_resp.read(), self.image)

    def test_missing_comma(self):
        self.assertRaises(ValueError, urllib.request.urlopen, 'data:text/plain')

    def test_invalid_base64_data(self):
        # missing padding character
        self.assertRaises(ValueError, urllib.request.urlopen, 'data:;base64,Cg=')
683
Senthil Kumaranefbd4ea2017-04-01 23:47:35 -0700684
class urlretrieve_FileTests(unittest.TestCase):
    """Test urllib.urlretrieve() on local files"""

    def setUp(self):
        # clear _opener global variable
        self.addCleanup(urllib.request.urlcleanup)

        # Create a list of temporary files. Each item in the list is a file
        # name (absolute path or relative to the current working directory).
        # All files in this list will be deleted in the tearDown method. Note,
        # this only helps make sure temporary files get deleted, but it
        # does nothing about trying to close files that may still be open. It
        # is the responsibility of the developer to properly close files even
        # when exceptional conditions occur.
        self.tempFiles = []

        # Create a temporary file.
        self.registerFileForCleanUp(support.TESTFN)
        self.text = b'testing urllib.urlretrieve'
        with open(support.TESTFN, 'wb') as source_file:
            source_file.write(self.text)

    def tearDown(self):
        # Delete the temporary files.
        for each in self.tempFiles:
            try:
                os.remove(each)
            except OSError:
                # Best effort: a registered file may never have been created
                # or may already be gone.
                pass

    def constructLocalFileUrl(self, filePath):
        """Return a file:// URL for *filePath*.

        Raises unittest.SkipTest when the absolute path cannot be encoded
        as UTF-8.
        """
        filePath = os.path.abspath(filePath)
        try:
            filePath.encode("utf-8")
        except UnicodeEncodeError:
            raise unittest.SkipTest("filePath is not encodable to utf8")
        return "file://%s" % urllib.request.pathname2url(filePath)

    def createNewTempFile(self, data=b""):
        """Creates a new temporary file containing the specified data,
        registers the file for deletion during the test fixture tear down, and
        returns the absolute path of the file."""

        newFd, newFilePath = tempfile.mkstemp()
        # Register before writing so the file is removed even if the write
        # fails.
        self.registerFileForCleanUp(newFilePath)
        with os.fdopen(newFd, "wb") as newFile:
            newFile.write(data)
        return newFilePath

    def registerFileForCleanUp(self, fileName):
        """Remember *fileName* so that tearDown() deletes it."""
        self.tempFiles.append(fileName)

    def test_basic(self):
        # Make sure that a local file just gets its own location returned and
        # a headers value is returned.
        result = urllib.request.urlretrieve("file:%s" % support.TESTFN)
        self.assertEqual(result[0], support.TESTFN)
        self.assertIsInstance(result[1], email.message.Message,
                              "did not get an email.message.Message instance "
                              "as second returned value")

    def test_copy(self):
        # Test that setting the filename argument works.
        second_temp = "%s.2" % support.TESTFN
        self.registerFileForCleanUp(second_temp)
        result = urllib.request.urlretrieve(self.constructLocalFileUrl(
            support.TESTFN), second_temp)
        self.assertEqual(second_temp, result[0])
        self.assertTrue(os.path.exists(second_temp), "copy of the file was not "
                                                      "made")
        with open(second_temp, 'rb') as copied_file:
            text = copied_file.read()
        self.assertEqual(self.text, text)

    def test_reporthook(self):
        # Make sure that the reporthook works.
        expected_block_count = 0

        def hooktester(block_count, block_read_size, file_size):
            # The hook must be called with consecutive block numbers
            # starting at zero.
            nonlocal expected_block_count
            self.assertIsInstance(block_count, int)
            self.assertIsInstance(block_read_size, int)
            self.assertIsInstance(file_size, int)
            self.assertEqual(block_count, expected_block_count)
            expected_block_count += 1

        second_temp = "%s.2" % support.TESTFN
        self.registerFileForCleanUp(second_temp)
        urllib.request.urlretrieve(
            self.constructLocalFileUrl(support.TESTFN),
            second_temp, hooktester)

    def test_reporthook_0_bytes(self):
        # Test on zero length file. Should call reporthook only 1 time.
        report = []

        def hooktester(block_count, block_read_size, file_size):
            report.append((block_count, block_read_size, file_size))

        srcFileName = self.createNewTempFile()
        urllib.request.urlretrieve(self.constructLocalFileUrl(srcFileName),
                                   support.TESTFN, hooktester)
        self.assertEqual(len(report), 1)
        self.assertEqual(report[0][2], 0)

    def test_reporthook_5_bytes(self):
        # Test on 5 byte file. Should call reporthook only 2 times (once when
        # the "network connection" is established and once when the block is
        # read).
        report = []

        def hooktester(block_count, block_read_size, file_size):
            report.append((block_count, block_read_size, file_size))

        srcFileName = self.createNewTempFile(b"x" * 5)
        urllib.request.urlretrieve(self.constructLocalFileUrl(srcFileName),
                                   support.TESTFN, hooktester)
        self.assertEqual(len(report), 2)
        self.assertEqual(report[0][2], 5)
        self.assertEqual(report[1][2], 5)

    def test_reporthook_8193_bytes(self):
        # Test on 8193 byte file. Should call reporthook only 3 times (once
        # when the "network connection" is established, once for the next 8192
        # bytes, and once for the last byte).
        report = []

        def hooktester(block_count, block_read_size, file_size):
            report.append((block_count, block_read_size, file_size))

        srcFileName = self.createNewTempFile(b"x" * 8193)
        urllib.request.urlretrieve(self.constructLocalFileUrl(srcFileName),
                                   support.TESTFN, hooktester)
        self.assertEqual(len(report), 3)
        self.assertEqual(report[0][2], 8193)
        self.assertEqual(report[0][1], 8192)
        self.assertEqual(report[1][1], 8192)
        self.assertEqual(report[2][1], 8192)
Skip Montanaro080c9972001-01-28 21:12:22 +0000826
Senthil Kumarance260142011-11-01 01:35:17 +0800827
class urlretrieve_HttpTests(unittest.TestCase, FakeHTTPMixin):
    """Test urllib.urlretrieve() using fake http connections"""

    def test_short_content_raises_ContentTooShortError(self):
        self.addCleanup(urllib.request.urlcleanup)

        # Canned reply that announces 100 bytes of content but carries
        # only a few.
        canned_reply = b'''HTTP/1.1 200 OK
Date: Wed, 02 Jan 2008 03:03:54 GMT
Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
Connection: close
Content-Length: 100
Content-Type: text/html; charset=iso-8859-1

FF
'''
        self.fakehttp(canned_reply)

        def _reporthook(block_num, block_size, total_size):
            """Accept the progress callback arguments and do nothing."""

        with self.assertRaises(urllib.error.ContentTooShortError):
            try:
                urllib.request.urlretrieve(support.TEST_HTTP_URL,
                                           reporthook=_reporthook)
            finally:
                self.unfakehttp()

    def test_short_content_raises_ContentTooShortError_without_reporthook(self):
        self.addCleanup(urllib.request.urlcleanup)

        # Same truncated reply as above, retrieved without a reporthook.
        canned_reply = b'''HTTP/1.1 200 OK
Date: Wed, 02 Jan 2008 03:03:54 GMT
Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
Connection: close
Content-Length: 100
Content-Type: text/html; charset=iso-8859-1

FF
'''
        self.fakehttp(canned_reply)
        with self.assertRaises(urllib.error.ContentTooShortError):
            try:
                urllib.request.urlretrieve(support.TEST_HTTP_URL)
            finally:
                self.unfakehttp()
871
872
class QuotingTests(unittest.TestCase):
    r"""Tests for urllib.quote() and urllib.quote_plus()

    According to RFC 3986 (Uniform Resource Identifiers), to escape a
    character you write it as '%' + <2 character US-ASCII hex value>.
    The Python code of ``'%' + hex(ord(<character>))[2:]`` escapes a
    character properly. Case does not matter on the hex letters.

    The various character sets specified are:

    Reserved characters : ";/?:@&=+$,"
        Have special meaning in URIs and must be escaped if not being used for
        their special meaning
    Data characters : letters, digits, and "-_.!~*'()"
        Unreserved and do not need to be escaped; can be, though, if desired
    Control characters : 0x00 - 0x1F, 0x7F
        Have no use in URIs so must be escaped
    space : 0x20
        Must be escaped
    Delimiters : '<>#%"'
        Must be escaped
    Unwise : "{}|\^[]`"
        Must be escaped

    """

    def test_never_quote(self):
        # Make sure quote() does not quote letters, digits, and "_.-~"
        do_not_quote = ''.join(["ABCDEFGHIJKLMNOPQRSTUVWXYZ",
                                "abcdefghijklmnopqrstuvwxyz",
                                "0123456789",
                                "_.-~"])
        result = urllib.parse.quote(do_not_quote)
        self.assertEqual(do_not_quote, result,
                         "using quote(): %r != %r" % (do_not_quote, result))
        result = urllib.parse.quote_plus(do_not_quote)
        self.assertEqual(do_not_quote, result,
                         "using quote_plus(): %r != %r" % (do_not_quote, result))

    def test_default_safe(self):
        # Test '/' is default value for 'safe' parameter
        self.assertEqual(urllib.parse.quote.__defaults__[0], '/')

    def test_safe(self):
        # Test setting 'safe' parameter does what it should do
        quote_by_default = "<>"
        result = urllib.parse.quote(quote_by_default, safe=quote_by_default)
        self.assertEqual(quote_by_default, result,
                         "using quote(): %r != %r" % (quote_by_default, result))
        result = urllib.parse.quote_plus(quote_by_default,
                                         safe=quote_by_default)
        self.assertEqual(quote_by_default, result,
                         "using quote_plus(): %r != %r" %
                         (quote_by_default, result))
        # Safe expressed as bytes rather than str
        result = urllib.parse.quote(quote_by_default, safe=b"<>")
        self.assertEqual(quote_by_default, result,
                         "using quote(): %r != %r" % (quote_by_default, result))
        # "Safe" non-ASCII characters should have no effect
        # (Since URIs are not allowed to have non-ASCII characters)
        result = urllib.parse.quote("a\xfcb", encoding="latin-1", safe="\xfc")
        expect = urllib.parse.quote("a\xfcb", encoding="latin-1", safe="")
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" %
                         (expect, result))
        # Same as above, but using a bytes rather than str
        result = urllib.parse.quote("a\xfcb", encoding="latin-1", safe=b"\xfc")
        expect = urllib.parse.quote("a\xfcb", encoding="latin-1", safe="")
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" %
                         (expect, result))

    def test_default_quoting(self):
        # Make sure all characters that should be quoted are by default sans
        # space (separate test for that).
        should_quote = [chr(num) for num in range(32)]  # For 0x00 - 0x1F
        should_quote.append(r'<>#%"{}|\^[]`')
        should_quote.append(chr(127))  # For 0x7F
        should_quote = ''.join(should_quote)
        for char in should_quote:
            result = urllib.parse.quote(char)
            self.assertEqual(hexescape(char), result,
                             "using quote(): "
                             "%s should be escaped to %s, not %s" %
                             (char, hexescape(char), result))
            result = urllib.parse.quote_plus(char)
            self.assertEqual(hexescape(char), result,
                             "using quote_plus(): "
                             "%s should be escaped to %s, not %s" %
                             (char, hexescape(char), result))
        # Characters needing escapes mixed with characters that do not.
        partial_quote = "ab[]cd"
        expected = "ab%5B%5Dcd"
        result = urllib.parse.quote(partial_quote)
        self.assertEqual(expected, result,
                         "using quote(): %r != %r" % (expected, result))
        result = urllib.parse.quote_plus(partial_quote)
        self.assertEqual(expected, result,
                         "using quote_plus(): %r != %r" % (expected, result))

    def test_quoting_space(self):
        # Make sure quote() and quote_plus() handle spaces as specified in
        # their unique way
        result = urllib.parse.quote(' ')
        self.assertEqual(result, hexescape(' '),
                         "using quote(): %r != %r" % (result, hexescape(' ')))
        result = urllib.parse.quote_plus(' ')
        self.assertEqual(result, '+',
                         "using quote_plus(): %r != +" % result)
        given = "a b cd e f"
        expect = given.replace(' ', hexescape(' '))
        result = urllib.parse.quote(given)
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        expect = given.replace(' ', '+')
        result = urllib.parse.quote_plus(given)
        self.assertEqual(expect, result,
                         "using quote_plus(): %r != %r" % (expect, result))

    def test_quoting_plus(self):
        self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma'),
                         'alpha%2Bbeta+gamma')
        self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma', '+'),
                         'alpha+beta+gamma')
        # Test with bytes
        self.assertEqual(urllib.parse.quote_plus(b'alpha+beta gamma'),
                         'alpha%2Bbeta+gamma')
        # Test with safe bytes
        self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma', b'+'),
                         'alpha+beta+gamma')

    def test_quote_bytes(self):
        # Bytes should quote directly to percent-encoded values
        given = b"\xa2\xd8ab\xff"
        expect = "%A2%D8ab%FF"
        result = urllib.parse.quote(given)
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        # Encoding argument should raise type error on bytes input
        self.assertRaises(TypeError, urllib.parse.quote, given,
                          encoding="latin-1")
        # quote_from_bytes should work the same
        result = urllib.parse.quote_from_bytes(given)
        self.assertEqual(expect, result,
                         "using quote_from_bytes(): %r != %r"
                         % (expect, result))

    def test_quote_with_unicode(self):
        # Characters in Latin-1 range, encoded by default in UTF-8
        given = "\xa2\xd8ab\xff"
        expect = "%C2%A2%C3%98ab%C3%BF"
        result = urllib.parse.quote(given)
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        # Characters in Latin-1 range, encoded with None (default)
        result = urllib.parse.quote(given, encoding=None, errors=None)
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        # Characters in Latin-1 range, encoded with Latin-1
        given = "\xa2\xd8ab\xff"
        expect = "%A2%D8ab%FF"
        result = urllib.parse.quote(given, encoding="latin-1")
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        # Characters in BMP, encoded by default in UTF-8
        given = "\u6f22\u5b57"  # "Kanji"
        expect = "%E6%BC%A2%E5%AD%97"
        result = urllib.parse.quote(given)
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        # Characters in BMP, encoded with Latin-1
        given = "\u6f22\u5b57"
        self.assertRaises(UnicodeEncodeError, urllib.parse.quote, given,
                          encoding="latin-1")
        # Characters in BMP, encoded with Latin-1, with replace error handling
        given = "\u6f22\u5b57"
        expect = "%3F%3F"  # "??"
        result = urllib.parse.quote(given, encoding="latin-1",
                                    errors="replace")
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        # Characters in BMP, Latin-1, with xmlcharrefreplace error handling
        given = "\u6f22\u5b57"
        expect = "%26%2328450%3B%26%2323383%3B"  # "&#28450;&#23383;"
        result = urllib.parse.quote(given, encoding="latin-1",
                                    errors="xmlcharrefreplace")
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))

    def test_quote_plus_with_unicode(self):
        # Encoding (latin-1) test for quote_plus
        given = "\xa2\xd8 \xff"
        expect = "%A2%D8+%FF"
        result = urllib.parse.quote_plus(given, encoding="latin-1")
        self.assertEqual(expect, result,
                         "using quote_plus(): %r != %r" % (expect, result))
        # Errors test for quote_plus
        given = "ab\u6f22\u5b57 cd"
        expect = "ab%3F%3F+cd"
        result = urllib.parse.quote_plus(given, encoding="latin-1",
                                         errors="replace")
        self.assertEqual(expect, result,
                         "using quote_plus(): %r != %r" % (expect, result))
1076
Senthil Kumarand496c4c2010-07-30 19:34:36 +00001077
Brett Cannon74bfd702003-04-25 09:39:47 +00001078class UnquotingTests(unittest.TestCase):
1079 """Tests for unquote() and unquote_plus()
Tim Petersc2659cf2003-05-12 20:19:37 +00001080
Brett Cannon74bfd702003-04-25 09:39:47 +00001081 See the doc string for quoting_Tests for details on quoting and such.
1082
1083 """
1084
def test_unquoting(self):
    """Check unquote()/unquote_plus() on the escape of each ASCII value."""
    escaped_chars = []
    for code_point in range(128):
        escaped = hexescape(chr(code_point))
        char = chr(code_point)
        unquoted = urllib.parse.unquote(escaped)
        self.assertEqual(char, unquoted,
                         "using unquote(): %r != %r" % (char, unquoted))
        unquoted = urllib.parse.unquote_plus(escaped)
        self.assertEqual(char, unquoted,
                         "using unquote_plus(): %r != %r" %
                         (char, unquoted))
        escaped_chars.append(escaped)
    # Unquote all 128 escapes in one go; exactly one '%' is expected in
    # the decoded text.
    decoded = urllib.parse.unquote(''.join(escaped_chars))
    self.assertEqual(decoded.count('%'), 1,
                     "using unquote(): not all characters escaped: "
                     "%s" % decoded)
    # Non-string arguments are rejected.
    for bad_argument in (None, ()):
        self.assertRaises((TypeError, AttributeError),
                          urllib.parse.unquote, bad_argument)
Brett Cannon74bfd702003-04-25 09:39:47 +00001107
Guido van Rossum52dbbb92008-08-18 21:44:30 +00001108 def test_unquoting_badpercent(self):
1109 # Test unquoting on bad percent-escapes
1110 given = '%xab'
1111 expect = given
1112 result = urllib.parse.unquote(given)
1113 self.assertEqual(expect, result, "using unquote(): %r != %r"
1114 % (expect, result))
1115 given = '%x'
1116 expect = given
1117 result = urllib.parse.unquote(given)
1118 self.assertEqual(expect, result, "using unquote(): %r != %r"
1119 % (expect, result))
1120 given = '%'
1121 expect = given
1122 result = urllib.parse.unquote(given)
1123 self.assertEqual(expect, result, "using unquote(): %r != %r"
1124 % (expect, result))
1125 # unquote_to_bytes
1126 given = '%xab'
1127 expect = bytes(given, 'ascii')
1128 result = urllib.parse.unquote_to_bytes(given)
1129 self.assertEqual(expect, result, "using unquote_to_bytes(): %r != %r"
1130 % (expect, result))
1131 given = '%x'
1132 expect = bytes(given, 'ascii')
1133 result = urllib.parse.unquote_to_bytes(given)
1134 self.assertEqual(expect, result, "using unquote_to_bytes(): %r != %r"
1135 % (expect, result))
1136 given = '%'
1137 expect = bytes(given, 'ascii')
1138 result = urllib.parse.unquote_to_bytes(given)
1139 self.assertEqual(expect, result, "using unquote_to_bytes(): %r != %r"
1140 % (expect, result))
Georg Brandl604ef372010-07-31 08:20:02 +00001141 self.assertRaises((TypeError, AttributeError), urllib.parse.unquote_to_bytes, None)
1142 self.assertRaises((TypeError, AttributeError), urllib.parse.unquote_to_bytes, ())
Senthil Kumaran79e17f62010-07-19 18:17:19 +00001143
Guido van Rossum52dbbb92008-08-18 21:44:30 +00001144 def test_unquoting_mixed_case(self):
1145 # Test unquoting on mixed-case hex digits in the percent-escapes
1146 given = '%Ab%eA'
1147 expect = b'\xab\xea'
1148 result = urllib.parse.unquote_to_bytes(given)
1149 self.assertEqual(expect, result,
1150 "using unquote_to_bytes(): %r != %r"
1151 % (expect, result))
1152
def test_unquoting_parts(self):
    # Make sure unquoting works when non-quoted characters are
    # interspersed with a quoted one.
    given = 'ab%sd' % hexescape('c')
    expect = "abcd"
    result = urllib.parse.unquote(given)
    # The message names unquote(), the function actually under test.
    self.assertEqual(expect, result,
                     "using unquote(): %r != %r" % (expect, result))
    result = urllib.parse.unquote_plus(given)
    self.assertEqual(expect, result,
                     "using unquote_plus(): %r != %r" % (expect, result))
Tim Petersc2659cf2003-05-12 20:19:37 +00001164
Brett Cannon74bfd702003-04-25 09:39:47 +00001165 def test_unquoting_plus(self):
1166 # Test difference between unquote() and unquote_plus()
1167 given = "are+there+spaces..."
1168 expect = given
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001169 result = urllib.parse.unquote(given)
Brett Cannon74bfd702003-04-25 09:39:47 +00001170 self.assertEqual(expect, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +00001171 "using unquote(): %r != %r" % (expect, result))
Brett Cannon74bfd702003-04-25 09:39:47 +00001172 expect = given.replace('+', ' ')
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001173 result = urllib.parse.unquote_plus(given)
Brett Cannon74bfd702003-04-25 09:39:47 +00001174 self.assertEqual(expect, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +00001175 "using unquote_plus(): %r != %r" % (expect, result))
1176
1177 def test_unquote_to_bytes(self):
1178 given = 'br%C3%BCckner_sapporo_20050930.doc'
1179 expect = b'br\xc3\xbcckner_sapporo_20050930.doc'
1180 result = urllib.parse.unquote_to_bytes(given)
1181 self.assertEqual(expect, result,
1182 "using unquote_to_bytes(): %r != %r"
1183 % (expect, result))
1184 # Test on a string with unescaped non-ASCII characters
1185 # (Technically an invalid URI; expect those characters to be UTF-8
1186 # encoded).
1187 result = urllib.parse.unquote_to_bytes("\u6f22%C3%BC")
1188 expect = b'\xe6\xbc\xa2\xc3\xbc' # UTF-8 for "\u6f22\u00fc"
1189 self.assertEqual(expect, result,
1190 "using unquote_to_bytes(): %r != %r"
1191 % (expect, result))
1192 # Test with a bytes as input
1193 given = b'%A2%D8ab%FF'
1194 expect = b'\xa2\xd8ab\xff'
1195 result = urllib.parse.unquote_to_bytes(given)
1196 self.assertEqual(expect, result,
1197 "using unquote_to_bytes(): %r != %r"
1198 % (expect, result))
1199 # Test with a bytes as input, with unescaped non-ASCII bytes
1200 # (Technically an invalid URI; expect those bytes to be preserved)
1201 given = b'%A2\xd8ab%FF'
1202 expect = b'\xa2\xd8ab\xff'
1203 result = urllib.parse.unquote_to_bytes(given)
1204 self.assertEqual(expect, result,
1205 "using unquote_to_bytes(): %r != %r"
1206 % (expect, result))
Brett Cannon74bfd702003-04-25 09:39:47 +00001207
Raymond Hettinger4b0f20d2005-10-15 16:41:53 +00001208 def test_unquote_with_unicode(self):
Guido van Rossum52dbbb92008-08-18 21:44:30 +00001209 # Characters in the Latin-1 range, encoded with UTF-8
1210 given = 'br%C3%BCckner_sapporo_20050930.doc'
1211 expect = 'br\u00fcckner_sapporo_20050930.doc'
1212 result = urllib.parse.unquote(given)
1213 self.assertEqual(expect, result,
1214 "using unquote(): %r != %r" % (expect, result))
1215 # Characters in the Latin-1 range, encoded with None (default)
1216 result = urllib.parse.unquote(given, encoding=None, errors=None)
1217 self.assertEqual(expect, result,
1218 "using unquote(): %r != %r" % (expect, result))
1219
1220 # Characters in the Latin-1 range, encoded with Latin-1
1221 result = urllib.parse.unquote('br%FCckner_sapporo_20050930.doc',
1222 encoding="latin-1")
1223 expect = 'br\u00fcckner_sapporo_20050930.doc'
1224 self.assertEqual(expect, result,
1225 "using unquote(): %r != %r" % (expect, result))
1226
1227 # Characters in BMP, encoded with UTF-8
1228 given = "%E6%BC%A2%E5%AD%97"
1229 expect = "\u6f22\u5b57" # "Kanji"
1230 result = urllib.parse.unquote(given)
1231 self.assertEqual(expect, result,
1232 "using unquote(): %r != %r" % (expect, result))
1233
1234 # Decode with UTF-8, invalid sequence
1235 given = "%F3%B1"
1236 expect = "\ufffd" # Replacement character
1237 result = urllib.parse.unquote(given)
1238 self.assertEqual(expect, result,
1239 "using unquote(): %r != %r" % (expect, result))
1240
1241 # Decode with UTF-8, invalid sequence, replace errors
1242 result = urllib.parse.unquote(given, errors="replace")
1243 self.assertEqual(expect, result,
1244 "using unquote(): %r != %r" % (expect, result))
1245
1246 # Decode with UTF-8, invalid sequence, ignoring errors
1247 given = "%F3%B1"
1248 expect = ""
1249 result = urllib.parse.unquote(given, errors="ignore")
1250 self.assertEqual(expect, result,
1251 "using unquote(): %r != %r" % (expect, result))
1252
1253 # A mix of non-ASCII and percent-encoded characters, UTF-8
1254 result = urllib.parse.unquote("\u6f22%C3%BC")
1255 expect = '\u6f22\u00fc'
1256 self.assertEqual(expect, result,
1257 "using unquote(): %r != %r" % (expect, result))
1258
1259 # A mix of non-ASCII and percent-encoded characters, Latin-1
1260 # (Note, the string contains non-Latin-1-representable characters)
1261 result = urllib.parse.unquote("\u6f22%FC", encoding="latin-1")
1262 expect = '\u6f22\u00fc'
1263 self.assertEqual(expect, result,
1264 "using unquote(): %r != %r" % (expect, result))
Raymond Hettinger4b0f20d2005-10-15 16:41:53 +00001265
Stein Karlsenaad2ee02019-10-14 12:36:29 +02001266 def test_unquoting_with_bytes_input(self):
1267 # ASCII characters decoded to a string
1268 given = b'blueberryjam'
1269 expect = 'blueberryjam'
1270 result = urllib.parse.unquote(given)
1271 self.assertEqual(expect, result,
1272 "using unquote(): %r != %r" % (expect, result))
1273
1274 # A mix of non-ASCII hex-encoded characters and ASCII characters
1275 given = b'bl\xc3\xa5b\xc3\xa6rsyltet\xc3\xb8y'
1276 expect = 'bl\u00e5b\u00e6rsyltet\u00f8y'
1277 result = urllib.parse.unquote(given)
1278 self.assertEqual(expect, result,
1279 "using unquote(): %r != %r" % (expect, result))
1280
1281 # A mix of non-ASCII percent-encoded characters and ASCII characters
1282 given = b'bl%c3%a5b%c3%a6rsyltet%c3%b8j'
1283 expect = 'bl\u00e5b\u00e6rsyltet\u00f8j'
1284 result = urllib.parse.unquote(given)
1285 self.assertEqual(expect, result,
1286 "using unquote(): %r != %r" % (expect, result))
1287
1288
Brett Cannon74bfd702003-04-25 09:39:47 +00001289class urlencode_Tests(unittest.TestCase):
1290 """Tests for urlencode()"""
1291
1292 def help_inputtype(self, given, test_type):
1293 """Helper method for testing different input types.
Tim Petersc2659cf2003-05-12 20:19:37 +00001294
Brett Cannon74bfd702003-04-25 09:39:47 +00001295 'given' must lead to only the pairs:
1296 * 1st, 1
1297 * 2nd, 2
1298 * 3rd, 3
Tim Petersc2659cf2003-05-12 20:19:37 +00001299
Brett Cannon74bfd702003-04-25 09:39:47 +00001300 Test cannot assume anything about order. Docs make no guarantee and
1301 have possible dictionary input.
Tim Petersc2659cf2003-05-12 20:19:37 +00001302
Brett Cannon74bfd702003-04-25 09:39:47 +00001303 """
1304 expect_somewhere = ["1st=1", "2nd=2", "3rd=3"]
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001305 result = urllib.parse.urlencode(given)
Brett Cannon74bfd702003-04-25 09:39:47 +00001306 for expected in expect_somewhere:
Ezio Melottib58e0bd2010-01-23 15:40:09 +00001307 self.assertIn(expected, result,
Brett Cannon74bfd702003-04-25 09:39:47 +00001308 "testing %s: %s not found in %s" %
1309 (test_type, expected, result))
1310 self.assertEqual(result.count('&'), 2,
1311 "testing %s: expected 2 '&'s; got %s" %
1312 (test_type, result.count('&')))
1313 amp_location = result.index('&')
1314 on_amp_left = result[amp_location - 1]
1315 on_amp_right = result[amp_location + 1]
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00001316 self.assertTrue(on_amp_left.isdigit() and on_amp_right.isdigit(),
Brett Cannon74bfd702003-04-25 09:39:47 +00001317 "testing %s: '&' not located in proper place in %s" %
1318 (test_type, result))
1319 self.assertEqual(len(result), (5 * 3) + 2, #5 chars per thing and amps
1320 "testing %s: "
1321 "unexpected number of characters: %s != %s" %
1322 (test_type, len(result), (5 * 3) + 2))
1323
1324 def test_using_mapping(self):
1325 # Test passing in a mapping object as an argument.
1326 self.help_inputtype({"1st":'1', "2nd":'2', "3rd":'3'},
1327 "using dict as input type")
1328
1329 def test_using_sequence(self):
1330 # Test passing in a sequence of two-item sequences as an argument.
1331 self.help_inputtype([('1st', '1'), ('2nd', '2'), ('3rd', '3')],
1332 "using sequence of two-item tuples as input")
1333
1334 def test_quoting(self):
1335 # Make sure keys and values are quoted using quote_plus()
1336 given = {"&":"="}
1337 expect = "%s=%s" % (hexescape('&'), hexescape('='))
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001338 result = urllib.parse.urlencode(given)
Brett Cannon74bfd702003-04-25 09:39:47 +00001339 self.assertEqual(expect, result)
1340 given = {"key name":"A bunch of pluses"}
1341 expect = "key+name=A+bunch+of+pluses"
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001342 result = urllib.parse.urlencode(given)
Brett Cannon74bfd702003-04-25 09:39:47 +00001343 self.assertEqual(expect, result)
1344
1345 def test_doseq(self):
1346 # Test that passing True for 'doseq' parameter works correctly
1347 given = {'sequence':['1', '2', '3']}
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001348 expect = "sequence=%s" % urllib.parse.quote_plus(str(['1', '2', '3']))
1349 result = urllib.parse.urlencode(given)
Brett Cannon74bfd702003-04-25 09:39:47 +00001350 self.assertEqual(expect, result)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001351 result = urllib.parse.urlencode(given, True)
Brett Cannon74bfd702003-04-25 09:39:47 +00001352 for value in given["sequence"]:
1353 expect = "sequence=%s" % value
Ezio Melottib58e0bd2010-01-23 15:40:09 +00001354 self.assertIn(expect, result)
Brett Cannon74bfd702003-04-25 09:39:47 +00001355 self.assertEqual(result.count('&'), 2,
1356 "Expected 2 '&'s, got %s" % result.count('&'))
1357
Jeremy Hylton1ef7c6b2009-03-26 16:57:30 +00001358 def test_empty_sequence(self):
1359 self.assertEqual("", urllib.parse.urlencode({}))
1360 self.assertEqual("", urllib.parse.urlencode([]))
1361
1362 def test_nonstring_values(self):
1363 self.assertEqual("a=1", urllib.parse.urlencode({"a": 1}))
1364 self.assertEqual("a=None", urllib.parse.urlencode({"a": None}))
1365
1366 def test_nonstring_seq_values(self):
1367 self.assertEqual("a=1&a=2", urllib.parse.urlencode({"a": [1, 2]}, True))
1368 self.assertEqual("a=None&a=a",
1369 urllib.parse.urlencode({"a": [None, "a"]}, True))
Georg Brandl2daf6ae2012-02-20 19:54:16 +01001370 data = collections.OrderedDict([("a", 1), ("b", 1)])
Jeremy Hylton1ef7c6b2009-03-26 16:57:30 +00001371 self.assertEqual("a=a&a=b",
Georg Brandl2daf6ae2012-02-20 19:54:16 +01001372 urllib.parse.urlencode({"a": data}, True))
Jeremy Hylton1ef7c6b2009-03-26 16:57:30 +00001373
Senthil Kumarandf022da2010-07-03 17:48:22 +00001374 def test_urlencode_encoding(self):
1375 # ASCII encoding. Expect %3F with errors="replace'
1376 given = (('\u00a0', '\u00c1'),)
1377 expect = '%3F=%3F'
1378 result = urllib.parse.urlencode(given, encoding="ASCII", errors="replace")
1379 self.assertEqual(expect, result)
1380
1381 # Default is UTF-8 encoding.
1382 given = (('\u00a0', '\u00c1'),)
1383 expect = '%C2%A0=%C3%81'
1384 result = urllib.parse.urlencode(given)
1385 self.assertEqual(expect, result)
1386
1387 # Latin-1 encoding.
1388 given = (('\u00a0', '\u00c1'),)
1389 expect = '%A0=%C1'
1390 result = urllib.parse.urlencode(given, encoding="latin-1")
1391 self.assertEqual(expect, result)
1392
1393 def test_urlencode_encoding_doseq(self):
1394 # ASCII Encoding. Expect %3F with errors="replace'
1395 given = (('\u00a0', '\u00c1'),)
1396 expect = '%3F=%3F'
1397 result = urllib.parse.urlencode(given, doseq=True,
1398 encoding="ASCII", errors="replace")
1399 self.assertEqual(expect, result)
1400
1401 # ASCII Encoding. On a sequence of values.
1402 given = (("\u00a0", (1, "\u00c1")),)
1403 expect = '%3F=1&%3F=%3F'
1404 result = urllib.parse.urlencode(given, True,
1405 encoding="ASCII", errors="replace")
1406 self.assertEqual(expect, result)
1407
1408 # Utf-8
1409 given = (("\u00a0", "\u00c1"),)
1410 expect = '%C2%A0=%C3%81'
1411 result = urllib.parse.urlencode(given, True)
1412 self.assertEqual(expect, result)
1413
1414 given = (("\u00a0", (42, "\u00c1")),)
1415 expect = '%C2%A0=42&%C2%A0=%C3%81'
1416 result = urllib.parse.urlencode(given, True)
1417 self.assertEqual(expect, result)
1418
1419 # latin-1
1420 given = (("\u00a0", "\u00c1"),)
1421 expect = '%A0=%C1'
1422 result = urllib.parse.urlencode(given, True, encoding="latin-1")
1423 self.assertEqual(expect, result)
1424
1425 given = (("\u00a0", (42, "\u00c1")),)
1426 expect = '%A0=42&%A0=%C1'
1427 result = urllib.parse.urlencode(given, True, encoding="latin-1")
1428 self.assertEqual(expect, result)
1429
1430 def test_urlencode_bytes(self):
1431 given = ((b'\xa0\x24', b'\xc1\x24'),)
1432 expect = '%A0%24=%C1%24'
1433 result = urllib.parse.urlencode(given)
1434 self.assertEqual(expect, result)
1435 result = urllib.parse.urlencode(given, True)
1436 self.assertEqual(expect, result)
1437
1438 # Sequence of values
1439 given = ((b'\xa0\x24', (42, b'\xc1\x24')),)
1440 expect = '%A0%24=42&%A0%24=%C1%24'
1441 result = urllib.parse.urlencode(given, True)
1442 self.assertEqual(expect, result)
1443
1444 def test_urlencode_encoding_safe_parameter(self):
1445
1446 # Send '$' (\x24) as safe character
1447 # Default utf-8 encoding
1448
1449 given = ((b'\xa0\x24', b'\xc1\x24'),)
1450 result = urllib.parse.urlencode(given, safe=":$")
1451 expect = '%A0$=%C1$'
1452 self.assertEqual(expect, result)
1453
1454 given = ((b'\xa0\x24', b'\xc1\x24'),)
1455 result = urllib.parse.urlencode(given, doseq=True, safe=":$")
1456 expect = '%A0$=%C1$'
1457 self.assertEqual(expect, result)
1458
1459 # Safe parameter in sequence
1460 given = ((b'\xa0\x24', (b'\xc1\x24', 0xd, 42)),)
1461 expect = '%A0$=%C1$&%A0$=13&%A0$=42'
1462 result = urllib.parse.urlencode(given, True, safe=":$")
1463 self.assertEqual(expect, result)
1464
1465 # Test all above in latin-1 encoding
1466
1467 given = ((b'\xa0\x24', b'\xc1\x24'),)
1468 result = urllib.parse.urlencode(given, safe=":$",
1469 encoding="latin-1")
1470 expect = '%A0$=%C1$'
1471 self.assertEqual(expect, result)
1472
1473 given = ((b'\xa0\x24', b'\xc1\x24'),)
1474 expect = '%A0$=%C1$'
1475 result = urllib.parse.urlencode(given, doseq=True, safe=":$",
1476 encoding="latin-1")
1477
1478 given = ((b'\xa0\x24', (b'\xc1\x24', 0xd, 42)),)
1479 expect = '%A0$=%C1$&%A0$=13&%A0$=42'
1480 result = urllib.parse.urlencode(given, True, safe=":$",
1481 encoding="latin-1")
1482 self.assertEqual(expect, result)
1483
class Pathname_Tests(unittest.TestCase):
    """Test pathname2url() and url2pathname()"""

    def test_basic(self):
        # Make sure simple tests pass
        expected_path = os.path.join("parts", "of", "a", "path")
        expected_url = "parts/of/a/path"
        result = urllib.request.pathname2url(expected_path)
        self.assertEqual(expected_url, result,
                         "pathname2url() failed; %s != %s" %
                         (expected_url, result))
        result = urllib.request.url2pathname(expected_url)
        self.assertEqual(expected_path, result,
                         "url2pathname() failed; %s != %s" %
                         (expected_path, result))

    def test_quoting(self):
        # Test automatic quoting and unquoting works for pathname2url() and
        # url2pathname() respectively
        given = os.path.join("needs", "quot=ing", "here")
        expect = "needs/%s/here" % urllib.parse.quote("quot=ing")
        result = urllib.request.pathname2url(given)
        self.assertEqual(expect, result,
                         "pathname2url() failed; %s != %s" %
                         (expect, result))
        # Feed the quoted URL back in: it must round-trip to the given path.
        expect = given
        result = urllib.request.url2pathname(result)
        self.assertEqual(expect, result,
                         "url2pathname() failed; %s != %s" %
                         (expect, result))
        given = os.path.join("make sure", "using_quote")
        expect = "%s/using_quote" % urllib.parse.quote("make sure")
        result = urllib.request.pathname2url(given)
        self.assertEqual(expect, result,
                         "pathname2url() failed; %s != %s" %
                         (expect, result))
        # A '+' in the URL is expected to come through untouched.
        given = "make+sure/using_unquote"
        expect = os.path.join("make+sure", "using_unquote")
        result = urllib.request.url2pathname(given)
        self.assertEqual(expect, result,
                         "url2pathname() failed; %s != %s" %
                         (expect, result))

    @unittest.skipUnless(sys.platform == 'win32',
                         'test specific to the Windows url2pathname() '
                         'implementation.')
    def test_ntpath(self):
        # Several spellings of the drive root must all map to the same path.
        given = ('/C:/', '///C:/', '/C|//')
        expect = 'C:\\'
        for url in given:
            result = urllib.request.url2pathname(url)
            self.assertEqual(expect, result,
                             'urllib.request.url2pathname() failed; %s != %s' %
                             (expect, result))
        given = '///C|/path'
        expect = 'C:\\path'
        result = urllib.request.url2pathname(given)
        self.assertEqual(expect, result,
                         'urllib.request.url2pathname() failed; %s != %s' %
                         (expect, result))
1543
class Utility_Tests(unittest.TestCase):
    """Tests covering the assorted utility helpers exposed by urllib."""

    def test_thishost(self):
        """Test the urllib.request.thishost utility function returns a tuple"""
        # Only the container type is checked, not the contents.
        host_info = urllib.request.thishost()
        self.assertIsInstance(host_info, tuple)
1550
Senthil Kumaran690ce9b2009-05-05 18:41:13 +00001551
class URLopener_Tests(FakeHTTPMixin, unittest.TestCase):
    """Exercise the open() and retrieve() methods of the URLopener class."""

    def test_quoted_open(self):
        # The class name must stay "DummyURLopener": the warning filter
        # below is matched against a message that starts with it.
        class DummyURLopener(urllib.request.URLopener):
            def open_spam(self, url):
                return url

        expected_warning = (
            'DummyURLopener style of invoking requests is deprecated.',
            DeprecationWarning)
        with support.check_warnings(expected_warning):
            opened = DummyURLopener().open('spam://example/ /')
            self.assertEqual(opened, '//example/%20/')

            # test the safe characters are not quoted by urlopen
            opened = DummyURLopener().open(
                "spam://c:|windows%/:=&?~#+!$,;'@()*[]|/path/")
            self.assertEqual(opened,
                             "//c:|windows%/:=&?~#+!$,;'@()*[]|/path/")

    @support.ignore_warnings(category=DeprecationWarning)
    def test_urlopener_retrieve_file(self):
        # Retrieving a file: URL is expected to hand back the local path.
        with support.temp_dir() as workdir:
            handle, local_path = tempfile.mkstemp(dir=workdir)
            os.close(handle)
            file_url = "file:" + urllib.request.pathname2url(local_path)
            opener = urllib.request.URLopener()
            retrieved_path, _headers = opener.retrieve(file_url)
            # Some buildbots have TEMP folder that uses a lowercase drive letter.
            self.assertEqual(os.path.normcase(retrieved_path),
                             os.path.normcase(local_path))

    @support.ignore_warnings(category=DeprecationWarning)
    def test_urlopener_retrieve_remote(self):
        # The HTTP layer is faked, so no real network traffic is intended.
        remote = "http://www.python.org/file.txt"
        self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello!")
        self.addCleanup(self.unfakehttp)
        retrieved_path, _headers = urllib.request.URLopener().retrieve(remote)
        _stem, extension = os.path.splitext(retrieved_path)
        self.assertEqual(extension, ".txt")

    @support.ignore_warnings(category=DeprecationWarning)
    def test_local_file_open(self):
        # bpo-35907, CVE-2019-9948: urllib must reject local_file:// scheme
        class DummyURLopener(urllib.request.URLopener):
            def open_local_file(self, url):
                return url

        for url in ('local_file://example', 'local-file://example'):
            # Every public entry point has to refuse the URL with OSError.
            entry_points = (
                urllib.request.urlopen,
                urllib.request.URLopener().open,
                urllib.request.URLopener().retrieve,
                DummyURLopener().open,
                DummyURLopener().retrieve,
            )
            for entry_point in entry_points:
                self.assertRaises(OSError, entry_point, url)
1600
Xtreakc661b302019-05-19 19:10:06 +05301601
class RequestTests(unittest.TestCase):
    """Unit tests for urllib.request.Request."""

    def test_default_values(self):
        # With no data the method is GET; supplying data switches it to POST.
        target = "http://www.python.org"
        without_data = urllib.request.Request(target)
        self.assertEqual(without_data.get_method(), 'GET')
        with_data = urllib.request.Request(target, {})
        self.assertEqual(with_data.get_method(), 'POST')

    def test_with_method_arg(self):
        target = "http://www.python.org"

        # An explicit method wins whether or not data is supplied.
        for positional in ((target,), (target, {})):
            req = urllib.request.Request(*positional, method='HEAD')
            self.assertEqual(req.method, 'HEAD')
            self.assertEqual(req.get_method(), 'HEAD')

        # Reassigning the attribute afterwards is honoured by get_method().
        req = urllib.request.Request(target, method='GET')
        self.assertEqual(req.get_method(), 'GET')
        req.method = 'HEAD'
        self.assertEqual(req.get_method(), 'HEAD')
Skip Montanaro080c9972001-01-28 21:12:22 +00001624
1625
class URL2PathNameTests(unittest.TestCase):
    # Checks for url2pathname() as imported from nturl2path at file level.

    def test_converting_drive_letter(self):
        # Both '|' and ':' are accepted after the drive letter.
        for url, wanted in (("///C|", 'C:'),
                            ("///C:", 'C:'),
                            ("///C|/", 'C:\\')):
            self.assertEqual(url2pathname(url), wanted)

    def test_converting_when_no_drive_letter(self):
        for url, wanted in (("///C/test/", '\\\\\\C\\test\\'),
                            ("////C/test/", '\\\\C\\test\\')):
            self.assertEqual(url2pathname(url), wanted)

    def test_simple_compare(self):
        converted = url2pathname("///C|/foo/bar/spam.foo")
        self.assertEqual(converted, 'C:\\foo\\bar\\spam.foo')

    def test_non_ascii_drive_letter(self):
        # IOError is an alias of OSError on Python 3.
        with self.assertRaises(OSError):
            url2pathname("///\u00e8|/")

    def test_roundtrip_url2pathname(self):
        # path -> URL -> path must give back the starting path.
        samples = (
            'C:',
            '\\\\\\C\\test\\\\',
            'C:\\foo\\bar\\spam.foo',
        )
        for original in samples:
            self.assertEqual(url2pathname(pathname2url(original)), original)
Senthil Kumaran277e9092013-04-10 20:51:19 -07001652
class PathName2URLTests(unittest.TestCase):
    # Checks for pathname2url() as imported from nturl2path at file level.

    def test_converting_drive_letter(self):
        # A bare drive and a drive root are expected to give the same URL.
        for path in ("C:", "C:\\"):
            self.assertEqual(pathname2url(path), '///C:')

    def test_converting_when_no_drive_letter(self):
        # Inputs start with three, two and one backslash respectively.
        for path, wanted in (('\\\\\\folder\\test\\', '/////folder/test/'),
                             ('\\\\folder\\test\\', '////folder/test/'),
                             ('\\folder\\test\\', '/folder/test/')):
            self.assertEqual(pathname2url(path), wanted)

    def test_simple_compare(self):
        converted = pathname2url('C:\\foo\\bar\\spam.foo')
        self.assertEqual(converted, "///C:/foo/bar/spam.foo")

    def test_long_drive_letter(self):
        # IOError is an alias of OSError on Python 3.
        with self.assertRaises(OSError):
            pathname2url("XX:\\")

    def test_roundtrip_pathname2url(self):
        # URL -> path -> URL must give back the starting URL.
        samples = (
            '///C:',
            '/////folder/test/',
            '///C:/foo/bar/spam.foo',
        )
        for original in samples:
            self.assertEqual(pathname2url(url2pathname(original)), original)
Brett Cannon74bfd702003-04-25 09:39:47 +00001680
# Run every test case in this module when the file is executed as a script.
if __name__ == '__main__':
    unittest.main()