"""Regression tests for what was in Python 2's "urllib" module"""
Brett Cannon74bfd702003-04-25 09:39:47 +00002
Jeremy Hylton1afc1692008-06-18 20:49:58 +00003import urllib.parse
4import urllib.request
guido@google.coma119df92011-03-29 11:41:02 -07005import urllib.error
Georg Brandl24420152008-05-26 16:32:26 +00006import http.client
Barry Warsaw820c1202008-06-12 04:06:45 +00007import email.message
Jeremy Hylton66dc8c52007-08-04 03:42:26 +00008import io
Brett Cannon74bfd702003-04-25 09:39:47 +00009import unittest
Benjamin Peterson3c2dca62014-06-07 15:08:04 -070010from unittest.mock import patch
Benjamin Petersonee8712c2008-05-20 21:35:26 +000011from test import support
Brett Cannon74bfd702003-04-25 09:39:47 +000012import os
Antoine Pitrou07df6552014-11-02 17:23:14 +010013try:
14 import ssl
15except ImportError:
16 ssl = None
Senthil Kumaran2d2ea1b2011-04-14 13:16:30 +080017import sys
Georg Brandl5a650a22005-08-26 08:51:34 +000018import tempfile
Senthil Kumaran277e9092013-04-10 20:51:19 -070019from nturl2path import url2pathname, pathname2url
Jeremy Hylton6102e292000-08-31 15:48:10 +000020
Senthil Kumaranc5c5a142012-01-14 19:09:04 +080021from base64 import b64encode
Georg Brandl2daf6ae2012-02-20 19:54:16 +010022import collections
Senthil Kumaranc5c5a142012-01-14 19:09:04 +080023
Senthil Kumaran8b081b72013-04-10 20:53:12 -070024
def hexescape(char):
    """Escape char as RFC 2396 specifies.

    Returns "%" followed by the upper-case hexadecimal code point of
    *char*, zero-padded to at least two digits.
    """
    return "%{:02X}".format(ord(char))
Jeremy Hylton6102e292000-08-31 15:48:10 +000031
# Shortcut for testing FancyURLopener: the opener built on the first
# proxy-less call is cached here and reused by later calls.
_urlopener = None


def urlopen(url, data=None, proxies=None):
    """urlopen(url [, data]) -> open file-like object

    With *proxies* given, a fresh urllib.request.FancyURLopener is built for
    that call only; otherwise the module-level cached opener is used
    (created on first use).  *data* is forwarded to open() only when it is
    not None.
    """
    global _urlopener
    if proxies is not None:
        opener = urllib.request.FancyURLopener(proxies=proxies)
    else:
        if not _urlopener:
            _urlopener = FancyURLopener()
        opener = _urlopener
    open_args = (url,) if data is None else (url, data)
    return opener.open(*open_args)
50
Senthil Kumarance260142011-11-01 01:35:17 +080051
def FancyURLopener():
    """Return a new urllib.request.FancyURLopener.

    The instance is created inside support.check_warnings(), filtered on the
    DeprecationWarning message that FancyURLopener is expected to emit.
    """
    expected_warning = (
        'FancyURLopener style of invoking requests is deprecated.',
        DeprecationWarning,
    )
    with support.check_warnings(expected_warning):
        return urllib.request.FancyURLopener()
57
58
def fakehttp(fakedata, mock_close=False):
    """Return an HTTPConnection subclass that replays *fakedata*.

    connect() on the returned class installs a BytesIO-backed fake socket
    preloaded with *fakedata* (also exposed as the class attribute
    ``fakesock``).  The bytes most recently passed to the socket's sendall()
    are stored on the class as ``buf``.  When *mock_close* is true the
    connection's close() is replaced by a no-op.
    """

    class FakeSocket(io.BytesIO):
        # Reference count: one for the socket itself, plus one for every
        # makefile() call.  The buffer is really closed only at zero.
        io_refs = 1

        def sendall(self, data):
            FakeHTTPConnection.buf = data

        def makefile(self, *args, **kwds):
            self.io_refs += 1
            return self

        def read(self, amt=None):
            # Reading after the final close yields EOF instead of raising.
            return b"" if self.closed else io.BytesIO.read(self, amt)

        def readline(self, length=None):
            return b"" if self.closed else io.BytesIO.readline(self, length)

        def close(self):
            self.io_refs -= 1
            if not self.io_refs:
                io.BytesIO.close(self)

    class FakeHTTPConnection(http.client.HTTPConnection):

        # buffer to store data for verification in urlopen tests.
        buf = None

        def connect(self):
            self.sock = FakeSocket(self.fakedata)
            type(self).fakesock = self.sock

        if mock_close:
            # bpo-36918: HTTPConnection destructor calls close() which calls
            # flush(). Problem: flush() calls self.fp.flush() which raises
            # "ValueError: I/O operation on closed file" which is logged as an
            # "Exception ignored in". Override close() to silence this error.
            def close(self):
                pass

    # Assigned after the class statement: inside the class body the name
    # would be looked up as a class-local and not found.
    FakeHTTPConnection.fakedata = fakedata

    return FakeHTTPConnection
104
105
class FakeHTTPMixin(object):
    """Mixin that swaps http.client.HTTPConnection for a canned-response fake."""

    def fakehttp(self, fakedata, mock_close=False):
        # Build the stand-in first, then remember the real class so that
        # unfakehttp() can put it back.
        stub_class = fakehttp(fakedata, mock_close=mock_close)
        real_class = http.client.HTTPConnection
        self._connection_class = real_class
        http.client.HTTPConnection = stub_class

    def unfakehttp(self):
        # Undo fakehttp(): reinstall the saved connection class.
        http.client.HTTPConnection = self._connection_class
114
115
class FakeFTPMixin(object):
    """Mixin that swaps urllib.request.ftpwrapper for an inert stand-in."""

    def fakeftp(self):
        # Remember the real wrapper so that unfakeftp() can restore it.
        self._ftpwrapper_class = urllib.request.ftpwrapper

        class FakeFtpWrapper(object):
            # Accepts the same arguments as the real wrapper and ignores them.
            def __init__(self, user, passwd, host, port, dirs, timeout=None,
                         persistent=True):
                pass

            def retrfile(self, file, type):
                # An empty stream paired with a length of 0.
                empty_stream = io.BytesIO()
                return empty_stream, 0

            def close(self):
                pass

        urllib.request.ftpwrapper = FakeFtpWrapper

    def unfakeftp(self):
        # Undo fakeftp(): reinstall the saved wrapper class.
        urllib.request.ftpwrapper = self._ftpwrapper_class
134
135
class urlopen_FileTests(unittest.TestCase):
    """Test urlopen() opening a temporary file.

    Try to test as much functionality as possible so as to cut down on reliance
    on connecting to the Net for testing.

    """

    def setUp(self):
        # Write a known one-line payload to the scratch file, then open that
        # file through a "file:" URL.
        payload = "test_urllib: %s\n" % self.__class__.__name__
        self.text = payload.encode("ascii")
        with open(support.TESTFN, 'wb') as scratch:
            scratch.write(self.text)
        self.pathname = support.TESTFN
        self.returned_obj = urlopen("file:%s" % self.pathname)

    def tearDown(self):
        """Close the opened object and delete the scratch file"""
        self.returned_obj.close()
        os.remove(support.TESTFN)

    def test_interface(self):
        # Make sure object returned by urlopen() has the specified methods
        required = ("read", "readline", "readlines", "fileno",
                    "close", "info", "geturl", "getcode", "__iter__")
        for attr in required:
            self.assertTrue(hasattr(self.returned_obj, attr),
                            "object returned by urlopen() lacks %s attribute" %
                            attr)

    def test_read(self):
        content = self.returned_obj.read()
        self.assertEqual(self.text, content)

    def test_readline(self):
        first_line = self.returned_obj.readline()
        self.assertEqual(self.text, first_line)
        after_eof = self.returned_obj.readline()
        self.assertEqual(b'', after_eof,
                         "calling readline() after exhausting the file did not"
                         " return an empty string")

    def test_readlines(self):
        lines_list = self.returned_obj.readlines()
        self.assertEqual(len(lines_list), 1,
                         "readlines() returned the wrong number of lines")
        only_line = lines_list[0]
        self.assertEqual(only_line, self.text,
                         "readlines() returned improper text")

    def test_fileno(self):
        file_num = self.returned_obj.fileno()
        self.assertIsInstance(file_num, int, "fileno() did not return an int")
        raw = os.read(file_num, len(self.text))
        self.assertEqual(raw, self.text,
                         "Reading on the file descriptor returned by fileno() "
                         "did not return the expected text")

    def test_close(self):
        # close() is called here and then once more by tearDown(), so a
        # second close on the same object is exercised as well.
        self.returned_obj.close()

    def test_headers(self):
        headers = self.returned_obj.headers
        self.assertIsInstance(headers, email.message.Message)

    def test_url(self):
        self.assertEqual(self.returned_obj.url, self.pathname)

    def test_status(self):
        self.assertIsNone(self.returned_obj.status)

    def test_info(self):
        info = self.returned_obj.info()
        self.assertIsInstance(info, email.message.Message)

    def test_geturl(self):
        self.assertEqual(self.returned_obj.geturl(), self.pathname)

    def test_getcode(self):
        self.assertIsNone(self.returned_obj.getcode())

    def test_iter(self):
        # Test iterator
        # No need to count iterations: any line other than the expected
        # text fails the comparison immediately.
        # Use the iterator in the usual implicit way to test for ticket #4608.
        for line in self.returned_obj:
            self.assertEqual(line, self.text)

    def test_relativelocalfile(self):
        relative_path = './' + self.pathname
        with self.assertRaises(ValueError):
            urllib.request.urlopen(relative_path)
226
Senthil Kumaranefbd4ea2017-04-01 23:47:35 -0700227
class ProxyTests(unittest.TestCase):
    """Proxy lookup and proxy-bypass rules taken from environment variables."""

    def setUp(self):
        # Records changes to env vars
        self.env = support.EnvironmentVarGuard()
        # Delete all proxy related env vars (snapshot the names first, the
        # environment is modified while unsetting).
        proxy_vars = [name for name in os.environ if 'proxy' in name.lower()]
        for name in proxy_vars:
            self.env.unset(name)

    def tearDown(self):
        # Restore all proxy related env vars
        self.env.__exit__()
        del self.env

    def test_getproxies_environment_keep_no_proxies(self):
        bypass = urllib.request.proxy_bypass_environment
        self.env.set('NO_PROXY', 'localhost')
        proxies = urllib.request.getproxies_environment()
        # getproxies_environment use lowered case truncated (no '_proxy') keys
        self.assertEqual('localhost', proxies['no'])
        # List of no_proxies with space.
        self.env.set('NO_PROXY', 'localhost, anotherdomain.com, newdomain.com:1234')
        for host in ('anotherdomain.com',
                     'anotherdomain.com:8888',
                     'newdomain.com:1234'):
            self.assertTrue(bypass(host))

    def test_proxy_cgi_ignore(self):
        try:
            self.env.set('HTTP_PROXY', 'http://somewhere:3128')
            outside_cgi = urllib.request.getproxies_environment()
            self.assertEqual('http://somewhere:3128', outside_cgi['http'])
            # With REQUEST_METHOD set, the 'http' entry must not be reported.
            self.env.set('REQUEST_METHOD', 'GET')
            inside_cgi = urllib.request.getproxies_environment()
            self.assertNotIn('http', inside_cgi)
        finally:
            self.env.unset('REQUEST_METHOD')
            self.env.unset('HTTP_PROXY')

    def test_proxy_bypass_environment_host_match(self):
        bypass = urllib.request.proxy_bypass_environment
        self.env.set('NO_PROXY',
                     'localhost, anotherdomain.com, newdomain.com:1234, .d.o.t')
        bypassed = (
            'localhost',
            'LocalHost',                # MixedCase
            'LOCALHOST',                # UPPERCASE
            '.localhost',
            'newdomain.com:1234',
            '.newdomain.com:1234',
            'foo.d.o.t',                # issue 29142
            'd.o.t',
            'anotherdomain.com:8888',
            '.anotherdomain.com:8888',
            'www.newdomain.com:1234',
        )
        for host in bypassed:
            self.assertTrue(bypass(host))
        not_bypassed = (
            'prelocalhost',
            'newdomain.com',            # no port
            'newdomain.com:1235',       # wrong port
        )
        for host in not_bypassed:
            self.assertFalse(bypass(host))

    def test_proxy_bypass_environment_always_match(self):
        bypass = urllib.request.proxy_bypass_environment
        # A lone '*' bypasses the proxy for every host.
        self.env.set('NO_PROXY', '*')
        for host in ('newdomain.com', 'newdomain.com:1234'):
            self.assertTrue(bypass(host))
        # '*' inside a longer list is expected not to match arbitrary hosts.
        self.env.set('NO_PROXY', '*, anotherdomain.com')
        self.assertTrue(bypass('anotherdomain.com'))
        for host in ('newdomain.com', 'newdomain.com:1234'):
            self.assertFalse(bypass(host))

    def test_proxy_bypass_environment_newline(self):
        bypass = urllib.request.proxy_bypass_environment
        self.env.set('NO_PROXY',
                     'localhost, anotherdomain.com, newdomain.com:1234')
        # A trailing newline must prevent a match.
        for host in ('localhost\n',
                     'anotherdomain.com:8888\n',
                     'newdomain.com:1234\n'):
            self.assertFalse(bypass(host))
302
Senthil Kumaranefbd4ea2017-04-01 23:47:35 -0700303
class ProxyTests_withOrderedEnv(unittest.TestCase):
    """Proxy environment tests in which the order of variables matters."""

    def setUp(self):
        # We need to test conditions, where variable order _is_ significant
        real_environ = os.environ
        self._saved_env = real_environ
        # Monkey patch os.environ, start with empty fake environment
        os.environ = collections.OrderedDict()

    def tearDown(self):
        # Put the real environment mapping back.
        os.environ = self._saved_env

    def test_getproxies_environment_prefer_lowercase(self):
        bypass = urllib.request.proxy_bypass_environment
        getproxies = urllib.request.getproxies_environment
        # The fake environment installed by setUp().
        env = os.environ

        # Test lowercase preference with removal
        env['no_proxy'] = ''
        env['No_Proxy'] = 'localhost'
        for host in ('localhost', 'arbitrary'):
            self.assertFalse(bypass(host))
        env['http_proxy'] = ''
        env['HTTP_PROXY'] = 'http://somewhere:3128'
        self.assertEqual({}, getproxies())

        # Test lowercase preference of proxy bypass and correct matching including ports
        env['no_proxy'] = 'localhost, noproxy.com, my.proxy:1234'
        env['No_Proxy'] = 'xyz.com'
        for host in ('localhost', 'noproxy.com:5678', 'my.proxy:1234'):
            self.assertTrue(bypass(host))
        for host in ('my.proxy', 'arbitrary'):
            self.assertFalse(bypass(host))

        # Test lowercase preference with replacement
        env['http_proxy'] = 'http://somewhere:3128'
        env['Http_Proxy'] = 'http://somewhereelse:3128'
        proxies = getproxies()
        self.assertEqual('http://somewhere:3128', proxies['http'])
Benjamin Peterson9bc93512008-09-22 22:10:59 +0000338
Senthil Kumaranefbd4ea2017-04-01 23:47:35 -0700339
class urlopen_HttpTests(unittest.TestCase, FakeHTTPMixin, FakeFTPMixin):
    """Test urlopen() opening a fake http connection."""

    # Most tests follow the same shape: install a canned response with
    # self.fakehttp(), run the request inside try, and always restore the
    # real connection class with self.unfakehttp() in finally.

    def check_read(self, ver):
        # Helper: serve a bodied 200 response whose status line uses the
        # HTTP version bytes *ver*, then check body, URL and status code.
        self.fakehttp(b"HTTP/" + ver + b" 200 OK\r\n\r\nHello!")
        try:
            fp = urlopen("http://python.org/")
            self.assertEqual(fp.readline(), b"Hello!")
            self.assertEqual(fp.readline(), b"")
            self.assertEqual(fp.geturl(), 'http://python.org/')
            self.assertEqual(fp.getcode(), 200)
        finally:
            self.unfakehttp()

    def test_url_fragment(self):
        # Issue #11703: geturl() omits fragments in the original URL.
        url = 'http://docs.python.org/library/urllib.html#OK'
        self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello!")
        try:
            fp = urllib.request.urlopen(url)
            self.assertEqual(fp.geturl(), url)
        finally:
            self.unfakehttp()

    def test_willclose(self):
        # The canned response carries no length information; the response
        # object is expected to be flagged will_close.
        self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello!")
        try:
            resp = urlopen("http://www.python.org")
            self.assertTrue(resp.fp.will_close)
        finally:
            self.unfakehttp()

    @unittest.skipUnless(ssl, "ssl module required")
    def test_url_path_with_control_char_rejected(self):
        # Every code point 0x00-0x20 plus 0x7f is placed in the URL path.
        for char_no in list(range(0, 0x21)) + [0x7f]:
            char = chr(char_no)
            schemeless_url = f"//localhost:7777/test{char}/"
            self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello.")
            try:
                # We explicitly test urllib.request.urlopen() instead of the top
                # level 'def urlopen()' function defined in this... (quite ugly)
                # test suite.  They use different url opening codepaths.  Plain
                # urlopen uses FancyURLOpener which goes via a codepath that
                # calls urllib.parse.quote() on the URL which makes all of the
                # above attempts at injection within the url _path_ safe.
                # Backslashes in the repr are doubled so that it can be
                # embedded in the regular expression below.
                escaped_char_repr = repr(char).replace('\\', r'\\')
                InvalidURL = http.client.InvalidURL
                with self.assertRaisesRegex(
                    InvalidURL, f"contain control.*{escaped_char_repr}"):
                    urllib.request.urlopen(f"http:{schemeless_url}")
                with self.assertRaisesRegex(
                    InvalidURL, f"contain control.*{escaped_char_repr}"):
                    urllib.request.urlopen(f"https:{schemeless_url}")
                # This code path quotes the URL so there is no injection.
                resp = urlopen(f"http:{schemeless_url}")
                self.assertNotIn(char, resp.geturl())
            finally:
                self.unfakehttp()

    @unittest.skipUnless(ssl, "ssl module required")
    def test_url_path_with_newline_header_injection_rejected(self):
        self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello.")
        # The "host" smuggles a request line terminator and extra headers.
        host = "localhost:7777?a=1 HTTP/1.1\r\nX-injected: header\r\nTEST: 123"
        schemeless_url = "//" + host + ":8080/test/?test=a"
        try:
            # We explicitly test urllib.request.urlopen() instead of the top
            # level 'def urlopen()' function defined in this... (quite ugly)
            # test suite.  They use different url opening codepaths.  Plain
            # urlopen uses FancyURLOpener which goes via a codepath that
            # calls urllib.parse.quote() on the URL which makes all of the
            # above attempts at injection within the url _path_ safe.
            InvalidURL = http.client.InvalidURL
            with self.assertRaisesRegex(
                InvalidURL, r"contain control.*\\r.*(found at least . .)"):
                urllib.request.urlopen(f"http:{schemeless_url}")
            with self.assertRaisesRegex(InvalidURL, r"contain control.*\\n"):
                urllib.request.urlopen(f"https:{schemeless_url}")
            # This code path quotes the URL so there is no injection.
            resp = urlopen(f"http:{schemeless_url}")
            self.assertNotIn(' ', resp.geturl())
            self.assertNotIn('\r', resp.geturl())
            self.assertNotIn('\n', resp.geturl())
        finally:
            self.unfakehttp()

    @unittest.skipUnless(ssl, "ssl module required")
    def test_url_host_with_control_char_rejected(self):
        # Same character range as the path variant, but placed in the host;
        # here the module-level urlopen() helper itself must raise.
        for char_no in list(range(0, 0x21)) + [0x7f]:
            char = chr(char_no)
            schemeless_url = f"//localhost{char}/test/"
            self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello.")
            try:
                escaped_char_repr = repr(char).replace('\\', r'\\')
                InvalidURL = http.client.InvalidURL
                with self.assertRaisesRegex(
                    InvalidURL, f"contain control.*{escaped_char_repr}"):
                    urlopen(f"http:{schemeless_url}")
                with self.assertRaisesRegex(InvalidURL, f"contain control.*{escaped_char_repr}"):
                    urlopen(f"https:{schemeless_url}")
            finally:
                self.unfakehttp()

    @unittest.skipUnless(ssl, "ssl module required")
    def test_url_host_with_newline_header_injection_rejected(self):
        self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello.")
        # CR/LF plus an extra header embedded in the host part.
        host = "localhost\r\nX-injected: header\r\n"
        schemeless_url = "//" + host + ":8080/test/?test=a"
        try:
            InvalidURL = http.client.InvalidURL
            with self.assertRaisesRegex(
                InvalidURL, r"contain control.*\\r"):
                urlopen(f"http:{schemeless_url}")
            with self.assertRaisesRegex(InvalidURL, r"contain control.*\\n"):
                urlopen(f"https:{schemeless_url}")
        finally:
            self.unfakehttp()

    def test_read_0_9(self):
        # "0.9" response accepted (but not "simple responses" without
        # a status line)
        self.check_read(b"0.9")

    def test_read_1_0(self):
        self.check_read(b"1.0")

    def test_read_1_1(self):
        self.check_read(b"1.1")

    def test_read_bogus(self):
        # urlopen() should raise OSError for many error codes.
        # The payload lines start at column 0 because they are part of the
        # bytes literal; mock_close=True makes the fake connection's close()
        # a no-op.
        self.fakehttp(b'''HTTP/1.1 401 Authentication Required
Date: Wed, 02 Jan 2008 03:03:54 GMT
Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
Connection: close
Content-Type: text/html; charset=iso-8859-1
''', mock_close=True)
        try:
            self.assertRaises(OSError, urlopen, "http://python.org/")
        finally:
            self.unfakehttp()

    def test_invalid_redirect(self):
        # A 302 whose Location uses the file: scheme must be refused with an
        # HTTPError mentioning the redirection target.
        self.fakehttp(b'''HTTP/1.1 302 Found
Date: Wed, 02 Jan 2008 03:03:54 GMT
Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
Location: file://guidocomputer.athome.com:/python/license
Connection: close
Content-Type: text/html; charset=iso-8859-1
''', mock_close=True)
        try:
            msg = "Redirection to url 'file:"
            with self.assertRaisesRegex(urllib.error.HTTPError, msg):
                urlopen("http://python.org/")
        finally:
            self.unfakehttp()

    def test_redirect_limit_independent(self):
        # Ticket #12923: make sure independent requests each use their
        # own retry limit.
        # The request is repeated maxtries times; each one must fail with
        # HTTPError.
        for i in range(FancyURLopener().maxtries):
            self.fakehttp(b'''HTTP/1.1 302 Found
Location: file://guidocomputer.athome.com:/python/license
Connection: close
''', mock_close=True)
            try:
                self.assertRaises(urllib.error.HTTPError, urlopen,
                    "http://something")
            finally:
                self.unfakehttp()

    def test_empty_socket(self):
        # urlopen() raises OSError if the underlying socket does not send any
        # data. (#1680230)
        self.fakehttp(b'')
        try:
            self.assertRaises(OSError, urlopen, "http://something")
        finally:
            self.unfakehttp()

    def test_missing_localfile(self):
        # Test for #10836
        with self.assertRaises(urllib.error.URLError) as e:
            urlopen('file://localhost/a/file/which/doesnot/exists.py')
        # Both the offending filename and a reason must be reported.
        self.assertTrue(e.exception.filename)
        self.assertTrue(e.exception.reason)

    def test_file_notexists(self):
        fd, tmp_file = tempfile.mkstemp()
        tmp_fileurl = 'file://localhost/' + tmp_file.replace(os.path.sep, '/')
        try:
            # While the file exists, opening its URL succeeds.
            self.assertTrue(os.path.exists(tmp_file))
            with urlopen(tmp_fileurl) as fobj:
                self.assertTrue(fobj)
        finally:
            os.close(fd)
            os.unlink(tmp_file)
        # Once it has been removed, the same URL must raise URLError.
        self.assertFalse(os.path.exists(tmp_file))
        with self.assertRaises(urllib.error.URLError):
            urlopen(tmp_fileurl)

    def test_ftp_nohost(self):
        test_ftp_url = 'ftp:///path'
        with self.assertRaises(urllib.error.URLError) as e:
            urlopen(test_ftp_url)
        self.assertFalse(e.exception.filename)
        self.assertTrue(e.exception.reason)

    def test_ftp_nonexisting(self):
        with self.assertRaises(urllib.error.URLError) as e:
            urlopen('ftp://localhost/a/file/which/doesnot/exists.py')
        self.assertFalse(e.exception.filename)
        self.assertTrue(e.exception.reason)

    @patch.object(urllib.request, 'MAXFTPCACHE', 0)
    def test_ftp_cache_pruning(self):
        # With MAXFTPCACHE patched to 0, seed the cache with one fake
        # wrapper and open an FTP URL through the faked ftpwrapper class.
        self.fakeftp()
        try:
            urllib.request.ftpcache['test'] = urllib.request.ftpwrapper('user', 'pass', 'localhost', 21, [])
            urlopen('ftp://localhost')
        finally:
            self.unfakeftp()

    def test_userpass_inurl(self):
        self.fakehttp(b"HTTP/1.0 200 OK\r\n\r\nHello!")
        try:
            fp = urlopen("http://user:pass@python.org/")
            self.assertEqual(fp.readline(), b"Hello!")
            self.assertEqual(fp.readline(), b"")
            self.assertEqual(fp.geturl(), 'http://user:pass@python.org/')
            self.assertEqual(fp.getcode(), 200)
        finally:
            self.unfakehttp()

    def test_userpass_inurl_w_spaces(self):
        self.fakehttp(b"HTTP/1.0 200 OK\r\n\r\nHello!")
        try:
            userpass = "a b:c d"
            url = "http://{}@python.org/".format(userpass)
            # Fetched after fakehttp(), so this is the fake connection
            # class; its ``buf`` attribute holds the bytes that were sent.
            fakehttp_wrapper = http.client.HTTPConnection
            authorization = ("Authorization: Basic %s\r\n" %
                             b64encode(userpass.encode("ASCII")).decode("ASCII"))
            fp = urlopen(url)
            # The authorization header must be in place
            self.assertIn(authorization, fakehttp_wrapper.buf.decode("UTF-8"))
            self.assertEqual(fp.readline(), b"Hello!")
            self.assertEqual(fp.readline(), b"")
            # the spaces are quoted in URL so no match
            self.assertNotEqual(fp.geturl(), url)
            self.assertEqual(fp.getcode(), 200)
        finally:
            self.unfakehttp()

    def test_URLopener_deprecation(self):
        # Instantiating URLopener is expected to emit a DeprecationWarning.
        with support.check_warnings(('',DeprecationWarning)):
            urllib.request.URLopener()

    @unittest.skipUnless(ssl, "ssl module required")
    def test_cafile_and_context(self):
        # Passing both cafile and context must be rejected with ValueError.
        context = ssl.create_default_context()
        with support.check_warnings(('', DeprecationWarning)):
            with self.assertRaises(ValueError):
                urllib.request.urlopen(
                    "https://localhost", cafile="/nonexistent/path", context=context
                )
Senthil Kumarana5c85b32014-09-19 15:23:30 +0800605
Senthil Kumaranefbd4ea2017-04-01 23:47:35 -0700606
class urlopen_DataTests(unittest.TestCase):
    """Test urlopen() opening a data URL.

    setUp() opens three ``data:`` URLs (percent-encoded UTF-8 text,
    base64-encoded ISO-8859-1 text, and a base64-encoded PNG image); the
    individual tests inspect the resulting response objects.
    """

    def setUp(self):
        # clear _opener global variable
        self.addCleanup(urllib.request.urlcleanup)

        # text containing URL special- and unicode-characters
        self.text = "test data URLs :;,%=& \u00f6 \u00c4 "
        # 2x1 pixel RGB PNG image with one black and one white pixel
        self.image = (
            b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\x02\x00\x00\x00'
            b'\x01\x08\x02\x00\x00\x00{@\xe8\xdd\x00\x00\x00\x01sRGB\x00\xae'
            b'\xce\x1c\xe9\x00\x00\x00\x0fIDAT\x08\xd7c```\xf8\xff\xff?\x00'
            b'\x06\x01\x02\xfe\no/\x1e\x00\x00\x00\x00IEND\xaeB`\x82')

        self.text_url = (
            "data:text/plain;charset=UTF-8,test%20data%20URLs%20%3A%3B%2C%25%3"
            "D%26%20%C3%B6%20%C3%84%20")
        self.text_url_base64 = (
            "data:text/plain;charset=ISO-8859-1;base64,dGVzdCBkYXRhIFVSTHMgOjs"
            "sJT0mIPYgxCA%3D")
        # base64 encoded data URL that contains ignorable spaces,
        # such as "\n", " ", "%0A", and "%20".
        # The first segment carries the scheme, the media type and the
        # base64 of the first 30 bytes of self.image; without it the value
        # is not a data: URL at all and urlopen() below cannot open it.
        self.image_url = (
            "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAIAAAABCAIAAAB7\n"
            "QOjdAAAAAXNSR0IArs4c6QAAAA9JREFUCNdj%0AYGBg%2BP//PwAGAQL%2BCm8 "
            "vHgAAAABJRU5ErkJggg%3D%3D%0A%20")

        self.text_url_resp = urllib.request.urlopen(self.text_url)
        self.text_url_base64_resp = urllib.request.urlopen(
                self.text_url_base64)
        self.image_url_resp = urllib.request.urlopen(self.image_url)
        # Close the responses once the test has finished with them.
        for resp in (self.text_url_resp, self.text_url_base64_resp,
                     self.image_url_resp):
            self.addCleanup(resp.close)

    def test_interface(self):
        # Make sure object returned by urlopen() has the specified methods
        for attr in ("read", "readline", "readlines",
                     "close", "info", "geturl", "getcode", "__iter__"):
            self.assertTrue(hasattr(self.text_url_resp, attr),
                            "object returned by urlopen() lacks %s attribute" %
                            attr)

    def test_info(self):
        # The headers object must expose the media type parameters and the
        # decoded payload length.
        self.assertIsInstance(self.text_url_resp.info(), email.message.Message)
        self.assertEqual(self.text_url_base64_resp.info().get_params(),
                         [('text/plain', ''), ('charset', 'ISO-8859-1')])
        self.assertEqual(self.image_url_resp.info()['content-length'],
                         str(len(self.image)))
        # An empty data URL is expected to report text/plain;charset=US-ASCII
        with urllib.request.urlopen("data:,") as empty_resp:
            self.assertEqual(empty_resp.info().get_params(),
                             [('text/plain', ''), ('charset', 'US-ASCII')])

    def test_geturl(self):
        # geturl() must echo the URL exactly as it was passed in
        self.assertEqual(self.text_url_resp.geturl(), self.text_url)
        self.assertEqual(self.text_url_base64_resp.geturl(),
                         self.text_url_base64)
        self.assertEqual(self.image_url_resp.geturl(), self.image_url)

    def test_read_text(self):
        # Decode the payload with the charset announced in the headers
        charset = dict(self.text_url_resp.info().get_params())['charset']
        self.assertEqual(self.text_url_resp.read().decode(charset), self.text)

    def test_read_text_base64(self):
        # Same as test_read_text, for the base64 / ISO-8859-1 variant
        charset = dict(
            self.text_url_base64_resp.info().get_params())['charset']
        self.assertEqual(self.text_url_base64_resp.read().decode(charset),
                         self.text)

    def test_read_image(self):
        # Whitespace inside the base64 payload must be ignored when decoding
        self.assertEqual(self.image_url_resp.read(), self.image)

    def test_missing_comma(self):
        # A data URL without the ',' separator is rejected
        self.assertRaises(ValueError, urllib.request.urlopen,
                          'data:text/plain')

    def test_invalid_base64_data(self):
        # missing padding character
        self.assertRaises(ValueError, urllib.request.urlopen,
                          'data:;base64,Cg=')
682
Senthil Kumaranefbd4ea2017-04-01 23:47:35 -0700683
Brett Cannon19691362003-04-29 05:08:06 +0000684class urlretrieve_FileTests(unittest.TestCase):
Brett Cannon74bfd702003-04-25 09:39:47 +0000685 """Test urllib.urlretrieve() on local files"""
Skip Montanaro080c9972001-01-28 21:12:22 +0000686
Brett Cannon19691362003-04-29 05:08:06 +0000687 def setUp(self):
Victor Stinner7cb92042019-07-02 14:50:19 +0200688 # clear _opener global variable
689 self.addCleanup(urllib.request.urlcleanup)
690
Georg Brandl5a650a22005-08-26 08:51:34 +0000691 # Create a list of temporary files. Each item in the list is a file
692 # name (absolute path or relative to the current working directory).
693 # All files in this list will be deleted in the tearDown method. Note,
694 # this only helps to makes sure temporary files get deleted, but it
695 # does nothing about trying to close files that may still be open. It
696 # is the responsibility of the developer to properly close files even
697 # when exceptional conditions occur.
698 self.tempFiles = []
699
Brett Cannon19691362003-04-29 05:08:06 +0000700 # Create a temporary file.
Benjamin Petersonee8712c2008-05-20 21:35:26 +0000701 self.registerFileForCleanUp(support.TESTFN)
Guido van Rossuma0982942007-07-10 08:30:03 +0000702 self.text = b'testing urllib.urlretrieve'
Georg Brandl5a650a22005-08-26 08:51:34 +0000703 try:
Benjamin Petersonee8712c2008-05-20 21:35:26 +0000704 FILE = open(support.TESTFN, 'wb')
Georg Brandl5a650a22005-08-26 08:51:34 +0000705 FILE.write(self.text)
706 FILE.close()
707 finally:
708 try: FILE.close()
709 except: pass
Brett Cannon19691362003-04-29 05:08:06 +0000710
711 def tearDown(self):
Georg Brandl5a650a22005-08-26 08:51:34 +0000712 # Delete the temporary files.
713 for each in self.tempFiles:
714 try: os.remove(each)
715 except: pass
716
717 def constructLocalFileUrl(self, filePath):
Victor Stinner6c6f8512010-08-07 10:09:35 +0000718 filePath = os.path.abspath(filePath)
719 try:
Marc-André Lemburg8f36af72011-02-25 15:42:01 +0000720 filePath.encode("utf-8")
Victor Stinner6c6f8512010-08-07 10:09:35 +0000721 except UnicodeEncodeError:
722 raise unittest.SkipTest("filePath is not encodable to utf8")
723 return "file://%s" % urllib.request.pathname2url(filePath)
Georg Brandl5a650a22005-08-26 08:51:34 +0000724
Guido van Rossum70d0dda2007-08-29 01:53:26 +0000725 def createNewTempFile(self, data=b""):
Georg Brandl5a650a22005-08-26 08:51:34 +0000726 """Creates a new temporary file containing the specified data,
727 registers the file for deletion during the test fixture tear down, and
728 returns the absolute path of the file."""
729
730 newFd, newFilePath = tempfile.mkstemp()
731 try:
732 self.registerFileForCleanUp(newFilePath)
733 newFile = os.fdopen(newFd, "wb")
734 newFile.write(data)
735 newFile.close()
736 finally:
737 try: newFile.close()
738 except: pass
739 return newFilePath
740
741 def registerFileForCleanUp(self, fileName):
742 self.tempFiles.append(fileName)
Brett Cannon19691362003-04-29 05:08:06 +0000743
744 def test_basic(self):
745 # Make sure that a local file just gets its own location returned and
746 # a headers value is returned.
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000747 result = urllib.request.urlretrieve("file:%s" % support.TESTFN)
Benjamin Petersonee8712c2008-05-20 21:35:26 +0000748 self.assertEqual(result[0], support.TESTFN)
Ezio Melottie9615932010-01-24 19:26:24 +0000749 self.assertIsInstance(result[1], email.message.Message,
Martin Panter7462b6492015-11-02 03:37:02 +0000750 "did not get an email.message.Message instance "
Ezio Melottie9615932010-01-24 19:26:24 +0000751 "as second returned value")
Brett Cannon19691362003-04-29 05:08:06 +0000752
753 def test_copy(self):
754 # Test that setting the filename argument works.
Benjamin Petersonee8712c2008-05-20 21:35:26 +0000755 second_temp = "%s.2" % support.TESTFN
Georg Brandl5a650a22005-08-26 08:51:34 +0000756 self.registerFileForCleanUp(second_temp)
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000757 result = urllib.request.urlretrieve(self.constructLocalFileUrl(
Benjamin Petersonee8712c2008-05-20 21:35:26 +0000758 support.TESTFN), second_temp)
Brett Cannon19691362003-04-29 05:08:06 +0000759 self.assertEqual(second_temp, result[0])
Benjamin Petersonc9c0f202009-06-30 23:06:06 +0000760 self.assertTrue(os.path.exists(second_temp), "copy of the file was not "
Brett Cannon19691362003-04-29 05:08:06 +0000761 "made")
Alex Martelli01c77c62006-08-24 02:58:11 +0000762 FILE = open(second_temp, 'rb')
Brett Cannon19691362003-04-29 05:08:06 +0000763 try:
764 text = FILE.read()
Brett Cannon19691362003-04-29 05:08:06 +0000765 FILE.close()
Georg Brandl5a650a22005-08-26 08:51:34 +0000766 finally:
767 try: FILE.close()
768 except: pass
Brett Cannon19691362003-04-29 05:08:06 +0000769 self.assertEqual(self.text, text)
770
771 def test_reporthook(self):
772 # Make sure that the reporthook works.
Senthil Kumarane24f96a2012-03-13 19:29:33 -0700773 def hooktester(block_count, block_read_size, file_size, count_holder=[0]):
774 self.assertIsInstance(block_count, int)
775 self.assertIsInstance(block_read_size, int)
776 self.assertIsInstance(file_size, int)
777 self.assertEqual(block_count, count_holder[0])
Brett Cannon19691362003-04-29 05:08:06 +0000778 count_holder[0] = count_holder[0] + 1
Benjamin Petersonee8712c2008-05-20 21:35:26 +0000779 second_temp = "%s.2" % support.TESTFN
Georg Brandl5a650a22005-08-26 08:51:34 +0000780 self.registerFileForCleanUp(second_temp)
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000781 urllib.request.urlretrieve(
782 self.constructLocalFileUrl(support.TESTFN),
Georg Brandl5a650a22005-08-26 08:51:34 +0000783 second_temp, hooktester)
784
785 def test_reporthook_0_bytes(self):
786 # Test on zero length file. Should call reporthook only 1 time.
787 report = []
Senthil Kumarane24f96a2012-03-13 19:29:33 -0700788 def hooktester(block_count, block_read_size, file_size, _report=report):
789 _report.append((block_count, block_read_size, file_size))
Georg Brandl5a650a22005-08-26 08:51:34 +0000790 srcFileName = self.createNewTempFile()
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000791 urllib.request.urlretrieve(self.constructLocalFileUrl(srcFileName),
Benjamin Petersonee8712c2008-05-20 21:35:26 +0000792 support.TESTFN, hooktester)
Georg Brandl5a650a22005-08-26 08:51:34 +0000793 self.assertEqual(len(report), 1)
794 self.assertEqual(report[0][2], 0)
795
796 def test_reporthook_5_bytes(self):
797 # Test on 5 byte file. Should call reporthook only 2 times (once when
798 # the "network connection" is established and once when the block is
Senthil Kumarane24f96a2012-03-13 19:29:33 -0700799 # read).
Georg Brandl5a650a22005-08-26 08:51:34 +0000800 report = []
Senthil Kumarane24f96a2012-03-13 19:29:33 -0700801 def hooktester(block_count, block_read_size, file_size, _report=report):
802 _report.append((block_count, block_read_size, file_size))
Guido van Rossum70d0dda2007-08-29 01:53:26 +0000803 srcFileName = self.createNewTempFile(b"x" * 5)
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000804 urllib.request.urlretrieve(self.constructLocalFileUrl(srcFileName),
Benjamin Petersonee8712c2008-05-20 21:35:26 +0000805 support.TESTFN, hooktester)
Georg Brandl5a650a22005-08-26 08:51:34 +0000806 self.assertEqual(len(report), 2)
Gregory P. Smith6d9388f2012-11-10 15:12:55 -0800807 self.assertEqual(report[0][2], 5)
808 self.assertEqual(report[1][2], 5)
Georg Brandl5a650a22005-08-26 08:51:34 +0000809
810 def test_reporthook_8193_bytes(self):
811 # Test on 8193 byte file. Should call reporthook only 3 times (once
812 # when the "network connection" is established, once for the next 8192
813 # bytes, and once for the last byte).
814 report = []
Senthil Kumarane24f96a2012-03-13 19:29:33 -0700815 def hooktester(block_count, block_read_size, file_size, _report=report):
816 _report.append((block_count, block_read_size, file_size))
Guido van Rossum70d0dda2007-08-29 01:53:26 +0000817 srcFileName = self.createNewTempFile(b"x" * 8193)
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000818 urllib.request.urlretrieve(self.constructLocalFileUrl(srcFileName),
Benjamin Petersonee8712c2008-05-20 21:35:26 +0000819 support.TESTFN, hooktester)
Georg Brandl5a650a22005-08-26 08:51:34 +0000820 self.assertEqual(len(report), 3)
Gregory P. Smith6d9388f2012-11-10 15:12:55 -0800821 self.assertEqual(report[0][2], 8193)
822 self.assertEqual(report[0][1], 8192)
Senthil Kumarane24f96a2012-03-13 19:29:33 -0700823 self.assertEqual(report[1][1], 8192)
Gregory P. Smith6d9388f2012-11-10 15:12:55 -0800824 self.assertEqual(report[2][1], 8192)
Skip Montanaro080c9972001-01-28 21:12:22 +0000825
Senthil Kumarance260142011-11-01 01:35:17 +0800826
class urlretrieve_HttpTests(unittest.TestCase, FakeHTTPMixin):
    """Test urllib.urlretrieve() using fake http connections"""

    # Canned reply that announces Content-Length: 100 but carries a much
    # shorter body.
    _SHORT_REPLY = b'''HTTP/1.1 200 OK
Date: Wed, 02 Jan 2008 03:03:54 GMT
Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
Connection: close
Content-Length: 100
Content-Type: text/html; charset=iso-8859-1

FF
'''

    def test_short_content_raises_ContentTooShortError(self):
        # With a reporthook installed, the truncated body must be reported
        # as ContentTooShortError.
        self.addCleanup(urllib.request.urlcleanup)
        self.fakehttp(self._SHORT_REPLY)

        def ignore_progress(block_count, block_size, total_size):
            pass

        try:
            with self.assertRaises(urllib.error.ContentTooShortError):
                urllib.request.urlretrieve(support.TEST_HTTP_URL,
                                           reporthook=ignore_progress)
        finally:
            self.unfakehttp()

    def test_short_content_raises_ContentTooShortError_without_reporthook(self):
        # Same check as above, without passing a reporthook.
        self.addCleanup(urllib.request.urlcleanup)
        self.fakehttp(self._SHORT_REPLY)

        try:
            with self.assertRaises(urllib.error.ContentTooShortError):
                urllib.request.urlretrieve(support.TEST_HTTP_URL)
        finally:
            self.unfakehttp()
870
871
Brett Cannon74bfd702003-04-25 09:39:47 +0000872class QuotingTests(unittest.TestCase):
R David Murray44b548d2016-09-08 13:59:53 -0400873 r"""Tests for urllib.quote() and urllib.quote_plus()
Tim Petersc2659cf2003-05-12 20:19:37 +0000874
Ratnadeep Debnath21024f02017-02-25 14:30:28 +0530875 According to RFC 3986 (Uniform Resource Identifiers), to escape a
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000876 character you write it as '%' + <2 character US-ASCII hex value>.
877 The Python code of ``'%' + hex(ord(<character>))[2:]`` escapes a
878 character properly. Case does not matter on the hex letters.
Brett Cannon74bfd702003-04-25 09:39:47 +0000879
880 The various character sets specified are:
Tim Petersc2659cf2003-05-12 20:19:37 +0000881
Brett Cannon74bfd702003-04-25 09:39:47 +0000882 Reserved characters : ";/?:@&=+$,"
883 Have special meaning in URIs and must be escaped if not being used for
884 their special meaning
885 Data characters : letters, digits, and "-_.!~*'()"
886 Unreserved and do not need to be escaped; can be, though, if desired
887 Control characters : 0x00 - 0x1F, 0x7F
888 Have no use in URIs so must be escaped
889 space : 0x20
890 Must be escaped
891 Delimiters : '<>#%"'
892 Must be escaped
893 Unwise : "{}|\^[]`"
894 Must be escaped
Tim Petersc2659cf2003-05-12 20:19:37 +0000895
Brett Cannon74bfd702003-04-25 09:39:47 +0000896 """
897
898 def test_never_quote(self):
899 # Make sure quote() does not quote letters, digits, and "_,.-"
900 do_not_quote = '' .join(["ABCDEFGHIJKLMNOPQRSTUVWXYZ",
901 "abcdefghijklmnopqrstuvwxyz",
902 "0123456789",
Ratnadeep Debnath21024f02017-02-25 14:30:28 +0530903 "_.-~"])
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000904 result = urllib.parse.quote(do_not_quote)
Brett Cannon74bfd702003-04-25 09:39:47 +0000905 self.assertEqual(do_not_quote, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000906 "using quote(): %r != %r" % (do_not_quote, result))
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000907 result = urllib.parse.quote_plus(do_not_quote)
Brett Cannon74bfd702003-04-25 09:39:47 +0000908 self.assertEqual(do_not_quote, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000909 "using quote_plus(): %r != %r" % (do_not_quote, result))
Brett Cannon74bfd702003-04-25 09:39:47 +0000910
911 def test_default_safe(self):
912 # Test '/' is default value for 'safe' parameter
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000913 self.assertEqual(urllib.parse.quote.__defaults__[0], '/')
Brett Cannon74bfd702003-04-25 09:39:47 +0000914
915 def test_safe(self):
916 # Test setting 'safe' parameter does what it should do
917 quote_by_default = "<>"
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000918 result = urllib.parse.quote(quote_by_default, safe=quote_by_default)
Brett Cannon74bfd702003-04-25 09:39:47 +0000919 self.assertEqual(quote_by_default, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000920 "using quote(): %r != %r" % (quote_by_default, result))
Jeremy Hylton1ef7c6b2009-03-26 16:57:30 +0000921 result = urllib.parse.quote_plus(quote_by_default,
922 safe=quote_by_default)
Brett Cannon74bfd702003-04-25 09:39:47 +0000923 self.assertEqual(quote_by_default, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000924 "using quote_plus(): %r != %r" %
Brett Cannon74bfd702003-04-25 09:39:47 +0000925 (quote_by_default, result))
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000926 # Safe expressed as bytes rather than str
927 result = urllib.parse.quote(quote_by_default, safe=b"<>")
928 self.assertEqual(quote_by_default, result,
929 "using quote(): %r != %r" % (quote_by_default, result))
930 # "Safe" non-ASCII characters should have no effect
931 # (Since URIs are not allowed to have non-ASCII characters)
932 result = urllib.parse.quote("a\xfcb", encoding="latin-1", safe="\xfc")
933 expect = urllib.parse.quote("a\xfcb", encoding="latin-1", safe="")
934 self.assertEqual(expect, result,
935 "using quote(): %r != %r" %
936 (expect, result))
937 # Same as above, but using a bytes rather than str
938 result = urllib.parse.quote("a\xfcb", encoding="latin-1", safe=b"\xfc")
939 expect = urllib.parse.quote("a\xfcb", encoding="latin-1", safe="")
940 self.assertEqual(expect, result,
941 "using quote(): %r != %r" %
942 (expect, result))
Brett Cannon74bfd702003-04-25 09:39:47 +0000943
944 def test_default_quoting(self):
945 # Make sure all characters that should be quoted are by default sans
946 # space (separate test for that).
947 should_quote = [chr(num) for num in range(32)] # For 0x00 - 0x1F
R David Murray44b548d2016-09-08 13:59:53 -0400948 should_quote.append(r'<>#%"{}|\^[]`')
Brett Cannon74bfd702003-04-25 09:39:47 +0000949 should_quote.append(chr(127)) # For 0x7F
950 should_quote = ''.join(should_quote)
951 for char in should_quote:
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000952 result = urllib.parse.quote(char)
Brett Cannon74bfd702003-04-25 09:39:47 +0000953 self.assertEqual(hexescape(char), result,
Jeremy Hylton1ef7c6b2009-03-26 16:57:30 +0000954 "using quote(): "
955 "%s should be escaped to %s, not %s" %
Brett Cannon74bfd702003-04-25 09:39:47 +0000956 (char, hexescape(char), result))
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000957 result = urllib.parse.quote_plus(char)
Brett Cannon74bfd702003-04-25 09:39:47 +0000958 self.assertEqual(hexescape(char), result,
959 "using quote_plus(): "
Tim Petersc2659cf2003-05-12 20:19:37 +0000960 "%s should be escapes to %s, not %s" %
Brett Cannon74bfd702003-04-25 09:39:47 +0000961 (char, hexescape(char), result))
962 del should_quote
963 partial_quote = "ab[]cd"
964 expected = "ab%5B%5Dcd"
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000965 result = urllib.parse.quote(partial_quote)
Brett Cannon74bfd702003-04-25 09:39:47 +0000966 self.assertEqual(expected, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000967 "using quote(): %r != %r" % (expected, result))
Senthil Kumaran305a68e2011-09-13 06:40:27 +0800968 result = urllib.parse.quote_plus(partial_quote)
Brett Cannon74bfd702003-04-25 09:39:47 +0000969 self.assertEqual(expected, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000970 "using quote_plus(): %r != %r" % (expected, result))
Brett Cannon74bfd702003-04-25 09:39:47 +0000971
972 def test_quoting_space(self):
973 # Make sure quote() and quote_plus() handle spaces as specified in
974 # their unique way
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000975 result = urllib.parse.quote(' ')
Brett Cannon74bfd702003-04-25 09:39:47 +0000976 self.assertEqual(result, hexescape(' '),
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000977 "using quote(): %r != %r" % (result, hexescape(' ')))
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000978 result = urllib.parse.quote_plus(' ')
Brett Cannon74bfd702003-04-25 09:39:47 +0000979 self.assertEqual(result, '+',
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000980 "using quote_plus(): %r != +" % result)
Brett Cannon74bfd702003-04-25 09:39:47 +0000981 given = "a b cd e f"
982 expect = given.replace(' ', hexescape(' '))
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000983 result = urllib.parse.quote(given)
Brett Cannon74bfd702003-04-25 09:39:47 +0000984 self.assertEqual(expect, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000985 "using quote(): %r != %r" % (expect, result))
Brett Cannon74bfd702003-04-25 09:39:47 +0000986 expect = given.replace(' ', '+')
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000987 result = urllib.parse.quote_plus(given)
Brett Cannon74bfd702003-04-25 09:39:47 +0000988 self.assertEqual(expect, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000989 "using quote_plus(): %r != %r" % (expect, result))
Brett Cannon74bfd702003-04-25 09:39:47 +0000990
Raymond Hettinger2bdec7b2005-09-10 14:30:09 +0000991 def test_quoting_plus(self):
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000992 self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma'),
Raymond Hettinger2bdec7b2005-09-10 14:30:09 +0000993 'alpha%2Bbeta+gamma')
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000994 self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma', '+'),
Raymond Hettinger2bdec7b2005-09-10 14:30:09 +0000995 'alpha+beta+gamma')
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000996 # Test with bytes
997 self.assertEqual(urllib.parse.quote_plus(b'alpha+beta gamma'),
998 'alpha%2Bbeta+gamma')
999 # Test with safe bytes
1000 self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma', b'+'),
1001 'alpha+beta+gamma')
1002
1003 def test_quote_bytes(self):
1004 # Bytes should quote directly to percent-encoded values
1005 given = b"\xa2\xd8ab\xff"
1006 expect = "%A2%D8ab%FF"
1007 result = urllib.parse.quote(given)
1008 self.assertEqual(expect, result,
1009 "using quote(): %r != %r" % (expect, result))
1010 # Encoding argument should raise type error on bytes input
1011 self.assertRaises(TypeError, urllib.parse.quote, given,
1012 encoding="latin-1")
1013 # quote_from_bytes should work the same
1014 result = urllib.parse.quote_from_bytes(given)
1015 self.assertEqual(expect, result,
1016 "using quote_from_bytes(): %r != %r"
1017 % (expect, result))
1018
1019 def test_quote_with_unicode(self):
1020 # Characters in Latin-1 range, encoded by default in UTF-8
1021 given = "\xa2\xd8ab\xff"
1022 expect = "%C2%A2%C3%98ab%C3%BF"
1023 result = urllib.parse.quote(given)
1024 self.assertEqual(expect, result,
1025 "using quote(): %r != %r" % (expect, result))
1026 # Characters in Latin-1 range, encoded by with None (default)
1027 result = urllib.parse.quote(given, encoding=None, errors=None)
1028 self.assertEqual(expect, result,
1029 "using quote(): %r != %r" % (expect, result))
1030 # Characters in Latin-1 range, encoded with Latin-1
1031 given = "\xa2\xd8ab\xff"
1032 expect = "%A2%D8ab%FF"
1033 result = urllib.parse.quote(given, encoding="latin-1")
1034 self.assertEqual(expect, result,
1035 "using quote(): %r != %r" % (expect, result))
1036 # Characters in BMP, encoded by default in UTF-8
1037 given = "\u6f22\u5b57" # "Kanji"
1038 expect = "%E6%BC%A2%E5%AD%97"
1039 result = urllib.parse.quote(given)
1040 self.assertEqual(expect, result,
1041 "using quote(): %r != %r" % (expect, result))
1042 # Characters in BMP, encoded with Latin-1
1043 given = "\u6f22\u5b57"
1044 self.assertRaises(UnicodeEncodeError, urllib.parse.quote, given,
1045 encoding="latin-1")
1046 # Characters in BMP, encoded with Latin-1, with replace error handling
1047 given = "\u6f22\u5b57"
1048 expect = "%3F%3F" # "??"
1049 result = urllib.parse.quote(given, encoding="latin-1",
1050 errors="replace")
1051 self.assertEqual(expect, result,
1052 "using quote(): %r != %r" % (expect, result))
1053 # Characters in BMP, Latin-1, with xmlcharref error handling
1054 given = "\u6f22\u5b57"
1055 expect = "%26%2328450%3B%26%2323383%3B" # "&#28450;&#23383;"
1056 result = urllib.parse.quote(given, encoding="latin-1",
1057 errors="xmlcharrefreplace")
1058 self.assertEqual(expect, result,
1059 "using quote(): %r != %r" % (expect, result))
Raymond Hettinger2bdec7b2005-09-10 14:30:09 +00001060
Georg Brandlfaf41492009-05-26 18:31:11 +00001061 def test_quote_plus_with_unicode(self):
1062 # Encoding (latin-1) test for quote_plus
1063 given = "\xa2\xd8 \xff"
1064 expect = "%A2%D8+%FF"
1065 result = urllib.parse.quote_plus(given, encoding="latin-1")
1066 self.assertEqual(expect, result,
1067 "using quote_plus(): %r != %r" % (expect, result))
1068 # Errors test for quote_plus
1069 given = "ab\u6f22\u5b57 cd"
1070 expect = "ab%3F%3F+cd"
1071 result = urllib.parse.quote_plus(given, encoding="latin-1",
1072 errors="replace")
1073 self.assertEqual(expect, result,
1074 "using quote_plus(): %r != %r" % (expect, result))
1075
Senthil Kumarand496c4c2010-07-30 19:34:36 +00001076
Brett Cannon74bfd702003-04-25 09:39:47 +00001077class UnquotingTests(unittest.TestCase):
1078 """Tests for unquote() and unquote_plus()
Tim Petersc2659cf2003-05-12 20:19:37 +00001079
Brett Cannon74bfd702003-04-25 09:39:47 +00001080 See the doc string for quoting_Tests for details on quoting and such.
1081
1082 """
1083
1084 def test_unquoting(self):
1085 # Make sure unquoting of all ASCII values works
1086 escape_list = []
1087 for num in range(128):
1088 given = hexescape(chr(num))
1089 expect = chr(num)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001090 result = urllib.parse.unquote(given)
Brett Cannon74bfd702003-04-25 09:39:47 +00001091 self.assertEqual(expect, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +00001092 "using unquote(): %r != %r" % (expect, result))
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001093 result = urllib.parse.unquote_plus(given)
Brett Cannon74bfd702003-04-25 09:39:47 +00001094 self.assertEqual(expect, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +00001095 "using unquote_plus(): %r != %r" %
Brett Cannon74bfd702003-04-25 09:39:47 +00001096 (expect, result))
1097 escape_list.append(given)
1098 escape_string = ''.join(escape_list)
1099 del escape_list
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001100 result = urllib.parse.unquote(escape_string)
Brett Cannon74bfd702003-04-25 09:39:47 +00001101 self.assertEqual(result.count('%'), 1,
Brett Cannon74bfd702003-04-25 09:39:47 +00001102 "using unquote(): not all characters escaped: "
1103 "%s" % result)
Georg Brandl604ef372010-07-31 08:20:02 +00001104 self.assertRaises((TypeError, AttributeError), urllib.parse.unquote, None)
1105 self.assertRaises((TypeError, AttributeError), urllib.parse.unquote, ())
Brett Cannon74bfd702003-04-25 09:39:47 +00001106
Guido van Rossum52dbbb92008-08-18 21:44:30 +00001107 def test_unquoting_badpercent(self):
1108 # Test unquoting on bad percent-escapes
1109 given = '%xab'
1110 expect = given
1111 result = urllib.parse.unquote(given)
1112 self.assertEqual(expect, result, "using unquote(): %r != %r"
1113 % (expect, result))
1114 given = '%x'
1115 expect = given
1116 result = urllib.parse.unquote(given)
1117 self.assertEqual(expect, result, "using unquote(): %r != %r"
1118 % (expect, result))
1119 given = '%'
1120 expect = given
1121 result = urllib.parse.unquote(given)
1122 self.assertEqual(expect, result, "using unquote(): %r != %r"
1123 % (expect, result))
1124 # unquote_to_bytes
1125 given = '%xab'
1126 expect = bytes(given, 'ascii')
1127 result = urllib.parse.unquote_to_bytes(given)
1128 self.assertEqual(expect, result, "using unquote_to_bytes(): %r != %r"
1129 % (expect, result))
1130 given = '%x'
1131 expect = bytes(given, 'ascii')
1132 result = urllib.parse.unquote_to_bytes(given)
1133 self.assertEqual(expect, result, "using unquote_to_bytes(): %r != %r"
1134 % (expect, result))
1135 given = '%'
1136 expect = bytes(given, 'ascii')
1137 result = urllib.parse.unquote_to_bytes(given)
1138 self.assertEqual(expect, result, "using unquote_to_bytes(): %r != %r"
1139 % (expect, result))
Georg Brandl604ef372010-07-31 08:20:02 +00001140 self.assertRaises((TypeError, AttributeError), urllib.parse.unquote_to_bytes, None)
1141 self.assertRaises((TypeError, AttributeError), urllib.parse.unquote_to_bytes, ())
Senthil Kumaran79e17f62010-07-19 18:17:19 +00001142
Guido van Rossum52dbbb92008-08-18 21:44:30 +00001143 def test_unquoting_mixed_case(self):
1144 # Test unquoting on mixed-case hex digits in the percent-escapes
1145 given = '%Ab%eA'
1146 expect = b'\xab\xea'
1147 result = urllib.parse.unquote_to_bytes(given)
1148 self.assertEqual(expect, result,
1149 "using unquote_to_bytes(): %r != %r"
1150 % (expect, result))
1151
Brett Cannon74bfd702003-04-25 09:39:47 +00001152 def test_unquoting_parts(self):
1153 # Make sure unquoting works when have non-quoted characters
1154 # interspersed
1155 given = 'ab%sd' % hexescape('c')
1156 expect = "abcd"
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001157 result = urllib.parse.unquote(given)
Brett Cannon74bfd702003-04-25 09:39:47 +00001158 self.assertEqual(expect, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +00001159 "using quote(): %r != %r" % (expect, result))
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001160 result = urllib.parse.unquote_plus(given)
Brett Cannon74bfd702003-04-25 09:39:47 +00001161 self.assertEqual(expect, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +00001162 "using unquote_plus(): %r != %r" % (expect, result))
Tim Petersc2659cf2003-05-12 20:19:37 +00001163
Brett Cannon74bfd702003-04-25 09:39:47 +00001164 def test_unquoting_plus(self):
1165 # Test difference between unquote() and unquote_plus()
1166 given = "are+there+spaces..."
1167 expect = given
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001168 result = urllib.parse.unquote(given)
Brett Cannon74bfd702003-04-25 09:39:47 +00001169 self.assertEqual(expect, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +00001170 "using unquote(): %r != %r" % (expect, result))
Brett Cannon74bfd702003-04-25 09:39:47 +00001171 expect = given.replace('+', ' ')
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001172 result = urllib.parse.unquote_plus(given)
Brett Cannon74bfd702003-04-25 09:39:47 +00001173 self.assertEqual(expect, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +00001174 "using unquote_plus(): %r != %r" % (expect, result))
1175
1176 def test_unquote_to_bytes(self):
1177 given = 'br%C3%BCckner_sapporo_20050930.doc'
1178 expect = b'br\xc3\xbcckner_sapporo_20050930.doc'
1179 result = urllib.parse.unquote_to_bytes(given)
1180 self.assertEqual(expect, result,
1181 "using unquote_to_bytes(): %r != %r"
1182 % (expect, result))
1183 # Test on a string with unescaped non-ASCII characters
1184 # (Technically an invalid URI; expect those characters to be UTF-8
1185 # encoded).
1186 result = urllib.parse.unquote_to_bytes("\u6f22%C3%BC")
1187 expect = b'\xe6\xbc\xa2\xc3\xbc' # UTF-8 for "\u6f22\u00fc"
1188 self.assertEqual(expect, result,
1189 "using unquote_to_bytes(): %r != %r"
1190 % (expect, result))
1191 # Test with a bytes as input
1192 given = b'%A2%D8ab%FF'
1193 expect = b'\xa2\xd8ab\xff'
1194 result = urllib.parse.unquote_to_bytes(given)
1195 self.assertEqual(expect, result,
1196 "using unquote_to_bytes(): %r != %r"
1197 % (expect, result))
1198 # Test with a bytes as input, with unescaped non-ASCII bytes
1199 # (Technically an invalid URI; expect those bytes to be preserved)
1200 given = b'%A2\xd8ab%FF'
1201 expect = b'\xa2\xd8ab\xff'
1202 result = urllib.parse.unquote_to_bytes(given)
1203 self.assertEqual(expect, result,
1204 "using unquote_to_bytes(): %r != %r"
1205 % (expect, result))
Brett Cannon74bfd702003-04-25 09:39:47 +00001206
Raymond Hettinger4b0f20d2005-10-15 16:41:53 +00001207 def test_unquote_with_unicode(self):
Guido van Rossum52dbbb92008-08-18 21:44:30 +00001208 # Characters in the Latin-1 range, encoded with UTF-8
1209 given = 'br%C3%BCckner_sapporo_20050930.doc'
1210 expect = 'br\u00fcckner_sapporo_20050930.doc'
1211 result = urllib.parse.unquote(given)
1212 self.assertEqual(expect, result,
1213 "using unquote(): %r != %r" % (expect, result))
1214 # Characters in the Latin-1 range, encoded with None (default)
1215 result = urllib.parse.unquote(given, encoding=None, errors=None)
1216 self.assertEqual(expect, result,
1217 "using unquote(): %r != %r" % (expect, result))
1218
1219 # Characters in the Latin-1 range, encoded with Latin-1
1220 result = urllib.parse.unquote('br%FCckner_sapporo_20050930.doc',
1221 encoding="latin-1")
1222 expect = 'br\u00fcckner_sapporo_20050930.doc'
1223 self.assertEqual(expect, result,
1224 "using unquote(): %r != %r" % (expect, result))
1225
1226 # Characters in BMP, encoded with UTF-8
1227 given = "%E6%BC%A2%E5%AD%97"
1228 expect = "\u6f22\u5b57" # "Kanji"
1229 result = urllib.parse.unquote(given)
1230 self.assertEqual(expect, result,
1231 "using unquote(): %r != %r" % (expect, result))
1232
1233 # Decode with UTF-8, invalid sequence
1234 given = "%F3%B1"
1235 expect = "\ufffd" # Replacement character
1236 result = urllib.parse.unquote(given)
1237 self.assertEqual(expect, result,
1238 "using unquote(): %r != %r" % (expect, result))
1239
1240 # Decode with UTF-8, invalid sequence, replace errors
1241 result = urllib.parse.unquote(given, errors="replace")
1242 self.assertEqual(expect, result,
1243 "using unquote(): %r != %r" % (expect, result))
1244
1245 # Decode with UTF-8, invalid sequence, ignoring errors
1246 given = "%F3%B1"
1247 expect = ""
1248 result = urllib.parse.unquote(given, errors="ignore")
1249 self.assertEqual(expect, result,
1250 "using unquote(): %r != %r" % (expect, result))
1251
1252 # A mix of non-ASCII and percent-encoded characters, UTF-8
1253 result = urllib.parse.unquote("\u6f22%C3%BC")
1254 expect = '\u6f22\u00fc'
1255 self.assertEqual(expect, result,
1256 "using unquote(): %r != %r" % (expect, result))
1257
1258 # A mix of non-ASCII and percent-encoded characters, Latin-1
1259 # (Note, the string contains non-Latin-1-representable characters)
1260 result = urllib.parse.unquote("\u6f22%FC", encoding="latin-1")
1261 expect = '\u6f22\u00fc'
1262 self.assertEqual(expect, result,
1263 "using unquote(): %r != %r" % (expect, result))
Raymond Hettinger4b0f20d2005-10-15 16:41:53 +00001264
Stein Karlsenaad2ee02019-10-14 12:36:29 +02001265 def test_unquoting_with_bytes_input(self):
1266 # ASCII characters decoded to a string
1267 given = b'blueberryjam'
1268 expect = 'blueberryjam'
1269 result = urllib.parse.unquote(given)
1270 self.assertEqual(expect, result,
1271 "using unquote(): %r != %r" % (expect, result))
1272
1273 # A mix of non-ASCII hex-encoded characters and ASCII characters
1274 given = b'bl\xc3\xa5b\xc3\xa6rsyltet\xc3\xb8y'
1275 expect = 'bl\u00e5b\u00e6rsyltet\u00f8y'
1276 result = urllib.parse.unquote(given)
1277 self.assertEqual(expect, result,
1278 "using unquote(): %r != %r" % (expect, result))
1279
1280 # A mix of non-ASCII percent-encoded characters and ASCII characters
1281 given = b'bl%c3%a5b%c3%a6rsyltet%c3%b8j'
1282 expect = 'bl\u00e5b\u00e6rsyltet\u00f8j'
1283 result = urllib.parse.unquote(given)
1284 self.assertEqual(expect, result,
1285 "using unquote(): %r != %r" % (expect, result))
1286
1287
Brett Cannon74bfd702003-04-25 09:39:47 +00001288class urlencode_Tests(unittest.TestCase):
1289 """Tests for urlencode()"""
1290
1291 def help_inputtype(self, given, test_type):
1292 """Helper method for testing different input types.
Tim Petersc2659cf2003-05-12 20:19:37 +00001293
Brett Cannon74bfd702003-04-25 09:39:47 +00001294 'given' must lead to only the pairs:
1295 * 1st, 1
1296 * 2nd, 2
1297 * 3rd, 3
Tim Petersc2659cf2003-05-12 20:19:37 +00001298
Brett Cannon74bfd702003-04-25 09:39:47 +00001299 Test cannot assume anything about order. Docs make no guarantee and
1300 have possible dictionary input.
Tim Petersc2659cf2003-05-12 20:19:37 +00001301
Brett Cannon74bfd702003-04-25 09:39:47 +00001302 """
1303 expect_somewhere = ["1st=1", "2nd=2", "3rd=3"]
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001304 result = urllib.parse.urlencode(given)
Brett Cannon74bfd702003-04-25 09:39:47 +00001305 for expected in expect_somewhere:
Ezio Melottib58e0bd2010-01-23 15:40:09 +00001306 self.assertIn(expected, result,
Brett Cannon74bfd702003-04-25 09:39:47 +00001307 "testing %s: %s not found in %s" %
1308 (test_type, expected, result))
1309 self.assertEqual(result.count('&'), 2,
1310 "testing %s: expected 2 '&'s; got %s" %
1311 (test_type, result.count('&')))
1312 amp_location = result.index('&')
1313 on_amp_left = result[amp_location - 1]
1314 on_amp_right = result[amp_location + 1]
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00001315 self.assertTrue(on_amp_left.isdigit() and on_amp_right.isdigit(),
Brett Cannon74bfd702003-04-25 09:39:47 +00001316 "testing %s: '&' not located in proper place in %s" %
1317 (test_type, result))
1318 self.assertEqual(len(result), (5 * 3) + 2, #5 chars per thing and amps
1319 "testing %s: "
1320 "unexpected number of characters: %s != %s" %
1321 (test_type, len(result), (5 * 3) + 2))
1322
1323 def test_using_mapping(self):
1324 # Test passing in a mapping object as an argument.
1325 self.help_inputtype({"1st":'1', "2nd":'2', "3rd":'3'},
1326 "using dict as input type")
1327
1328 def test_using_sequence(self):
1329 # Test passing in a sequence of two-item sequences as an argument.
1330 self.help_inputtype([('1st', '1'), ('2nd', '2'), ('3rd', '3')],
1331 "using sequence of two-item tuples as input")
1332
1333 def test_quoting(self):
1334 # Make sure keys and values are quoted using quote_plus()
1335 given = {"&":"="}
1336 expect = "%s=%s" % (hexescape('&'), hexescape('='))
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001337 result = urllib.parse.urlencode(given)
Brett Cannon74bfd702003-04-25 09:39:47 +00001338 self.assertEqual(expect, result)
1339 given = {"key name":"A bunch of pluses"}
1340 expect = "key+name=A+bunch+of+pluses"
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001341 result = urllib.parse.urlencode(given)
Brett Cannon74bfd702003-04-25 09:39:47 +00001342 self.assertEqual(expect, result)
1343
1344 def test_doseq(self):
1345 # Test that passing True for 'doseq' parameter works correctly
1346 given = {'sequence':['1', '2', '3']}
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001347 expect = "sequence=%s" % urllib.parse.quote_plus(str(['1', '2', '3']))
1348 result = urllib.parse.urlencode(given)
Brett Cannon74bfd702003-04-25 09:39:47 +00001349 self.assertEqual(expect, result)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001350 result = urllib.parse.urlencode(given, True)
Brett Cannon74bfd702003-04-25 09:39:47 +00001351 for value in given["sequence"]:
1352 expect = "sequence=%s" % value
Ezio Melottib58e0bd2010-01-23 15:40:09 +00001353 self.assertIn(expect, result)
Brett Cannon74bfd702003-04-25 09:39:47 +00001354 self.assertEqual(result.count('&'), 2,
1355 "Expected 2 '&'s, got %s" % result.count('&'))
1356
Jeremy Hylton1ef7c6b2009-03-26 16:57:30 +00001357 def test_empty_sequence(self):
1358 self.assertEqual("", urllib.parse.urlencode({}))
1359 self.assertEqual("", urllib.parse.urlencode([]))
1360
1361 def test_nonstring_values(self):
1362 self.assertEqual("a=1", urllib.parse.urlencode({"a": 1}))
1363 self.assertEqual("a=None", urllib.parse.urlencode({"a": None}))
1364
1365 def test_nonstring_seq_values(self):
1366 self.assertEqual("a=1&a=2", urllib.parse.urlencode({"a": [1, 2]}, True))
1367 self.assertEqual("a=None&a=a",
1368 urllib.parse.urlencode({"a": [None, "a"]}, True))
Georg Brandl2daf6ae2012-02-20 19:54:16 +01001369 data = collections.OrderedDict([("a", 1), ("b", 1)])
Jeremy Hylton1ef7c6b2009-03-26 16:57:30 +00001370 self.assertEqual("a=a&a=b",
Georg Brandl2daf6ae2012-02-20 19:54:16 +01001371 urllib.parse.urlencode({"a": data}, True))
Jeremy Hylton1ef7c6b2009-03-26 16:57:30 +00001372
Senthil Kumarandf022da2010-07-03 17:48:22 +00001373 def test_urlencode_encoding(self):
1374 # ASCII encoding. Expect %3F with errors="replace'
1375 given = (('\u00a0', '\u00c1'),)
1376 expect = '%3F=%3F'
1377 result = urllib.parse.urlencode(given, encoding="ASCII", errors="replace")
1378 self.assertEqual(expect, result)
1379
1380 # Default is UTF-8 encoding.
1381 given = (('\u00a0', '\u00c1'),)
1382 expect = '%C2%A0=%C3%81'
1383 result = urllib.parse.urlencode(given)
1384 self.assertEqual(expect, result)
1385
1386 # Latin-1 encoding.
1387 given = (('\u00a0', '\u00c1'),)
1388 expect = '%A0=%C1'
1389 result = urllib.parse.urlencode(given, encoding="latin-1")
1390 self.assertEqual(expect, result)
1391
1392 def test_urlencode_encoding_doseq(self):
1393 # ASCII Encoding. Expect %3F with errors="replace'
1394 given = (('\u00a0', '\u00c1'),)
1395 expect = '%3F=%3F'
1396 result = urllib.parse.urlencode(given, doseq=True,
1397 encoding="ASCII", errors="replace")
1398 self.assertEqual(expect, result)
1399
1400 # ASCII Encoding. On a sequence of values.
1401 given = (("\u00a0", (1, "\u00c1")),)
1402 expect = '%3F=1&%3F=%3F'
1403 result = urllib.parse.urlencode(given, True,
1404 encoding="ASCII", errors="replace")
1405 self.assertEqual(expect, result)
1406
1407 # Utf-8
1408 given = (("\u00a0", "\u00c1"),)
1409 expect = '%C2%A0=%C3%81'
1410 result = urllib.parse.urlencode(given, True)
1411 self.assertEqual(expect, result)
1412
1413 given = (("\u00a0", (42, "\u00c1")),)
1414 expect = '%C2%A0=42&%C2%A0=%C3%81'
1415 result = urllib.parse.urlencode(given, True)
1416 self.assertEqual(expect, result)
1417
1418 # latin-1
1419 given = (("\u00a0", "\u00c1"),)
1420 expect = '%A0=%C1'
1421 result = urllib.parse.urlencode(given, True, encoding="latin-1")
1422 self.assertEqual(expect, result)
1423
1424 given = (("\u00a0", (42, "\u00c1")),)
1425 expect = '%A0=42&%A0=%C1'
1426 result = urllib.parse.urlencode(given, True, encoding="latin-1")
1427 self.assertEqual(expect, result)
1428
1429 def test_urlencode_bytes(self):
1430 given = ((b'\xa0\x24', b'\xc1\x24'),)
1431 expect = '%A0%24=%C1%24'
1432 result = urllib.parse.urlencode(given)
1433 self.assertEqual(expect, result)
1434 result = urllib.parse.urlencode(given, True)
1435 self.assertEqual(expect, result)
1436
1437 # Sequence of values
1438 given = ((b'\xa0\x24', (42, b'\xc1\x24')),)
1439 expect = '%A0%24=42&%A0%24=%C1%24'
1440 result = urllib.parse.urlencode(given, True)
1441 self.assertEqual(expect, result)
1442
1443 def test_urlencode_encoding_safe_parameter(self):
1444
1445 # Send '$' (\x24) as safe character
1446 # Default utf-8 encoding
1447
1448 given = ((b'\xa0\x24', b'\xc1\x24'),)
1449 result = urllib.parse.urlencode(given, safe=":$")
1450 expect = '%A0$=%C1$'
1451 self.assertEqual(expect, result)
1452
1453 given = ((b'\xa0\x24', b'\xc1\x24'),)
1454 result = urllib.parse.urlencode(given, doseq=True, safe=":$")
1455 expect = '%A0$=%C1$'
1456 self.assertEqual(expect, result)
1457
1458 # Safe parameter in sequence
1459 given = ((b'\xa0\x24', (b'\xc1\x24', 0xd, 42)),)
1460 expect = '%A0$=%C1$&%A0$=13&%A0$=42'
1461 result = urllib.parse.urlencode(given, True, safe=":$")
1462 self.assertEqual(expect, result)
1463
1464 # Test all above in latin-1 encoding
1465
1466 given = ((b'\xa0\x24', b'\xc1\x24'),)
1467 result = urllib.parse.urlencode(given, safe=":$",
1468 encoding="latin-1")
1469 expect = '%A0$=%C1$'
1470 self.assertEqual(expect, result)
1471
1472 given = ((b'\xa0\x24', b'\xc1\x24'),)
1473 expect = '%A0$=%C1$'
1474 result = urllib.parse.urlencode(given, doseq=True, safe=":$",
1475 encoding="latin-1")
1476
1477 given = ((b'\xa0\x24', (b'\xc1\x24', 0xd, 42)),)
1478 expect = '%A0$=%C1$&%A0$=13&%A0$=42'
1479 result = urllib.parse.urlencode(given, True, safe=":$",
1480 encoding="latin-1")
1481 self.assertEqual(expect, result)
1482
class Pathname_Tests(unittest.TestCase):
    """Test pathname2url() and url2pathname()"""

    def test_basic(self):
        # Make sure simple tests pass
        expected_path = os.path.join("parts", "of", "a", "path")
        expected_url = "parts/of/a/path"
        result = urllib.request.pathname2url(expected_path)
        self.assertEqual(expected_url, result,
                         "pathname2url() failed; %s != %s" %
                         (expected_url, result))
        result = urllib.request.url2pathname(expected_url)
        self.assertEqual(expected_path, result,
                         "url2pathname() failed; %s != %s" %
                         (expected_path, result))

    def test_quoting(self):
        # Test automatic quoting and unquoting works for pathname2url() and
        # url2pathname() respectively
        given = os.path.join("needs", "quot=ing", "here")
        expect = "needs/%s/here" % urllib.parse.quote("quot=ing")
        result = urllib.request.pathname2url(given)
        self.assertEqual(expect, result,
                         "pathname2url() failed; %s != %s" %
                         (expect, result))
        # Feeding the quoted URL back must restore the original path
        expect = given
        result = urllib.request.url2pathname(result)
        self.assertEqual(expect, result,
                         "url2pathname() failed; %s != %s" %
                         (expect, result))
        given = os.path.join("make sure", "using_quote")
        expect = "%s/using_quote" % urllib.parse.quote("make sure")
        result = urllib.request.pathname2url(given)
        self.assertEqual(expect, result,
                         "pathname2url() failed; %s != %s" %
                         (expect, result))
        # A literal '+' in the URL must survive unquoting unchanged
        given = "make+sure/using_unquote"
        expect = os.path.join("make+sure", "using_unquote")
        result = urllib.request.url2pathname(given)
        self.assertEqual(expect, result,
                         "url2pathname() failed; %s != %s" %
                         (expect, result))

    @unittest.skipUnless(sys.platform == 'win32',
                         'test specific to the urllib.url2path function.')
    def test_ntpath(self):
        # Several spellings of a bare drive URL map to the same drive root
        given = ('/C:/', '///C:/', '/C|//')
        expect = 'C:\\'
        for url in given:
            result = urllib.request.url2pathname(url)
            self.assertEqual(expect, result,
                             'urllib.request.url2pathname() failed; %s != %s' %
                             (expect, result))
        given = '///C|/path'
        expect = 'C:\\path'
        result = urllib.request.url2pathname(given)
        self.assertEqual(expect, result,
                         'urllib.request.url2pathname() failed; %s != %s' %
                         (expect, result))
1542
class Utility_Tests(unittest.TestCase):
    """Checks for the assorted helper functions that urllib exposes."""

    def test_thishost(self):
        """urllib.request.thishost() is expected to hand back a tuple."""
        host_info = urllib.request.thishost()
        self.assertIsInstance(host_info, tuple)
1549
Senthil Kumaran690ce9b2009-05-05 18:41:13 +00001550
class URLopener_Tests(FakeHTTPMixin, unittest.TestCase):
    """Checks for the open() and retrieve() methods of URLopener."""

    def test_quoted_open(self):
        # Opener whose 'spam' scheme handler echoes the URL it receives
        class DummyURLopener(urllib.request.URLopener):
            def open_spam(self, url):
                return url

        expected_warning = (
            'DummyURLopener style of invoking requests is deprecated.',
            DeprecationWarning)
        with support.check_warnings(expected_warning):
            opened = DummyURLopener().open('spam://example/ /')
            self.assertEqual(opened, '//example/%20/')

            # test the safe characters are not quoted by urlopen
            opened = DummyURLopener().open(
                "spam://c:|windows%/:=&?~#+!$,;'@()*[]|/path/")
            self.assertEqual(opened,
                             "//c:|windows%/:=&?~#+!$,;'@()*[]|/path/")

    @support.ignore_warnings(category=DeprecationWarning)
    def test_urlopener_retrieve_file(self):
        with support.temp_dir() as tmpdir:
            handle, local_path = tempfile.mkstemp(dir=tmpdir)
            os.close(handle)
            file_url = "file:" + urllib.request.pathname2url(local_path)
            opener = urllib.request.URLopener()
            retrieved_path, _ = opener.retrieve(file_url)
            # Some buildbots have TEMP folder that uses a lowercase drive
            # letter, so compare case-normalised paths.
            self.assertEqual(os.path.normcase(retrieved_path),
                             os.path.normcase(local_path))

    @support.ignore_warnings(category=DeprecationWarning)
    def test_urlopener_retrieve_remote(self):
        # Serve a canned response instead of touching the network
        self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello!")
        self.addCleanup(self.unfakehttp)
        remote_url = "http://www.python.org/file.txt"
        saved_as, _ = urllib.request.URLopener().retrieve(remote_url)
        extension = os.path.splitext(saved_as)[1]
        self.assertEqual(extension, ".txt")

    @support.ignore_warnings(category=DeprecationWarning)
    def test_local_file_open(self):
        # bpo-35907, CVE-2019-9948: urllib must reject local_file:// scheme
        class DummyURLopener(urllib.request.URLopener):
            def open_local_file(self, url):
                return url

        rejected_urls = ('local_file://example', 'local-file://example')
        for url in rejected_urls:
            self.assertRaises(OSError, urllib.request.urlopen, url)
            plain_opener = urllib.request.URLopener()
            self.assertRaises(OSError, plain_opener.open, url)
            plain_opener = urllib.request.URLopener()
            self.assertRaises(OSError, plain_opener.retrieve, url)
            dummy_opener = DummyURLopener()
            self.assertRaises(OSError, dummy_opener.open, url)
            dummy_opener = DummyURLopener()
            self.assertRaises(OSError, dummy_opener.retrieve, url)
1599
Xtreakc661b302019-05-19 19:10:06 +05301600
class RequestTests(unittest.TestCase):
    """Unit tests for urllib.request.Request."""

    def test_default_values(self):
        """With no explicit method: GET without data, POST with data."""
        url = "http://www.python.org"
        without_data = urllib.request.Request(url)
        self.assertEqual(without_data.get_method(), 'GET')
        with_data = urllib.request.Request(url, {})
        self.assertEqual(with_data.get_method(), 'POST')

    def test_with_method_arg(self):
        """An explicit method is reported by both .method and get_method()."""
        url = "http://www.python.org"
        make_request = urllib.request.Request

        head_only = make_request(url, method='HEAD')
        self.assertEqual(head_only.method, 'HEAD')
        self.assertEqual(head_only.get_method(), 'HEAD')

        # Supplying data does not override the explicit method
        head_with_data = make_request(url, {}, method='HEAD')
        self.assertEqual(head_with_data.method, 'HEAD')
        self.assertEqual(head_with_data.get_method(), 'HEAD')

        # Reassigning .method afterwards is reflected by get_method()
        reassigned = make_request(url, method='GET')
        self.assertEqual(reassigned.get_method(), 'GET')
        reassigned.method = 'HEAD'
        self.assertEqual(reassigned.get_method(), 'HEAD')
Skip Montanaro080c9972001-01-28 21:12:22 +00001623
1624
class URL2PathNameTests(unittest.TestCase):
    """Checks for the url2pathname() imported from nturl2path."""

    def test_converting_drive_letter(self):
        # Both '|' and ':' are accepted after the drive letter
        conversions = (
            ("///C|", 'C:'),
            ("///C:", 'C:'),
            ("///C|/", 'C:\\'),
        )
        for url, wanted in conversions:
            self.assertEqual(url2pathname(url), wanted)

    def test_converting_when_no_drive_letter(self):
        # cannot end a raw string in \
        conversions = (
            ("///C/test/", r'\\\C\test' '\\'),
            ("////C/test/", r'\\C\test' '\\'),
        )
        for url, wanted in conversions:
            self.assertEqual(url2pathname(url), wanted)

    def test_simple_compare(self):
        converted = url2pathname("///C|/foo/bar/spam.foo")
        self.assertEqual(converted, r'C:\foo\bar\spam.foo')

    def test_non_ascii_drive_letter(self):
        with self.assertRaises(IOError):
            url2pathname("///\u00e8|/")

    def test_roundtrip_url2pathname(self):
        # path -> URL -> path must give back the starting path
        list_of_paths = ('C:',
                         r'\\\C\test\\',
                         r'C:\foo\bar\spam.foo')
        for original in list_of_paths:
            as_url = pathname2url(original)
            self.assertEqual(url2pathname(as_url), original)
Senthil Kumaran277e9092013-04-10 20:51:19 -07001651
class PathName2URLTests(unittest.TestCase):
    """Checks for the pathname2url() imported from nturl2path."""

    def test_converting_drive_letter(self):
        # A bare drive and a drive root produce the same URL
        for path in ("C:", "C:\\"):
            self.assertEqual(pathname2url(path), '///C:')

    def test_converting_when_no_drive_letter(self):
        # Raw strings cannot end in a backslash, hence the concatenation
        conversions = (
            (r"\\\folder\test" "\\", '/////folder/test/'),
            (r"\\folder\test" "\\", '////folder/test/'),
            (r"\folder\test" "\\", '/folder/test/'),
        )
        for path, wanted in conversions:
            self.assertEqual(pathname2url(path), wanted)

    def test_simple_compare(self):
        converted = pathname2url(r'C:\foo\bar\spam.foo')
        self.assertEqual(converted, "///C:/foo/bar/spam.foo")

    def test_long_drive_letter(self):
        with self.assertRaises(IOError):
            pathname2url("XX:\\")

    def test_roundtrip_pathname2url(self):
        # URL -> path -> URL must give back the starting URL
        list_of_paths = ('///C:',
                         '/////folder/test/',
                         '///C:/foo/bar/spam.foo')
        for original in list_of_paths:
            as_path = url2pathname(original)
            self.assertEqual(pathname2url(as_path), original)
Brett Cannon74bfd702003-04-25 09:39:47 +00001679
# Run the test cases in this module when the file is executed as a script.
if __name__ == '__main__':
    unittest.main()