blob: 2e82fc7b7b861b7598e08911bd1ff02cc83fde42 [file] [log] [blame]
"""Regression tests for what was in Python 2's "urllib" module"""
Brett Cannon74bfd702003-04-25 09:39:47 +00002
Jeremy Hylton1afc1692008-06-18 20:49:58 +00003import urllib.parse
4import urllib.request
guido@google.coma119df92011-03-29 11:41:02 -07005import urllib.error
Georg Brandl24420152008-05-26 16:32:26 +00006import http.client
Barry Warsaw820c1202008-06-12 04:06:45 +00007import email.message
Jeremy Hylton66dc8c52007-08-04 03:42:26 +00008import io
Brett Cannon74bfd702003-04-25 09:39:47 +00009import unittest
Benjamin Peterson3c2dca62014-06-07 15:08:04 -070010from unittest.mock import patch
Benjamin Petersonee8712c2008-05-20 21:35:26 +000011from test import support
Brett Cannon74bfd702003-04-25 09:39:47 +000012import os
Antoine Pitrou07df6552014-11-02 17:23:14 +010013try:
14 import ssl
15except ImportError:
16 ssl = None
Senthil Kumaran2d2ea1b2011-04-14 13:16:30 +080017import sys
Georg Brandl5a650a22005-08-26 08:51:34 +000018import tempfile
Senthil Kumaran277e9092013-04-10 20:51:19 -070019from nturl2path import url2pathname, pathname2url
Jeremy Hylton6102e292000-08-31 15:48:10 +000020
Senthil Kumaranc5c5a142012-01-14 19:09:04 +080021from base64 import b64encode
Georg Brandl2daf6ae2012-02-20 19:54:16 +010022import collections
Senthil Kumaranc5c5a142012-01-14 19:09:04 +080023
Senthil Kumaran8b081b72013-04-10 20:53:12 -070024
def hexescape(char):
    """Escape char as RFC 2396 specifies.

    Returns "%" followed by the code point of *char* in upper-case
    hexadecimal, zero-padded to at least two digits (" " -> "%20").
    """
    # "%02X" pads to two digits and upper-cases in a single step.
    return "%%%02X" % ord(char)
Jeremy Hylton6102e292000-08-31 15:48:10 +000031
# Shortcut for testing FancyURLopener
_urlopener = None


def urlopen(url, data=None, proxies=None):
    """urlopen(url [, data]) -> open file-like object

    Opens *url* through a FancyURLopener.  When *proxies* is given, a fresh
    opener configured with those proxies is used for this call only.
    Otherwise a module-level opener is created on first use and reused by
    later calls.  *data*, when not None, is passed on to opener.open().
    """
    global _urlopener
    if proxies is not None:
        opener = urllib.request.FancyURLopener(proxies=proxies)
    elif not _urlopener:
        # First call without proxies: build the shared opener and cache it.
        opener = FancyURLopener()
        _urlopener = opener
    else:
        opener = _urlopener
    if data is None:
        return opener.open(url)
    else:
        return opener.open(url, data)
50
Senthil Kumarance260142011-11-01 01:35:17 +080051
def FancyURLopener():
    """Return a new urllib.request.FancyURLopener.

    The instance is created inside support.check_warnings() so that the
    DeprecationWarning raised on construction is expected and captured.
    """
    with support.check_warnings(
            ('FancyURLopener style of invoking requests is deprecated.',
            DeprecationWarning)):
        return urllib.request.FancyURLopener()
57
58
def fakehttp(fakedata, mock_close=False):
    """Return an HTTPConnection subclass that serves *fakedata*.

    connect() on the returned class installs a FakeSocket wrapping
    *fakedata* instead of opening a network connection.  When *mock_close*
    is true the class also overrides close() with a no-op.
    """
    class FakeSocket(io.BytesIO):
        # Number of outstanding references; the buffer is really closed
        # only when this drops to zero.
        io_refs = 1

        def sendall(self, data):
            # Keep the request bytes so tests can inspect them.
            FakeHTTPConnection.buf = data

        def makefile(self, *args, **kwds):
            self.io_refs += 1
            return self

        def read(self, amt=None):
            if self.closed:
                return b""
            return io.BytesIO.read(self, amt)

        def readline(self, length=None):
            if self.closed:
                return b""
            return io.BytesIO.readline(self, length)

        def close(self):
            self.io_refs -= 1
            if self.io_refs == 0:
                io.BytesIO.close(self)

    class FakeHTTPConnection(http.client.HTTPConnection):

        # buffer to store data for verification in urlopen tests.
        buf = None

        def connect(self):
            self.sock = FakeSocket(self.fakedata)
            # Expose the most recently created socket on the class.
            type(self).fakesock = self.sock

        if mock_close:
            # bpo-36918: HTTPConnection destructor calls close() which calls
            # flush(). Problem: flush() calls self.fp.flush() which raises
            # "ValueError: I/O operation on closed file" which is logged as an
            # "Exception ignored in". Override close() to silence this error.
            def close(self):
                pass
    FakeHTTPConnection.fakedata = fakedata

    return FakeHTTPConnection
104
105
class FakeHTTPMixin(object):
    """Mixin that swaps http.client.HTTPConnection for a fake and back."""

    def fakehttp(self, fakedata, mock_close=False):
        """Install a fake HTTPConnection class serving *fakedata*.

        The real class is remembered so unfakehttp() can restore it.
        """
        fake_http_class = fakehttp(fakedata, mock_close=mock_close)
        self._connection_class = http.client.HTTPConnection
        http.client.HTTPConnection = fake_http_class

    def unfakehttp(self):
        """Restore the HTTPConnection class saved by fakehttp()."""
        http.client.HTTPConnection = self._connection_class
114
115
class FakeFTPMixin(object):
    """Mixin that swaps urllib.request.ftpwrapper for a stub and back."""

    def fakeftp(self):
        """Replace urllib.request.ftpwrapper with a stub that does no I/O.

        The real class is remembered so unfakeftp() can restore it.
        """
        class FakeFtpWrapper(object):
            def __init__(self,  user, passwd, host, port, dirs, timeout=None,
                     persistent=True):
                pass

            def retrfile(self, file, type):
                # Empty payload and a length of 0.
                return io.BytesIO(), 0

            def close(self):
                pass

        self._ftpwrapper_class = urllib.request.ftpwrapper
        urllib.request.ftpwrapper = FakeFtpWrapper

    def unfakeftp(self):
        """Restore the ftpwrapper class saved by fakeftp()."""
        urllib.request.ftpwrapper = self._ftpwrapper_class
134
135
class urlopen_FileTests(unittest.TestCase):
    """Test urlopen() opening a temporary file.

    Try to test as much functionality as possible so as to cut down on reliance
    on connecting to the Net for testing.

    """

    def setUp(self):
        # Create a temp file to use for testing
        self.text = bytes("test_urllib: %s\n" % self.__class__.__name__,
                          "ascii")
        with open(support.TESTFN, 'wb') as f:
            f.write(self.text)
        self.pathname = support.TESTFN
        self.returned_obj = urlopen("file:%s" % self.pathname)

    def tearDown(self):
        """Shut down the open object"""
        self.returned_obj.close()
        os.remove(support.TESTFN)

    def test_interface(self):
        # Make sure object returned by urlopen() has the specified methods
        for attr in ("read", "readline", "readlines", "fileno",
                     "close", "info", "geturl", "getcode", "__iter__"):
            self.assertTrue(hasattr(self.returned_obj, attr),
                         "object returned by urlopen() lacks %s attribute" %
                         attr)

    def test_read(self):
        self.assertEqual(self.text, self.returned_obj.read())

    def test_readline(self):
        self.assertEqual(self.text, self.returned_obj.readline())
        self.assertEqual(b'', self.returned_obj.readline(),
                         "calling readline() after exhausting the file did not"
                         " return an empty string")

    def test_readlines(self):
        lines_list = self.returned_obj.readlines()
        self.assertEqual(len(lines_list), 1,
                         "readlines() returned the wrong number of lines")
        self.assertEqual(lines_list[0], self.text,
                         "readlines() returned improper text")

    def test_fileno(self):
        file_num = self.returned_obj.fileno()
        self.assertIsInstance(file_num, int, "fileno() did not return an int")
        self.assertEqual(os.read(file_num, len(self.text)), self.text,
                         "Reading on the file descriptor returned by fileno() "
                         "did not return the expected text")

    def test_close(self):
        # Test close() by calling it here and then having it be called again
        # by the tearDown() method for the test
        self.returned_obj.close()

    def test_headers(self):
        self.assertIsInstance(self.returned_obj.headers, email.message.Message)

    def test_url(self):
        self.assertEqual(self.returned_obj.url, self.pathname)

    def test_status(self):
        self.assertIsNone(self.returned_obj.status)

    def test_info(self):
        self.assertIsInstance(self.returned_obj.info(), email.message.Message)

    def test_geturl(self):
        self.assertEqual(self.returned_obj.geturl(), self.pathname)

    def test_getcode(self):
        self.assertIsNone(self.returned_obj.getcode())

    def test_iter(self):
        # Test iterator
        # Don't need to count number of iterations since test would fail the
        # instant it returned anything beyond the first line from the
        # comparison.
        # Use the iterator in the usual implicit way to test for ticket #4608.
        for line in self.returned_obj:
            self.assertEqual(line, self.text)

    def test_relativelocalfile(self):
        # A relative path without a scheme is not a valid URL.
        self.assertRaises(ValueError, urllib.request.urlopen,
                          './' + self.pathname)
226
Senthil Kumaranefbd4ea2017-04-01 23:47:35 -0700227
class ProxyTests(unittest.TestCase):
    """Test proxy discovery and bypass rules read from the environment."""

    def setUp(self):
        # Records changes to env vars
        self.env = support.EnvironmentVarGuard()
        # Delete all proxy related env vars
        for k in list(os.environ):
            if 'proxy' in k.lower():
                self.env.unset(k)

    def tearDown(self):
        # Restore all proxy related env vars
        self.env.__exit__()
        del self.env

    def test_getproxies_environment_keep_no_proxies(self):
        self.env.set('NO_PROXY', 'localhost')
        proxies = urllib.request.getproxies_environment()
        # getproxies_environment use lowered case truncated (no '_proxy') keys
        self.assertEqual('localhost', proxies['no'])
        # List of no_proxies with space.
        self.env.set('NO_PROXY', 'localhost, anotherdomain.com, newdomain.com:1234')
        self.assertTrue(urllib.request.proxy_bypass_environment('anotherdomain.com'))
        self.assertTrue(urllib.request.proxy_bypass_environment('anotherdomain.com:8888'))
        self.assertTrue(urllib.request.proxy_bypass_environment('newdomain.com:1234'))

    def test_proxy_cgi_ignore(self):
        try:
            self.env.set('HTTP_PROXY', 'http://somewhere:3128')
            proxies = urllib.request.getproxies_environment()
            self.assertEqual('http://somewhere:3128', proxies['http'])
            # With REQUEST_METHOD set, the 'http' entry must be dropped.
            self.env.set('REQUEST_METHOD', 'GET')
            proxies = urllib.request.getproxies_environment()
            self.assertNotIn('http', proxies)
        finally:
            self.env.unset('REQUEST_METHOD')
            self.env.unset('HTTP_PROXY')

    def test_proxy_bypass_environment_host_match(self):
        bypass = urllib.request.proxy_bypass_environment
        self.env.set('NO_PROXY',
                     'localhost, anotherdomain.com, newdomain.com:1234, .d.o.t')
        self.assertTrue(bypass('localhost'))
        self.assertTrue(bypass('LocalHost'))                 # MixedCase
        self.assertTrue(bypass('LOCALHOST'))                 # UPPERCASE
        self.assertTrue(bypass('.localhost'))
        self.assertTrue(bypass('newdomain.com:1234'))
        self.assertTrue(bypass('.newdomain.com:1234'))
        self.assertTrue(bypass('foo.d.o.t'))                 # issue 29142
        self.assertTrue(bypass('d.o.t'))
        self.assertTrue(bypass('anotherdomain.com:8888'))
        self.assertTrue(bypass('.anotherdomain.com:8888'))
        self.assertTrue(bypass('www.newdomain.com:1234'))
        self.assertFalse(bypass('prelocalhost'))
        self.assertFalse(bypass('newdomain.com'))            # no port
        self.assertFalse(bypass('newdomain.com:1235'))       # wrong port

    def test_proxy_bypass_environment_always_match(self):
        bypass = urllib.request.proxy_bypass_environment
        self.env.set('NO_PROXY', '*')
        self.assertTrue(bypass('newdomain.com'))
        self.assertTrue(bypass('newdomain.com:1234'))
        # '*' followed by other entries is not a match-everything wildcard.
        self.env.set('NO_PROXY', '*, anotherdomain.com')
        self.assertTrue(bypass('anotherdomain.com'))
        self.assertFalse(bypass('newdomain.com'))
        self.assertFalse(bypass('newdomain.com:1234'))

    def test_proxy_bypass_environment_newline(self):
        bypass = urllib.request.proxy_bypass_environment
        self.env.set('NO_PROXY',
                     'localhost, anotherdomain.com, newdomain.com:1234')
        self.assertFalse(bypass('localhost\n'))
        self.assertFalse(bypass('anotherdomain.com:8888\n'))
        self.assertFalse(bypass('newdomain.com:1234\n'))
302
Senthil Kumaranefbd4ea2017-04-01 23:47:35 -0700303
class ProxyTests_withOrderedEnv(unittest.TestCase):
    """Proxy tests that depend on the order of environment variables."""

    def setUp(self):
        # We need to test conditions, where variable order _is_ significant
        self._saved_env = os.environ
        # Monkey patch os.environ, start with empty fake environment
        os.environ = collections.OrderedDict()

    def tearDown(self):
        os.environ = self._saved_env

    def test_getproxies_environment_prefer_lowercase(self):
        # Test lowercase preference with removal
        os.environ['no_proxy'] = ''
        os.environ['No_Proxy'] = 'localhost'
        self.assertFalse(urllib.request.proxy_bypass_environment('localhost'))
        self.assertFalse(urllib.request.proxy_bypass_environment('arbitrary'))
        os.environ['http_proxy'] = ''
        os.environ['HTTP_PROXY'] = 'http://somewhere:3128'
        proxies = urllib.request.getproxies_environment()
        self.assertEqual({}, proxies)
        # Test lowercase preference of proxy bypass and correct matching including ports
        os.environ['no_proxy'] = 'localhost, noproxy.com, my.proxy:1234'
        os.environ['No_Proxy'] = 'xyz.com'
        self.assertTrue(urllib.request.proxy_bypass_environment('localhost'))
        self.assertTrue(urllib.request.proxy_bypass_environment('noproxy.com:5678'))
        self.assertTrue(urllib.request.proxy_bypass_environment('my.proxy:1234'))
        self.assertFalse(urllib.request.proxy_bypass_environment('my.proxy'))
        self.assertFalse(urllib.request.proxy_bypass_environment('arbitrary'))
        # Test lowercase preference with replacement
        os.environ['http_proxy'] = 'http://somewhere:3128'
        os.environ['Http_Proxy'] = 'http://somewhereelse:3128'
        proxies = urllib.request.getproxies_environment()
        self.assertEqual('http://somewhere:3128', proxies['http'])
337 self.assertEqual('http://somewhere:3128', proxies['http'])
Benjamin Peterson9bc93512008-09-22 22:10:59 +0000338
Senthil Kumaranefbd4ea2017-04-01 23:47:35 -0700339
class urlopen_HttpTests(unittest.TestCase, FakeHTTPMixin, FakeFTPMixin):
    """Test urlopen() opening a fake http connection."""

    def check_read(self, ver):
        # *ver* is the HTTP version as bytes, e.g. b"1.1".
        self.fakehttp(b"HTTP/" + ver + b" 200 OK\r\n\r\nHello!")
        try:
            fp = urlopen("http://python.org/")
            self.assertEqual(fp.readline(), b"Hello!")
            self.assertEqual(fp.readline(), b"")
            self.assertEqual(fp.geturl(), 'http://python.org/')
            self.assertEqual(fp.getcode(), 200)
        finally:
            self.unfakehttp()

    def test_url_fragment(self):
        # Issue #11703: geturl() omits fragments in the original URL.
        url = 'http://docs.python.org/library/urllib.html#OK'
        self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello!")
        try:
            fp = urllib.request.urlopen(url)
            self.assertEqual(fp.geturl(), url)
        finally:
            self.unfakehttp()

    def test_willclose(self):
        self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello!")
        try:
            resp = urlopen("http://www.python.org")
            self.assertTrue(resp.fp.will_close)
        finally:
            self.unfakehttp()

    @unittest.skipUnless(ssl, "ssl module required")
    def test_url_with_control_char_rejected(self):
        for char_no in list(range(0, 0x21)) + [0x7f]:
            char = chr(char_no)
            schemeless_url = f"//localhost:7777/test{char}/"
            self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello.")
            try:
                # We explicitly test urllib.request.urlopen() instead of the top
                # level 'def urlopen()' function defined in this... (quite ugly)
                # test suite.  They use different url opening codepaths.  Plain
                # urlopen uses FancyURLOpener which goes via a codepath that
                # calls urllib.parse.quote() on the URL which makes all of the
                # above attempts at injection within the url _path_ safe.
                escaped_char_repr = repr(char).replace('\\', r'\\')
                InvalidURL = http.client.InvalidURL
                with self.assertRaisesRegex(
                    InvalidURL, f"contain control.*{escaped_char_repr}"):
                    urllib.request.urlopen(f"http:{schemeless_url}")
                with self.assertRaisesRegex(
                    InvalidURL, f"contain control.*{escaped_char_repr}"):
                    urllib.request.urlopen(f"https:{schemeless_url}")
                # This code path quotes the URL so there is no injection.
                resp = urlopen(f"http:{schemeless_url}")
                self.assertNotIn(char, resp.geturl())
            finally:
                self.unfakehttp()

    @unittest.skipUnless(ssl, "ssl module required")
    def test_url_with_newline_header_injection_rejected(self):
        self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello.")
        host = "localhost:7777?a=1 HTTP/1.1\r\nX-injected: header\r\nTEST: 123"
        schemeless_url = "//" + host + ":8080/test/?test=a"
        try:
            # We explicitly test urllib.request.urlopen() instead of the top
            # level 'def urlopen()' function defined in this... (quite ugly)
            # test suite.  They use different url opening codepaths.  Plain
            # urlopen uses FancyURLOpener which goes via a codepath that
            # calls urllib.parse.quote() on the URL which makes all of the
            # above attempts at injection within the url _path_ safe.
            InvalidURL = http.client.InvalidURL
            with self.assertRaisesRegex(
                InvalidURL, r"contain control.*\\r.*(found at least . .)"):
                urllib.request.urlopen(f"http:{schemeless_url}")
            with self.assertRaisesRegex(InvalidURL, r"contain control.*\\n"):
                urllib.request.urlopen(f"https:{schemeless_url}")
            # This code path quotes the URL so there is no injection.
            resp = urlopen(f"http:{schemeless_url}")
            self.assertNotIn(' ', resp.geturl())
            self.assertNotIn('\r', resp.geturl())
            self.assertNotIn('\n', resp.geturl())
        finally:
            self.unfakehttp()

    def test_read_0_9(self):
        # "0.9" response accepted (but not "simple responses" without
        # a status line)
        self.check_read(b"0.9")

    def test_read_1_0(self):
        self.check_read(b"1.0")

    def test_read_1_1(self):
        self.check_read(b"1.1")

    def test_read_bogus(self):
        # urlopen() should raise OSError for many error codes.
        self.fakehttp(b'''HTTP/1.1 401 Authentication Required
Date: Wed, 02 Jan 2008 03:03:54 GMT
Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
Connection: close
Content-Type: text/html; charset=iso-8859-1
''', mock_close=True)
        try:
            self.assertRaises(OSError, urlopen, "http://python.org/")
        finally:
            self.unfakehttp()

    def test_invalid_redirect(self):
        # urlopen() should raise HTTPError when redirected to a file: URL.
        self.fakehttp(b'''HTTP/1.1 302 Found
Date: Wed, 02 Jan 2008 03:03:54 GMT
Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
Location: file://guidocomputer.athome.com:/python/license
Connection: close
Content-Type: text/html; charset=iso-8859-1
''', mock_close=True)
        try:
            msg = "Redirection to url 'file:"
            with self.assertRaisesRegex(urllib.error.HTTPError, msg):
                urlopen("http://python.org/")
        finally:
            self.unfakehttp()

    def test_redirect_limit_independent(self):
        # Ticket #12923: make sure independent requests each use their
        # own retry limit.
        for i in range(FancyURLopener().maxtries):
            self.fakehttp(b'''HTTP/1.1 302 Found
Location: file://guidocomputer.athome.com:/python/license
Connection: close
''', mock_close=True)
            try:
                self.assertRaises(urllib.error.HTTPError, urlopen,
                    "http://something")
            finally:
                self.unfakehttp()

    def test_empty_socket(self):
        # urlopen() raises OSError if the underlying socket does not send any
        # data. (#1680230)
        self.fakehttp(b'')
        try:
            self.assertRaises(OSError, urlopen, "http://something")
        finally:
            self.unfakehttp()

    def test_missing_localfile(self):
        # Test for #10836
        with self.assertRaises(urllib.error.URLError) as e:
            urlopen('file://localhost/a/file/which/doesnot/exists.py')
        self.assertTrue(e.exception.filename)
        self.assertTrue(e.exception.reason)

    def test_file_notexists(self):
        fd, tmp_file = tempfile.mkstemp()
        tmp_fileurl = 'file://localhost/' + tmp_file.replace(os.path.sep, '/')
        try:
            self.assertTrue(os.path.exists(tmp_file))
            with urlopen(tmp_fileurl) as fobj:
                self.assertTrue(fobj)
        finally:
            os.close(fd)
            os.unlink(tmp_file)
        # Once the file is gone the same URL must fail.
        self.assertFalse(os.path.exists(tmp_file))
        with self.assertRaises(urllib.error.URLError):
            urlopen(tmp_fileurl)

    def test_ftp_nohost(self):
        test_ftp_url = 'ftp:///path'
        with self.assertRaises(urllib.error.URLError) as e:
            urlopen(test_ftp_url)
        self.assertFalse(e.exception.filename)
        self.assertTrue(e.exception.reason)

    def test_ftp_nonexisting(self):
        with self.assertRaises(urllib.error.URLError) as e:
            urlopen('ftp://localhost/a/file/which/doesnot/exists.py')
        self.assertFalse(e.exception.filename)
        self.assertTrue(e.exception.reason)

    @patch.object(urllib.request, 'MAXFTPCACHE', 0)
    def test_ftp_cache_pruning(self):
        self.fakeftp()
        try:
            urllib.request.ftpcache['test'] = urllib.request.ftpwrapper('user', 'pass', 'localhost', 21, [])
            urlopen('ftp://localhost')
        finally:
            self.unfakeftp()

    def test_userpass_inurl(self):
        self.fakehttp(b"HTTP/1.0 200 OK\r\n\r\nHello!")
        try:
            fp = urlopen("http://user:pass@python.org/")
            self.assertEqual(fp.readline(), b"Hello!")
            self.assertEqual(fp.readline(), b"")
            self.assertEqual(fp.geturl(), 'http://user:pass@python.org/')
            self.assertEqual(fp.getcode(), 200)
        finally:
            self.unfakehttp()

    def test_userpass_inurl_w_spaces(self):
        self.fakehttp(b"HTTP/1.0 200 OK\r\n\r\nHello!")
        try:
            userpass = "a b:c d"
            url = "http://{}@python.org/".format(userpass)
            # The fake class installed above; its buf holds the sent request.
            fakehttp_wrapper = http.client.HTTPConnection
            authorization = ("Authorization: Basic %s\r\n" %
                             b64encode(userpass.encode("ASCII")).decode("ASCII"))
            fp = urlopen(url)
            # The authorization header must be in place
            self.assertIn(authorization, fakehttp_wrapper.buf.decode("UTF-8"))
            self.assertEqual(fp.readline(), b"Hello!")
            self.assertEqual(fp.readline(), b"")
            # the spaces are quoted in URL so no match
            self.assertNotEqual(fp.geturl(), url)
            self.assertEqual(fp.getcode(), 200)
        finally:
            self.unfakehttp()

    def test_URLopener_deprecation(self):
        with support.check_warnings(('', DeprecationWarning)):
            urllib.request.URLopener()

    @unittest.skipUnless(ssl, "ssl module required")
    def test_cafile_and_context(self):
        context = ssl.create_default_context()
        with support.check_warnings(('', DeprecationWarning)):
            with self.assertRaises(ValueError):
                urllib.request.urlopen(
                    "https://localhost", cafile="/nonexistent/path", context=context
                )
Senthil Kumarana5c85b32014-09-19 15:23:30 +0800573
Senthil Kumaranefbd4ea2017-04-01 23:47:35 -0700574
class urlopen_DataTests(unittest.TestCase):
    """Test urlopen() opening a data URL."""

    def setUp(self):
        # clear _opener global variable
        self.addCleanup(urllib.request.urlcleanup)

        # text containing URL special- and unicode-characters
        self.text = "test data URLs :;,%=& \u00f6 \u00c4 "
        # 2x1 pixel RGB PNG image with one black and one white pixel
        self.image = (
            b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\x02\x00\x00\x00'
            b'\x01\x08\x02\x00\x00\x00{@\xe8\xdd\x00\x00\x00\x01sRGB\x00\xae'
            b'\xce\x1c\xe9\x00\x00\x00\x0fIDAT\x08\xd7c```\xf8\xff\xff?\x00'
            b'\x06\x01\x02\xfe\no/\x1e\x00\x00\x00\x00IEND\xaeB`\x82')

        self.text_url = (
            "data:text/plain;charset=UTF-8,test%20data%20URLs%20%3A%3B%2C%25%3"
            "D%26%20%C3%B6%20%C3%84%20")
        self.text_url_base64 = (
            "data:text/plain;charset=ISO-8859-1;base64,dGVzdCBkYXRhIFVSTHMgOjs"
            "sJT0mIPYgxCA%3D")
        # base64 encoded data URL that contains ignorable spaces,
        # such as "\n", " ", "%0A", and "%20".
        # The first segment carries the scheme, media type and the base64
        # of the leading bytes of self.image; without it the URL has no
        # scheme and cannot decode to the image.
        self.image_url = (
            "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAIAAAABCAIAAAB7\n"
            "QOjdAAAAAXNSR0IArs4c6QAAAA9JREFUCNdj%0AYGBg%2BP//PwAGAQL%2BCm8 "
            "vHgAAAABJRU5ErkJggg%3D%3D%0A%20")

        self.text_url_resp = urllib.request.urlopen(self.text_url)
        self.text_url_base64_resp = urllib.request.urlopen(
            self.text_url_base64)
        self.image_url_resp = urllib.request.urlopen(self.image_url)

    def test_interface(self):
        # Make sure object returned by urlopen() has the specified methods
        for attr in ("read", "readline", "readlines",
                     "close", "info", "geturl", "getcode", "__iter__"):
            self.assertTrue(hasattr(self.text_url_resp, attr),
                         "object returned by urlopen() lacks %s attribute" %
                         attr)

    def test_info(self):
        self.assertIsInstance(self.text_url_resp.info(), email.message.Message)
        self.assertEqual(self.text_url_base64_resp.info().get_params(),
            [('text/plain', ''), ('charset', 'ISO-8859-1')])
        self.assertEqual(self.image_url_resp.info()['content-length'],
            str(len(self.image)))
        self.assertEqual(urllib.request.urlopen("data:,").info().get_params(),
            [('text/plain', ''), ('charset', 'US-ASCII')])

    def test_geturl(self):
        self.assertEqual(self.text_url_resp.geturl(), self.text_url)
        self.assertEqual(self.text_url_base64_resp.geturl(),
            self.text_url_base64)
        self.assertEqual(self.image_url_resp.geturl(), self.image_url)

    def test_read_text(self):
        self.assertEqual(self.text_url_resp.read().decode(
            dict(self.text_url_resp.info().get_params())['charset']), self.text)

    def test_read_text_base64(self):
        self.assertEqual(self.text_url_base64_resp.read().decode(
            dict(self.text_url_base64_resp.info().get_params())['charset']),
            self.text)

    def test_read_image(self):
        self.assertEqual(self.image_url_resp.read(), self.image)

    def test_missing_comma(self):
        self.assertRaises(ValueError, urllib.request.urlopen, 'data:text/plain')

    def test_invalid_base64_data(self):
        # missing padding character
        self.assertRaises(ValueError, urllib.request.urlopen, 'data:;base64,Cg=')
649 self.assertRaises(ValueError,urllib.request.urlopen,'data:;base64,Cg=')
650
Senthil Kumaranefbd4ea2017-04-01 23:47:35 -0700651
class urlretrieve_FileTests(unittest.TestCase):
    """Test urllib.request.urlretrieve() on local files"""

    def setUp(self):
        # clear _opener global variable
        self.addCleanup(urllib.request.urlcleanup)

        # Create a list of temporary files. Each item in the list is a file
        # name (absolute path or relative to the current working directory).
        # All files in this list will be deleted in the tearDown method. Note,
        # this only helps to make sure temporary files get deleted, but it
        # does nothing about trying to close files that may still be open. It
        # is the responsibility of the developer to properly close files even
        # when exceptional conditions occur.
        self.tempFiles = []

        # Create a temporary file.  It is registered before it is written so
        # that a partially written file is still removed by tearDown().
        self.registerFileForCleanUp(support.TESTFN)
        self.text = b'testing urllib.urlretrieve'
        # The context manager closes the file on every path and, unlike a
        # hand-written finally clause, does not touch an unbound name when
        # open() itself fails.
        with open(support.TESTFN, 'wb') as source:
            source.write(self.text)

    def tearDown(self):
        # Delete the temporary files.  Removal is best effort (a test may
        # register a name it never created), but only file-system errors are
        # ignored so that e.g. KeyboardInterrupt still propagates.
        for each in self.tempFiles:
            try:
                os.remove(each)
            except OSError:
                pass

    def constructLocalFileUrl(self, filePath):
        """Return a file:// URL for the absolute form of filePath.

        Raises unittest.SkipTest when the absolute path cannot be encoded
        as UTF-8.
        """
        filePath = os.path.abspath(filePath)
        try:
            filePath.encode("utf-8")
        except UnicodeEncodeError:
            raise unittest.SkipTest("filePath is not encodable to utf8")
        return "file://%s" % urllib.request.pathname2url(filePath)

    def createNewTempFile(self, data=b""):
        """Creates a new temporary file containing the specified data,
        registers the file for deletion during the test fixture tear down, and
        returns the absolute path of the file."""

        newFd, newFilePath = tempfile.mkstemp()
        self.registerFileForCleanUp(newFilePath)
        # os.fdopen() takes ownership of the descriptor; the with block
        # closes it whether or not the write succeeds.
        with os.fdopen(newFd, "wb") as newFile:
            newFile.write(data)
        return newFilePath

    def registerFileForCleanUp(self, fileName):
        """Remember fileName so that tearDown() deletes it."""
        self.tempFiles.append(fileName)

    def _retrieveWithReport(self, data):
        """Retrieve a new temporary file holding data into support.TESTFN.

        Returns the list of (block_count, block_read_size, file_size) tuples
        that urlretrieve() passed to the report hook, in call order.
        """
        report = []

        def hooktester(block_count, block_read_size, file_size):
            report.append((block_count, block_read_size, file_size))

        srcFileName = self.createNewTempFile(data)
        urllib.request.urlretrieve(self.constructLocalFileUrl(srcFileName),
                                   support.TESTFN, hooktester)
        return report

    def test_basic(self):
        # Make sure that a local file just gets its own location returned and
        # a headers value is returned.
        result = urllib.request.urlretrieve("file:%s" % support.TESTFN)
        self.assertEqual(result[0], support.TESTFN)
        self.assertIsInstance(result[1], email.message.Message,
                              "did not get an email.message.Message instance "
                              "as second returned value")

    def test_copy(self):
        # Test that setting the filename argument works.
        second_temp = "%s.2" % support.TESTFN
        self.registerFileForCleanUp(second_temp)
        result = urllib.request.urlretrieve(self.constructLocalFileUrl(
            support.TESTFN), second_temp)
        self.assertEqual(second_temp, result[0])
        self.assertTrue(os.path.exists(second_temp), "copy of the file was not "
                                                     "made")
        with open(second_temp, 'rb') as copied:
            text = copied.read()
        self.assertEqual(self.text, text)

    def test_reporthook(self):
        # Make sure that the reporthook works.
        calls = 0

        def hooktester(block_count, block_read_size, file_size):
            # Every argument is an int and block_count increases by one per
            # call, starting from zero.
            nonlocal calls
            self.assertIsInstance(block_count, int)
            self.assertIsInstance(block_read_size, int)
            self.assertIsInstance(file_size, int)
            self.assertEqual(block_count, calls)
            calls += 1

        second_temp = "%s.2" % support.TESTFN
        self.registerFileForCleanUp(second_temp)
        urllib.request.urlretrieve(
            self.constructLocalFileUrl(support.TESTFN),
            second_temp, hooktester)
        # Without this check the test would pass if the hook were ignored.
        self.assertGreater(calls, 0, "reporthook was never called")

    def test_reporthook_0_bytes(self):
        # Test on zero length file. Should call reporthook only 1 time.
        report = self._retrieveWithReport(b"")
        self.assertEqual(len(report), 1)
        self.assertEqual(report[0][2], 0)

    def test_reporthook_5_bytes(self):
        # Test on 5 byte file. Should call reporthook only 2 times (once when
        # the "network connection" is established and once when the block is
        # read).
        report = self._retrieveWithReport(b"x" * 5)
        self.assertEqual(len(report), 2)
        self.assertEqual(report[0][2], 5)
        self.assertEqual(report[1][2], 5)

    def test_reporthook_8193_bytes(self):
        # Test on 8193 byte file. Should call reporthook only 3 times (once
        # when the "network connection" is established, once for the next 8192
        # bytes, and once for the last byte).
        report = self._retrieveWithReport(b"x" * 8193)
        self.assertEqual(len(report), 3)
        self.assertEqual(report[0][2], 8193)
        # The second hook argument is expected to be 8192 on every call,
        # including the one for the final single byte.
        self.assertEqual(report[0][1], 8192)
        self.assertEqual(report[1][1], 8192)
        self.assertEqual(report[2][1], 8192)
Skip Montanaro080c9972001-01-28 21:12:22 +0000793
Senthil Kumarance260142011-11-01 01:35:17 +0800794
class urlretrieve_HttpTests(unittest.TestCase, FakeHTTPMixin):
    """Test urllib.urlretrieve() using fake http connections"""

    # Canned reply whose Content-Length header announces 100 bytes while the
    # body that follows is much shorter.
    _TRUNCATED_REPLY = b'''HTTP/1.1 200 OK
Date: Wed, 02 Jan 2008 03:03:54 GMT
Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
Connection: close
Content-Length: 100
Content-Type: text/html; charset=iso-8859-1

FF
'''

    def _retrieve_truncated(self, **retrieve_kwargs):
        # Serve the truncated reply through the fake HTTP layer and check
        # that urlretrieve() reports the short read.  The fake layer is
        # removed again whatever the outcome.
        self.addCleanup(urllib.request.urlcleanup)
        self.fakehttp(self._TRUNCATED_REPLY)
        try:
            with self.assertRaises(urllib.error.ContentTooShortError):
                urllib.request.urlretrieve(support.TEST_HTTP_URL,
                                           **retrieve_kwargs)
        finally:
            self.unfakehttp()

    def test_short_content_raises_ContentTooShortError(self):
        def _reporthook(par1, par2, par3):
            pass

        self._retrieve_truncated(reporthook=_reporthook)

    def test_short_content_raises_ContentTooShortError_without_reporthook(self):
        self._retrieve_truncated()
838
839
class QuotingTests(unittest.TestCase):
    r"""Tests for urllib.parse.quote() and urllib.parse.quote_plus()

    According to RFC 3986 (Uniform Resource Identifiers), to escape a
    character you write it as '%' + <2 character US-ASCII hex value>.
    The Python code of ``'%' + hex(ord(<character>))[2:]`` escapes a
    character properly once the hex value is zero-padded to two digits.
    Case does not matter on the hex letters.

    The various character sets specified are:

    Reserved characters : ";/?:@&=+$,"
        Have special meaning in URIs and must be escaped if not being used for
        their special meaning
    Data characters : letters, digits, and "-_.~"
        Unreserved and do not need to be escaped; can be, though, if desired
        (RFC 2396 also listed "!*'()" here; RFC 3986 treats them as reserved)
    Control characters : 0x00 - 0x1F, 0x7F
        Have no use in URIs so must be escaped
    space : 0x20
        Must be escaped
    Delimiters : '<>#%"'
        Must be escaped
    Unwise : "{}|\^[]`"
        Must be escaped

    """

    def test_never_quote(self):
        # Make sure quote() and quote_plus() do not quote letters, digits,
        # and "_.-~"
        do_not_quote = ''.join(["ABCDEFGHIJKLMNOPQRSTUVWXYZ",
                                "abcdefghijklmnopqrstuvwxyz",
                                "0123456789",
                                "_.-~"])
        result = urllib.parse.quote(do_not_quote)
        self.assertEqual(do_not_quote, result,
                         "using quote(): %r != %r" % (do_not_quote, result))
        result = urllib.parse.quote_plus(do_not_quote)
        self.assertEqual(do_not_quote, result,
                         "using quote_plus(): %r != %r" %
                         (do_not_quote, result))

    def test_default_safe(self):
        # Test '/' is default value for 'safe' parameter
        self.assertEqual(urllib.parse.quote.__defaults__[0], '/')

    def test_safe(self):
        # Test setting 'safe' parameter does what it should do
        quote_by_default = "<>"
        result = urllib.parse.quote(quote_by_default, safe=quote_by_default)
        self.assertEqual(quote_by_default, result,
                         "using quote(): %r != %r" %
                         (quote_by_default, result))
        result = urllib.parse.quote_plus(quote_by_default,
                                         safe=quote_by_default)
        self.assertEqual(quote_by_default, result,
                         "using quote_plus(): %r != %r" %
                         (quote_by_default, result))
        # Safe expressed as bytes rather than str
        result = urllib.parse.quote(quote_by_default, safe=b"<>")
        self.assertEqual(quote_by_default, result,
                         "using quote(): %r != %r" %
                         (quote_by_default, result))
        # "Safe" non-ASCII characters should have no effect
        # (Since URIs are not allowed to have non-ASCII characters)
        result = urllib.parse.quote("a\xfcb", encoding="latin-1", safe="\xfc")
        expect = urllib.parse.quote("a\xfcb", encoding="latin-1", safe="")
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" %
                         (expect, result))
        # Same as above, but using a bytes rather than str
        result = urllib.parse.quote("a\xfcb", encoding="latin-1", safe=b"\xfc")
        expect = urllib.parse.quote("a\xfcb", encoding="latin-1", safe="")
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" %
                         (expect, result))

    def test_default_quoting(self):
        # Make sure all characters that should be quoted are by default sans
        # space (separate test for that).
        should_quote = ''.join([
            ''.join(chr(num) for num in range(32)),  # For 0x00 - 0x1F
            r'<>#%"{}|\^[]`',
            chr(127),  # For 0x7F
        ])
        for char in should_quote:
            result = urllib.parse.quote(char)
            self.assertEqual(hexescape(char), result,
                             "using quote(): "
                             "%s should be escaped to %s, not %s" %
                             (char, hexescape(char), result))
            result = urllib.parse.quote_plus(char)
            self.assertEqual(hexescape(char), result,
                             "using quote_plus(): "
                             "%s should be escaped to %s, not %s" %
                             (char, hexescape(char), result))
        # Characters needing escapes in the middle of otherwise safe text
        partial_quote = "ab[]cd"
        expected = "ab%5B%5Dcd"
        result = urllib.parse.quote(partial_quote)
        self.assertEqual(expected, result,
                         "using quote(): %r != %r" % (expected, result))
        result = urllib.parse.quote_plus(partial_quote)
        self.assertEqual(expected, result,
                         "using quote_plus(): %r != %r" % (expected, result))

    def test_quoting_space(self):
        # Make sure quote() and quote_plus() handle spaces as specified in
        # their unique way
        result = urllib.parse.quote(' ')
        self.assertEqual(result, hexescape(' '),
                         "using quote(): %r != %r" % (result, hexescape(' ')))
        result = urllib.parse.quote_plus(' ')
        self.assertEqual(result, '+',
                         "using quote_plus(): %r != +" % result)
        given = "a b cd e f"
        expect = given.replace(' ', hexescape(' '))
        result = urllib.parse.quote(given)
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        expect = given.replace(' ', '+')
        result = urllib.parse.quote_plus(given)
        self.assertEqual(expect, result,
                         "using quote_plus(): %r != %r" % (expect, result))

    def test_quoting_plus(self):
        # A literal '+' is escaped unless it is declared safe
        self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma'),
                         'alpha%2Bbeta+gamma')
        self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma', '+'),
                         'alpha+beta+gamma')
        # Test with bytes
        self.assertEqual(urllib.parse.quote_plus(b'alpha+beta gamma'),
                         'alpha%2Bbeta+gamma')
        # Test with safe bytes
        self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma', b'+'),
                         'alpha+beta+gamma')

    def test_quote_bytes(self):
        # Bytes should quote directly to percent-encoded values
        given = b"\xa2\xd8ab\xff"
        expect = "%A2%D8ab%FF"
        result = urllib.parse.quote(given)
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        # Encoding argument should raise type error on bytes input
        self.assertRaises(TypeError, urllib.parse.quote, given,
                          encoding="latin-1")
        # quote_from_bytes should work the same
        result = urllib.parse.quote_from_bytes(given)
        self.assertEqual(expect, result,
                         "using quote_from_bytes(): %r != %r"
                         % (expect, result))

    def test_quote_with_unicode(self):
        # Characters in Latin-1 range, encoded by default in UTF-8
        given = "\xa2\xd8ab\xff"
        expect = "%C2%A2%C3%98ab%C3%BF"
        result = urllib.parse.quote(given)
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        # Characters in Latin-1 range, encoded with None (default)
        result = urllib.parse.quote(given, encoding=None, errors=None)
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        # Characters in Latin-1 range, encoded with Latin-1
        expect = "%A2%D8ab%FF"
        result = urllib.parse.quote(given, encoding="latin-1")
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        # Characters in BMP, encoded by default in UTF-8
        given = "\u6f22\u5b57"              # "Kanji"
        expect = "%E6%BC%A2%E5%AD%97"
        result = urllib.parse.quote(given)
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        # Characters in BMP, encoded with Latin-1
        self.assertRaises(UnicodeEncodeError, urllib.parse.quote, given,
                          encoding="latin-1")
        # Characters in BMP, encoded with Latin-1, with replace error handling
        expect = "%3F%3F"                   # "??"
        result = urllib.parse.quote(given, encoding="latin-1",
                                    errors="replace")
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        # Characters in BMP, Latin-1, with xmlcharref error handling
        expect = "%26%2328450%3B%26%2323383%3B"     # "&#28450;&#23383;"
        result = urllib.parse.quote(given, encoding="latin-1",
                                    errors="xmlcharrefreplace")
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))

    def test_quote_plus_with_unicode(self):
        # Encoding (latin-1) test for quote_plus
        given = "\xa2\xd8 \xff"
        expect = "%A2%D8+%FF"
        result = urllib.parse.quote_plus(given, encoding="latin-1")
        self.assertEqual(expect, result,
                         "using quote_plus(): %r != %r" % (expect, result))
        # Errors test for quote_plus
        given = "ab\u6f22\u5b57 cd"
        expect = "ab%3F%3F+cd"
        result = urllib.parse.quote_plus(given, encoding="latin-1",
                                         errors="replace")
        self.assertEqual(expect, result,
                         "using quote_plus(): %r != %r" % (expect, result))
1043
Senthil Kumarand496c4c2010-07-30 19:34:36 +00001044
class UnquotingTests(unittest.TestCase):
    """Tests for unquote() and unquote_plus()

    See the doc string for QuotingTests for details on quoting and such.

    """

    def test_unquoting(self):
        # Make sure unquoting of all ASCII values works
        escape_list = []
        for num in range(128):
            given = hexescape(chr(num))
            expect = chr(num)
            result = urllib.parse.unquote(given)
            self.assertEqual(expect, result,
                             "using unquote(): %r != %r" % (expect, result))
            result = urllib.parse.unquote_plus(given)
            self.assertEqual(expect, result,
                             "using unquote_plus(): %r != %r" %
                             (expect, result))
            escape_list.append(given)
        # Unquote all 128 escapes in one go; the only '%' left in the result
        # should be the one produced by the escape for '%' itself.
        escape_string = ''.join(escape_list)
        result = urllib.parse.unquote(escape_string)
        self.assertEqual(result.count('%'), 1,
                         "using unquote(): not all characters escaped: "
                         "%s" % result)
        self.assertRaises((TypeError, AttributeError),
                          urllib.parse.unquote, None)
        self.assertRaises((TypeError, AttributeError),
                          urllib.parse.unquote, ())

    def test_unquoting_badpercent(self):
        # Test unquoting on bad percent-escapes: they are returned unchanged
        bad_escapes = ('%xab', '%x', '%')
        for given in bad_escapes:
            expect = given
            result = urllib.parse.unquote(given)
            self.assertEqual(expect, result, "using unquote(): %r != %r"
                             % (expect, result))
        # unquote_to_bytes
        for given in bad_escapes:
            expect = bytes(given, 'ascii')
            result = urllib.parse.unquote_to_bytes(given)
            self.assertEqual(expect, result,
                             "using unquote_to_bytes(): %r != %r"
                             % (expect, result))
        self.assertRaises((TypeError, AttributeError),
                          urllib.parse.unquote_to_bytes, None)
        self.assertRaises((TypeError, AttributeError),
                          urllib.parse.unquote_to_bytes, ())

    def test_unquoting_mixed_case(self):
        # Test unquoting on mixed-case hex digits in the percent-escapes
        given = '%Ab%eA'
        expect = b'\xab\xea'
        result = urllib.parse.unquote_to_bytes(given)
        self.assertEqual(expect, result,
                         "using unquote_to_bytes(): %r != %r"
                         % (expect, result))

    def test_unquoting_parts(self):
        # Make sure unquoting works when have non-quoted characters
        # interspersed
        given = 'ab%sd' % hexescape('c')
        expect = "abcd"
        result = urllib.parse.unquote(given)
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))
        result = urllib.parse.unquote_plus(given)
        self.assertEqual(expect, result,
                         "using unquote_plus(): %r != %r" % (expect, result))

    def test_unquoting_plus(self):
        # Test difference between unquote() and unquote_plus()
        given = "are+there+spaces..."
        expect = given
        result = urllib.parse.unquote(given)
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))
        expect = given.replace('+', ' ')
        result = urllib.parse.unquote_plus(given)
        self.assertEqual(expect, result,
                         "using unquote_plus(): %r != %r" % (expect, result))

    def test_unquote_to_bytes(self):
        # Percent-escapes in a str become the corresponding raw bytes
        given = 'br%C3%BCckner_sapporo_20050930.doc'
        expect = b'br\xc3\xbcckner_sapporo_20050930.doc'
        result = urllib.parse.unquote_to_bytes(given)
        self.assertEqual(expect, result,
                         "using unquote_to_bytes(): %r != %r"
                         % (expect, result))
        # Test on a string with unescaped non-ASCII characters
        # (Technically an invalid URI; expect those characters to be UTF-8
        # encoded).
        result = urllib.parse.unquote_to_bytes("\u6f22%C3%BC")
        expect = b'\xe6\xbc\xa2\xc3\xbc'  # UTF-8 for "\u6f22\u00fc"
        self.assertEqual(expect, result,
                         "using unquote_to_bytes(): %r != %r"
                         % (expect, result))
        # Test with a bytes as input
        given = b'%A2%D8ab%FF'
        expect = b'\xa2\xd8ab\xff'
        result = urllib.parse.unquote_to_bytes(given)
        self.assertEqual(expect, result,
                         "using unquote_to_bytes(): %r != %r"
                         % (expect, result))
        # Test with a bytes as input, with unescaped non-ASCII bytes
        # (Technically an invalid URI; expect those bytes to be preserved)
        given = b'%A2\xd8ab%FF'
        expect = b'\xa2\xd8ab\xff'
        result = urllib.parse.unquote_to_bytes(given)
        self.assertEqual(expect, result,
                         "using unquote_to_bytes(): %r != %r"
                         % (expect, result))

    def test_unquote_with_unicode(self):
        # Characters in the Latin-1 range, encoded with UTF-8
        given = 'br%C3%BCckner_sapporo_20050930.doc'
        expect = 'br\u00fcckner_sapporo_20050930.doc'
        result = urllib.parse.unquote(given)
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))
        # Characters in the Latin-1 range, encoded with None (default)
        result = urllib.parse.unquote(given, encoding=None, errors=None)
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))

        # Characters in the Latin-1 range, encoded with Latin-1
        result = urllib.parse.unquote('br%FCckner_sapporo_20050930.doc',
                                      encoding="latin-1")
        expect = 'br\u00fcckner_sapporo_20050930.doc'
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))

        # Characters in BMP, encoded with UTF-8
        given = "%E6%BC%A2%E5%AD%97"
        expect = "\u6f22\u5b57"             # "Kanji"
        result = urllib.parse.unquote(given)
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))

        # Decode with UTF-8, invalid sequence
        given = "%F3%B1"
        expect = "\ufffd"                   # Replacement character
        result = urllib.parse.unquote(given)
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))

        # Decode with UTF-8, invalid sequence, replace errors
        result = urllib.parse.unquote(given, errors="replace")
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))

        # Decode with UTF-8, invalid sequence, ignoring errors
        given = "%F3%B1"
        expect = ""
        result = urllib.parse.unquote(given, errors="ignore")
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))

        # A mix of non-ASCII and percent-encoded characters, UTF-8
        result = urllib.parse.unquote("\u6f22%C3%BC")
        expect = '\u6f22\u00fc'
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))

        # A mix of non-ASCII and percent-encoded characters, Latin-1
        # (Note, the string contains non-Latin-1-representable characters)
        result = urllib.parse.unquote("\u6f22%FC", encoding="latin-1")
        expect = '\u6f22\u00fc'
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))

    def test_unquoting_with_bytes_input(self):
        # ASCII characters decoded to a string
        given = b'blueberryjam'
        expect = 'blueberryjam'
        result = urllib.parse.unquote(given)
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))

        # A mix of raw (not percent-encoded) non-ASCII bytes and ASCII
        # characters
        given = b'bl\xc3\xa5b\xc3\xa6rsyltet\xc3\xb8y'
        expect = 'bl\u00e5b\u00e6rsyltet\u00f8y'
        result = urllib.parse.unquote(given)
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))

        # A mix of non-ASCII percent-encoded characters and ASCII characters
        given = b'bl%c3%a5b%c3%a6rsyltet%c3%b8j'
        expect = 'bl\u00e5b\u00e6rsyltet\u00f8j'
        result = urllib.parse.unquote(given)
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))
1254
1255
class urlencode_Tests(unittest.TestCase):
    """Tests for urlencode()"""

    def help_inputtype(self, given, test_type):
        """Helper method for testing different input types.

        'given' must lead to only the pairs:
            * 1st, 1
            * 2nd, 2
            * 3rd, 3

        'test_type' is a short description of the input kind; it is only
        used to label failure messages.

        Test cannot assume anything about order.  Docs make no guarantee and
        have possible dictionary input.

        """
        expect_somewhere = ["1st=1", "2nd=2", "3rd=3"]
        result = urllib.parse.urlencode(given)
        for expected in expect_somewhere:
            self.assertIn(expected, result,
                          "testing %s: %s not found in %s" %
                          (test_type, expected, result))
        self.assertEqual(result.count('&'), 2,
                         "testing %s: expected 2 '&'s; got %s" %
                         (test_type, result.count('&')))
        # Every pair looks like "Nxx=N", so the characters on both sides of
        # an '&' separator must be digits.
        amp_location = result.index('&')
        on_amp_left = result[amp_location - 1]
        on_amp_right = result[amp_location + 1]
        self.assertTrue(on_amp_left.isdigit() and on_amp_right.isdigit(),
                        "testing %s: '&' not located in proper place in %s" %
                        (test_type, result))
        self.assertEqual(len(result), (5 * 3) + 2, #5 chars per thing and amps
                         "testing %s: "
                         "unexpected number of characters: %s != %s" %
                         (test_type, len(result), (5 * 3) + 2))

    def test_using_mapping(self):
        # Test passing in a mapping object as an argument.
        self.help_inputtype({"1st":'1', "2nd":'2', "3rd":'3'},
                            "using dict as input type")

    def test_using_sequence(self):
        # Test passing in a sequence of two-item sequences as an argument.
        self.help_inputtype([('1st', '1'), ('2nd', '2'), ('3rd', '3')],
                            "using sequence of two-item tuples as input")

    def test_quoting(self):
        # Make sure keys and values are quoted using quote_plus()
        given = {"&":"="}
        expect = "%s=%s" % (hexescape('&'), hexescape('='))
        result = urllib.parse.urlencode(given)
        self.assertEqual(expect, result)
        given = {"key name":"A bunch of pluses"}
        expect = "key+name=A+bunch+of+pluses"
        result = urllib.parse.urlencode(given)
        self.assertEqual(expect, result)

    def test_doseq(self):
        # Test that passing True for 'doseq' parameter works correctly
        given = {'sequence':['1', '2', '3']}
        # Without doseq the list is stringified and quoted as a single value.
        expect = "sequence=%s" % urllib.parse.quote_plus(str(['1', '2', '3']))
        result = urllib.parse.urlencode(given)
        self.assertEqual(expect, result)
        # With doseq each element becomes its own "sequence=<value>" pair.
        result = urllib.parse.urlencode(given, True)
        for value in given["sequence"]:
            expect = "sequence=%s" % value
            self.assertIn(expect, result)
        self.assertEqual(result.count('&'), 2,
                         "Expected 2 '&'s, got %s" % result.count('&'))

    def test_empty_sequence(self):
        # Empty mappings and empty sequences both encode to the empty string.
        self.assertEqual("", urllib.parse.urlencode({}))
        self.assertEqual("", urllib.parse.urlencode([]))

    def test_nonstring_values(self):
        # Non-string values are encoded through their str() form.
        self.assertEqual("a=1", urllib.parse.urlencode({"a": 1}))
        self.assertEqual("a=None", urllib.parse.urlencode({"a": None}))

    def test_nonstring_seq_values(self):
        self.assertEqual("a=1&a=2", urllib.parse.urlencode({"a": [1, 2]}, True))
        self.assertEqual("a=None&a=a",
                         urllib.parse.urlencode({"a": [None, "a"]}, True))
        # A mapping used as the value is iterated, so its keys ("a", "b")
        # are what end up in the query string.
        data = collections.OrderedDict([("a", 1), ("b", 1)])
        self.assertEqual("a=a&a=b",
                         urllib.parse.urlencode({"a": data}, True))

    def test_urlencode_encoding(self):
        # ASCII encoding. Expect %3F with errors="replace"
        given = (('\u00a0', '\u00c1'),)
        expect = '%3F=%3F'
        result = urllib.parse.urlencode(given, encoding="ASCII", errors="replace")
        self.assertEqual(expect, result)

        # Default is UTF-8 encoding.
        given = (('\u00a0', '\u00c1'),)
        expect = '%C2%A0=%C3%81'
        result = urllib.parse.urlencode(given)
        self.assertEqual(expect, result)

        # Latin-1 encoding.
        given = (('\u00a0', '\u00c1'),)
        expect = '%A0=%C1'
        result = urllib.parse.urlencode(given, encoding="latin-1")
        self.assertEqual(expect, result)

    def test_urlencode_encoding_doseq(self):
        # ASCII Encoding. Expect %3F with errors="replace"
        given = (('\u00a0', '\u00c1'),)
        expect = '%3F=%3F'
        result = urllib.parse.urlencode(given, doseq=True,
                                        encoding="ASCII", errors="replace")
        self.assertEqual(expect, result)

        # ASCII Encoding. On a sequence of values.
        given = (("\u00a0", (1, "\u00c1")),)
        expect = '%3F=1&%3F=%3F'
        result = urllib.parse.urlencode(given, True,
                                        encoding="ASCII", errors="replace")
        self.assertEqual(expect, result)

        # Utf-8
        given = (("\u00a0", "\u00c1"),)
        expect = '%C2%A0=%C3%81'
        result = urllib.parse.urlencode(given, True)
        self.assertEqual(expect, result)

        given = (("\u00a0", (42, "\u00c1")),)
        expect = '%C2%A0=42&%C2%A0=%C3%81'
        result = urllib.parse.urlencode(given, True)
        self.assertEqual(expect, result)

        # latin-1
        given = (("\u00a0", "\u00c1"),)
        expect = '%A0=%C1'
        result = urllib.parse.urlencode(given, True, encoding="latin-1")
        self.assertEqual(expect, result)

        given = (("\u00a0", (42, "\u00c1")),)
        expect = '%A0=42&%A0=%C1'
        result = urllib.parse.urlencode(given, True, encoding="latin-1")
        self.assertEqual(expect, result)

    def test_urlencode_bytes(self):
        # Bytes keys/values are percent-encoded byte for byte, with or
        # without doseq.
        given = ((b'\xa0\x24', b'\xc1\x24'),)
        expect = '%A0%24=%C1%24'
        result = urllib.parse.urlencode(given)
        self.assertEqual(expect, result)
        result = urllib.parse.urlencode(given, True)
        self.assertEqual(expect, result)

        # Sequence of values
        given = ((b'\xa0\x24', (42, b'\xc1\x24')),)
        expect = '%A0%24=42&%A0%24=%C1%24'
        result = urllib.parse.urlencode(given, True)
        self.assertEqual(expect, result)

    def test_urlencode_encoding_safe_parameter(self):

        # Send '$' (\x24) as safe character
        # Default utf-8 encoding

        given = ((b'\xa0\x24', b'\xc1\x24'),)
        result = urllib.parse.urlencode(given, safe=":$")
        expect = '%A0$=%C1$'
        self.assertEqual(expect, result)

        given = ((b'\xa0\x24', b'\xc1\x24'),)
        result = urllib.parse.urlencode(given, doseq=True, safe=":$")
        expect = '%A0$=%C1$'
        self.assertEqual(expect, result)

        # Safe parameter in sequence
        given = ((b'\xa0\x24', (b'\xc1\x24', 0xd, 42)),)
        expect = '%A0$=%C1$&%A0$=13&%A0$=42'
        result = urllib.parse.urlencode(given, True, safe=":$")
        self.assertEqual(expect, result)

        # Test all above in latin-1 encoding

        given = ((b'\xa0\x24', b'\xc1\x24'),)
        result = urllib.parse.urlencode(given, safe=":$",
                                        encoding="latin-1")
        expect = '%A0$=%C1$'
        self.assertEqual(expect, result)

        given = ((b'\xa0\x24', b'\xc1\x24'),)
        expect = '%A0$=%C1$'
        result = urllib.parse.urlencode(given, doseq=True, safe=":$",
                                        encoding="latin-1")
        self.assertEqual(expect, result)

        given = ((b'\xa0\x24', (b'\xc1\x24', 0xd, 42)),)
        expect = '%A0$=%C1$&%A0$=13&%A0$=42'
        result = urllib.parse.urlencode(given, True, safe=":$",
                                        encoding="latin-1")
        self.assertEqual(expect, result)
1450
class Pathname_Tests(unittest.TestCase):
    """Test pathname2url() and url2pathname()"""

    def test_basic(self):
        # Make sure simple tests pass
        expected_path = os.path.join("parts", "of", "a", "path")
        expected_url = "parts/of/a/path"
        result = urllib.request.pathname2url(expected_path)
        self.assertEqual(expected_url, result,
                         "pathname2url() failed; %s != %s" %
                         (expected_url, result))
        result = urllib.request.url2pathname(expected_url)
        self.assertEqual(expected_path, result,
                         "url2pathname() failed; %s != %s" %
                         (expected_path, result))

    def test_quoting(self):
        # Test automatic quoting and unquoting works for pathname2url() and
        # url2pathname() respectively
        given = os.path.join("needs", "quot=ing", "here")
        expect = "needs/%s/here" % urllib.parse.quote("quot=ing")
        result = urllib.request.pathname2url(given)
        self.assertEqual(expect, result,
                         "pathname2url() failed; %s != %s" %
                         (expect, result))
        # Converting the quoted URL back must give the original path.
        expect = given
        result = urllib.request.url2pathname(result)
        self.assertEqual(expect, result,
                         "url2pathname() failed; %s != %s" %
                         (expect, result))
        # The expected URL is built with quote(), not quote_plus().
        given = os.path.join("make sure", "using_quote")
        expect = "%s/using_quote" % urllib.parse.quote("make sure")
        result = urllib.request.pathname2url(given)
        self.assertEqual(expect, result,
                         "pathname2url() failed; %s != %s" %
                         (expect, result))
        # A '+' in the URL must survive unchanged in the resulting path.
        given = "make+sure/using_unquote"
        expect = os.path.join("make+sure", "using_unquote")
        result = urllib.request.url2pathname(given)
        self.assertEqual(expect, result,
                         "url2pathname() failed; %s != %s" %
                         (expect, result))

    @unittest.skipUnless(sys.platform == 'win32',
                         'test specific to the urllib.request.url2pathname '
                         'function on Windows.')
    def test_ntpath(self):
        # Several spellings of the drive root must all map to 'C:\'.
        given = ('/C:/', '///C:/', '/C|//')
        expect = 'C:\\'
        for url in given:
            result = urllib.request.url2pathname(url)
            self.assertEqual(expect, result,
                             'urllib.request.url2pathname() failed; %s != %s' %
                             (expect, result))
        given = '///C|/path'
        expect = 'C:\\path'
        result = urllib.request.url2pathname(given)
        self.assertEqual(expect, result,
                         'urllib.request.url2pathname() failed; %s != %s' %
                         (expect, result))
1510
class Utility_Tests(unittest.TestCase):
    """Testcase to test the various utility functions in the urllib."""

    def test_thishost(self):
        """Test the urllib.request.thishost utility function returns a tuple"""
        # Only the container type is checked; the addresses themselves
        # depend on the machine running the test.
        host_addresses = urllib.request.thishost()
        self.assertIsInstance(host_addresses, tuple)
1517
Senthil Kumaran690ce9b2009-05-05 18:41:13 +00001518
class URLopener_Tests(FakeHTTPMixin, unittest.TestCase):
    """Testcase to test the open method of URLopener class."""

    def test_quoted_open(self):
        # An opener whose handler for the made-up "spam" scheme simply
        # echoes back the URL text it was handed.
        class DummyURLopener(urllib.request.URLopener):
            def open_spam(self, url):
                return url

        expected_warning = (
            'DummyURLopener style of invoking requests is deprecated.',
            DeprecationWarning)
        with support.check_warnings(expected_warning):
            quoted = DummyURLopener().open('spam://example/ /')
            self.assertEqual(quoted, '//example/%20/')

            # test the safe characters are not quoted by urlopen
            untouched = DummyURLopener().open(
                "spam://c:|windows%/:=&?~#+!$,;'@()*[]|/path/")
            self.assertEqual(untouched,
                             "//c:|windows%/:=&?~#+!$,;'@()*[]|/path/")

    @support.ignore_warnings(category=DeprecationWarning)
    def test_urlopener_retrieve_file(self):
        with support.temp_dir() as tmpdir:
            handle, tmpfile = tempfile.mkstemp(dir=tmpdir)
            os.close(handle)
            fileurl = "file:" + urllib.request.pathname2url(tmpfile)
            opener = urllib.request.URLopener()
            filename, _ = opener.retrieve(fileurl)
            # Some buildbots have TEMP folder that uses a lowercase drive letter.
            retrieved = os.path.normcase(filename)
            original = os.path.normcase(tmpfile)
            self.assertEqual(retrieved, original)

    @support.ignore_warnings(category=DeprecationWarning)
    def test_urlopener_retrieve_remote(self):
        url = "http://www.python.org/file.txt"
        # Serve a canned response through the mixin's fake HTTP hook and
        # make sure the hook is undone afterwards.
        self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello!")
        self.addCleanup(self.unfakehttp)
        filename, _ = urllib.request.URLopener().retrieve(url)
        _, extension = os.path.splitext(filename)
        self.assertEqual(extension, ".txt")

    @support.ignore_warnings(category=DeprecationWarning)
    def test_local_file_open(self):
        # bpo-35907, CVE-2019-9948: urllib must reject local_file:// scheme
        class DummyURLopener(urllib.request.URLopener):
            def open_local_file(self, url):
                return url

        for url in ('local_file://example', 'local-file://example'):
            rejecting_calls = (
                urllib.request.urlopen,
                urllib.request.URLopener().open,
                urllib.request.URLopener().retrieve,
                DummyURLopener().open,
                DummyURLopener().retrieve,
            )
            for call in rejecting_calls:
                self.assertRaises(OSError, call, url)
1567
Xtreakc661b302019-05-19 19:10:06 +05301568
class RequestTests(unittest.TestCase):
    """Unit tests for urllib.request.Request."""

    def test_default_values(self):
        # No data gives GET; passing data (here an empty dict) gives POST.
        make_request = urllib.request.Request
        target = "http://www.python.org"
        self.assertEqual(make_request(target).get_method(), 'GET')
        self.assertEqual(make_request(target, {}).get_method(), 'POST')

    def test_with_method_arg(self):
        make_request = urllib.request.Request
        target = "http://www.python.org"

        # An explicit method is reported whether or not data is supplied.
        for extra_args in ((), ({},)):
            head_request = make_request(target, *extra_args, method='HEAD')
            self.assertEqual(head_request.method, 'HEAD')
            self.assertEqual(head_request.get_method(), 'HEAD')

        # Reassigning .method after construction is picked up by get_method().
        mutable_request = make_request(target, method='GET')
        self.assertEqual(mutable_request.get_method(), 'GET')
        mutable_request.method = 'HEAD'
        self.assertEqual(mutable_request.get_method(), 'HEAD')
Skip Montanaro080c9972001-01-28 21:12:22 +00001591
1592
class URL2PathNameTests(unittest.TestCase):
    # Exercises the url2pathname() imported from nturl2path at file level.

    def test_converting_drive_letter(self):
        # Both '|' and ':' are accepted after the drive letter.
        conversions = (
            ("///C|", 'C:'),
            ("///C:", 'C:'),
            ("///C|/", 'C:\\'),
        )
        for url, wanted in conversions:
            self.assertEqual(url2pathname(url), wanted)

    def test_converting_when_no_drive_letter(self):
        # cannot end a raw string in \
        from_three_slashes = r'\\\C\test' '\\'
        from_four_slashes = r'\\C\test' '\\'
        self.assertEqual(url2pathname("///C/test/"), from_three_slashes)
        self.assertEqual(url2pathname("////C/test/"), from_four_slashes)

    def test_simple_compare(self):
        converted = url2pathname("///C|/foo/bar/spam.foo")
        self.assertEqual(converted, r'C:\foo\bar\spam.foo')

    def test_non_ascii_drive_letter(self):
        with self.assertRaises(IOError):
            url2pathname("///\u00e8|/")

    def test_roundtrip_url2pathname(self):
        # path -> URL -> path must give back the starting path.
        for path in ('C:', r'\\\C\test\\', r'C:\foo\bar\spam.foo'):
            self.assertEqual(url2pathname(pathname2url(path)), path)
Senthil Kumaran277e9092013-04-10 20:51:19 -07001619
class PathName2URLTests(unittest.TestCase):
    # Exercises the pathname2url() imported from nturl2path at file level.

    def test_converting_drive_letter(self):
        # A bare drive and a drive root are expected to give the same URL.
        for pathname in ("C:", "C:\\"):
            self.assertEqual(pathname2url(pathname), '///C:')

    def test_converting_when_no_drive_letter(self):
        # The trailing backslash is a separate literal because a raw string
        # cannot end in a backslash.
        conversions = (
            (r"\\\folder\test" "\\", '/////folder/test/'),
            (r"\\folder\test" "\\", '////folder/test/'),
            (r"\folder\test" "\\", '/folder/test/'),
        )
        for pathname, wanted in conversions:
            self.assertEqual(pathname2url(pathname), wanted)

    def test_simple_compare(self):
        converted = pathname2url(r'C:\foo\bar\spam.foo')
        self.assertEqual(converted, "///C:/foo/bar/spam.foo")

    def test_long_drive_letter(self):
        with self.assertRaises(IOError):
            pathname2url("XX:\\")

    def test_roundtrip_pathname2url(self):
        # URL -> path -> URL must give back the starting URL.
        for path in ('///C:', '/////folder/test/', '///C:/foo/bar/spam.foo'):
            self.assertEqual(pathname2url(url2pathname(path)), path)
Brett Cannon74bfd702003-04-25 09:39:47 +00001647
# Run the module's test cases through unittest's command-line runner when
# this file is executed directly rather than imported.
if __name__ == '__main__':
    unittest.main()