"""Regression tests for what was in Python 2's "urllib" module"""
Brett Cannon74bfd702003-04-25 09:39:47 +00002
Jeremy Hylton1afc1692008-06-18 20:49:58 +00003import urllib.parse
4import urllib.request
guido@google.coma119df92011-03-29 11:41:02 -07005import urllib.error
Georg Brandl24420152008-05-26 16:32:26 +00006import http.client
Barry Warsaw820c1202008-06-12 04:06:45 +00007import email.message
Jeremy Hylton66dc8c52007-08-04 03:42:26 +00008import io
Brett Cannon74bfd702003-04-25 09:39:47 +00009import unittest
Benjamin Peterson3c2dca62014-06-07 15:08:04 -070010from unittest.mock import patch
Benjamin Petersonee8712c2008-05-20 21:35:26 +000011from test import support
Brett Cannon74bfd702003-04-25 09:39:47 +000012import os
Antoine Pitrou07df6552014-11-02 17:23:14 +010013try:
14 import ssl
15except ImportError:
16 ssl = None
Senthil Kumaran2d2ea1b2011-04-14 13:16:30 +080017import sys
Georg Brandl5a650a22005-08-26 08:51:34 +000018import tempfile
Senthil Kumaran277e9092013-04-10 20:51:19 -070019from nturl2path import url2pathname, pathname2url
Jeremy Hylton6102e292000-08-31 15:48:10 +000020
Senthil Kumaranc5c5a142012-01-14 19:09:04 +080021from base64 import b64encode
Georg Brandl2daf6ae2012-02-20 19:54:16 +010022import collections
Senthil Kumaranc5c5a142012-01-14 19:09:04 +080023
Senthil Kumaran8b081b72013-04-10 20:53:12 -070024
def hexescape(char):
    """Escape char as RFC 2396 specifies.

    Return ``"%"`` followed by the upper-case hexadecimal value of
    ``ord(char)``, zero-padded to at least two digits
    (for example ``" "`` becomes ``"%20"``).
    """
    # "%02X" does the upper-casing and zero-padding that would otherwise
    # have to be done by hand on the output of hex().
    return "%%%02X" % ord(char)
Jeremy Hylton6102e292000-08-31 15:48:10 +000031
# Shortcut for testing FancyURLopener: the opener shared by every urlopen()
# call that does not pass explicit proxies.
_urlopener = None


def urlopen(url, data=None, proxies=None):
    """urlopen(url [, data]) -> open file-like object"""
    global _urlopener
    if proxies is not None:
        # An explicit proxy mapping always gets its own, uncached opener.
        opener = urllib.request.FancyURLopener(proxies=proxies)
    else:
        if not _urlopener:
            # First proxy-less call: build the shared opener and keep it.
            _urlopener = FancyURLopener()
        opener = _urlopener
    if data is None:
        return opener.open(url)
    return opener.open(url, data)
50
Senthil Kumarance260142011-11-01 01:35:17 +080051
def FancyURLopener():
    """Return a new urllib.request.FancyURLopener.

    The instance is created inside support.check_warnings() with a filter
    for the FancyURLopener DeprecationWarning message.
    """
    expected_warning = (
        'FancyURLopener style of invoking requests is deprecated.',
        DeprecationWarning,
    )
    with support.check_warnings(expected_warning):
        opener = urllib.request.FancyURLopener()
    return opener
57
58
def fakehttp(fakedata, mock_close=False):
    """Return an http.client.HTTPConnection subclass that replays *fakedata*.

    connect() on the returned class installs an in-memory socket whose
    contents are *fakedata*; bytes passed to the socket's sendall() are
    stored on the class as ``buf``.  When *mock_close* is true the class
    also gets a close() method that does nothing.
    """
    class FakeSocket(io.BytesIO):
        # Counts the socket itself plus every makefile() result; the
        # underlying buffer is closed only when the count reaches zero.
        io_refs = 1

        def sendall(self, data):
            # Keep the request bytes so tests can inspect what was sent.
            FakeHTTPConnection.buf = data

        def makefile(self, *args, **kwds):
            self.io_refs += 1
            return self

        def read(self, amt=None):
            return b"" if self.closed else super().read(amt)

        def readline(self, length=None):
            return b"" if self.closed else super().readline(length)

        def close(self):
            self.io_refs -= 1
            if not self.io_refs:
                super().close()

    class FakeHTTPConnection(http.client.HTTPConnection):

        # buffer to store data for verification in urlopen tests.
        buf = None

        def connect(self):
            self.sock = FakeSocket(self.fakedata)
            # Expose the most recent socket on the class as well.
            type(self).fakesock = self.sock

        if mock_close:
            # bpo-36918: HTTPConnection destructor calls close() which calls
            # flush(). Problem: flush() calls self.fp.flush() which raises
            # "ValueError: I/O operation on closed file" which is logged as an
            # "Exception ignored in". Override close() to silence this error.
            def close(self):
                pass

    # Attached after the class statement rather than inside its body.
    FakeHTTPConnection.fakedata = fakedata

    return FakeHTTPConnection
104
105
class FakeHTTPMixin(object):
    """Mixin that swaps http.client.HTTPConnection for a fake and back."""

    def fakehttp(self, fakedata, mock_close=False):
        """Install a fake connection class built from *fakedata*."""
        replacement = fakehttp(fakedata, mock_close=mock_close)
        # Remember whatever is installed now so unfakehttp() can restore it.
        self._connection_class = http.client.HTTPConnection
        http.client.HTTPConnection = replacement

    def unfakehttp(self):
        """Reinstall the connection class saved by fakehttp()."""
        http.client.HTTPConnection = self._connection_class
114
115
class FakeFTPMixin(object):
    """Mixin that swaps urllib.request.ftpwrapper for an inert stand-in."""

    def fakeftp(self):
        """Install a do-nothing ftpwrapper class, saving the current one."""
        class FakeFtpWrapper(object):
            def __init__(self, user, passwd, host, port, dirs, timeout=None,
                         persistent=True):
                pass

            def retrfile(self, file, type):
                # An empty in-memory body paired with 0.
                empty_body = io.BytesIO()
                return empty_body, 0

            def close(self):
                pass

        # Remember whatever is installed now so unfakeftp() can restore it.
        self._ftpwrapper_class = urllib.request.ftpwrapper
        urllib.request.ftpwrapper = FakeFtpWrapper

    def unfakeftp(self):
        """Reinstall the ftpwrapper class saved by fakeftp()."""
        urllib.request.ftpwrapper = self._ftpwrapper_class
134
135
class urlopen_FileTests(unittest.TestCase):
    """Test urlopen() opening a temporary file.

    Try to test as much functionality as possible so as to cut down on reliance
    on connecting to the Net for testing.

    """

    def setUp(self):
        # Write a known payload to the scratch file, then open that file
        # through a file: URL.
        payload = "test_urllib: %s\n" % self.__class__.__name__
        self.text = payload.encode("ascii")
        with open(support.TESTFN, 'wb') as scratch:
            scratch.write(self.text)
        self.pathname = support.TESTFN
        self.returned_obj = urlopen("file:%s" % self.pathname)

    def tearDown(self):
        """Shut down the open object"""
        self.returned_obj.close()
        os.remove(support.TESTFN)

    def test_interface(self):
        # Make sure object returned by urlopen() has the specified methods
        expected_attrs = ("read", "readline", "readlines", "fileno",
                          "close", "info", "geturl", "getcode", "__iter__")
        for attr in expected_attrs:
            self.assertTrue(hasattr(self.returned_obj, attr),
                            "object returned by urlopen() lacks %s attribute" %
                            attr)

    def test_read(self):
        self.assertEqual(self.text, self.returned_obj.read())

    def test_readline(self):
        first_line = self.returned_obj.readline()
        self.assertEqual(self.text, first_line)
        after_eof = self.returned_obj.readline()
        self.assertEqual(b'', after_eof,
                         "calling readline() after exhausting the file did not"
                         " return an empty string")

    def test_readlines(self):
        lines_list = self.returned_obj.readlines()
        self.assertEqual(len(lines_list), 1,
                         "readlines() returned the wrong number of lines")
        self.assertEqual(lines_list[0], self.text,
                         "readlines() returned improper text")

    def test_fileno(self):
        descriptor = self.returned_obj.fileno()
        self.assertIsInstance(descriptor, int, "fileno() did not return an int")
        raw = os.read(descriptor, len(self.text))
        self.assertEqual(raw, self.text,
                         "Reading on the file descriptor returned by fileno() "
                         "did not return the expected text")

    def test_close(self):
        # Test close() by calling it here and then having it be called again
        # by the tearDown() method for the test
        self.returned_obj.close()

    def test_headers(self):
        self.assertIsInstance(self.returned_obj.headers, email.message.Message)

    def test_url(self):
        self.assertEqual(self.returned_obj.url, self.pathname)

    def test_status(self):
        self.assertIsNone(self.returned_obj.status)

    def test_info(self):
        self.assertIsInstance(self.returned_obj.info(), email.message.Message)

    def test_geturl(self):
        self.assertEqual(self.returned_obj.geturl(), self.pathname)

    def test_getcode(self):
        self.assertIsNone(self.returned_obj.getcode())

    def test_iter(self):
        # Test iterator
        # Don't need to count number of iterations since test would fail the
        # instant it returned anything beyond the first line from the
        # comparison.
        # Use the iterator in the usual implicit way to test for ticket #4608.
        for line in self.returned_obj:
            self.assertEqual(line, self.text)

    def test_relativelocalfile(self):
        with self.assertRaises(ValueError):
            urllib.request.urlopen('./' + self.pathname)
226
Senthil Kumaranefbd4ea2017-04-01 23:47:35 -0700227
class ProxyTests(unittest.TestCase):
    """Tests for proxy settings read from environment variables."""

    def setUp(self):
        # Records changes to env vars
        self.env = support.EnvironmentVarGuard()
        # Delete all proxy related env vars; the names are snapshotted
        # before any of them is unset.
        proxy_vars = [name for name in os.environ if 'proxy' in name.lower()]
        for name in proxy_vars:
            self.env.unset(name)

    def tearDown(self):
        # Restore all proxy related env vars
        self.env.__exit__()
        del self.env

    def test_getproxies_environment_keep_no_proxies(self):
        bypass = urllib.request.proxy_bypass_environment
        self.env.set('NO_PROXY', 'localhost')
        proxies = urllib.request.getproxies_environment()
        # getproxies_environment use lowered case truncated (no '_proxy') keys
        self.assertEqual('localhost', proxies['no'])
        # List of no_proxies with space.
        self.env.set('NO_PROXY', 'localhost, anotherdomain.com, newdomain.com:1234')
        self.assertTrue(bypass('anotherdomain.com'))
        self.assertTrue(bypass('anotherdomain.com:8888'))
        self.assertTrue(bypass('newdomain.com:1234'))

    def test_proxy_cgi_ignore(self):
        getproxies = urllib.request.getproxies_environment
        try:
            self.env.set('HTTP_PROXY', 'http://somewhere:3128')
            self.assertEqual('http://somewhere:3128', getproxies()['http'])
            self.env.set('REQUEST_METHOD', 'GET')
            self.assertNotIn('http', getproxies())
        finally:
            self.env.unset('REQUEST_METHOD')
            self.env.unset('HTTP_PROXY')

    def test_proxy_bypass_environment_host_match(self):
        bypass = urllib.request.proxy_bypass_environment
        self.env.set('NO_PROXY',
                     'localhost, anotherdomain.com, newdomain.com:1234, .d.o.t')
        bypassed_hosts = (
            'localhost',
            'LocalHost',                # MixedCase
            'LOCALHOST',                # UPPERCASE
            'newdomain.com:1234',
            'foo.d.o.t',                # issue 29142
            'anotherdomain.com:8888',
            'www.newdomain.com:1234',
        )
        proxied_hosts = (
            'prelocalhost',
            'newdomain.com',            # no port
            'newdomain.com:1235',       # wrong port
        )
        for host in bypassed_hosts:
            self.assertTrue(bypass(host))
        for host in proxied_hosts:
            self.assertFalse(bypass(host))
Senthil Kumarana7c0ff22016-04-25 08:16:23 -0700280
Senthil Kumaranefbd4ea2017-04-01 23:47:35 -0700281
class ProxyTests_withOrderedEnv(unittest.TestCase):
    """Proxy tests run against an ordered, initially empty fake os.environ."""

    def setUp(self):
        # We need to test conditions, where variable order _is_ significant
        self._saved_env = os.environ
        # Monkey patch os.environ, start with empty fake environment
        os.environ = collections.OrderedDict()

    def tearDown(self):
        os.environ = self._saved_env

    def test_getproxies_environment_prefer_lowercase(self):
        bypass = urllib.request.proxy_bypass_environment
        getproxies = urllib.request.getproxies_environment

        # Test lowercase preference with removal
        os.environ['no_proxy'] = ''
        os.environ['No_Proxy'] = 'localhost'
        self.assertFalse(bypass('localhost'))
        self.assertFalse(bypass('arbitrary'))
        os.environ['http_proxy'] = ''
        os.environ['HTTP_PROXY'] = 'http://somewhere:3128'
        self.assertEqual({}, getproxies())

        # Test lowercase preference of proxy bypass and correct matching including ports
        os.environ['no_proxy'] = 'localhost, noproxy.com, my.proxy:1234'
        os.environ['No_Proxy'] = 'xyz.com'
        self.assertTrue(bypass('localhost'))
        self.assertTrue(bypass('noproxy.com:5678'))
        self.assertTrue(bypass('my.proxy:1234'))
        self.assertFalse(bypass('my.proxy'))
        self.assertFalse(bypass('arbitrary'))

        # Test lowercase preference with replacement
        os.environ['http_proxy'] = 'http://somewhere:3128'
        os.environ['Http_Proxy'] = 'http://somewhereelse:3128'
        self.assertEqual('http://somewhere:3128', getproxies()['http'])
Benjamin Peterson9bc93512008-09-22 22:10:59 +0000316
Senthil Kumaranefbd4ea2017-04-01 23:47:35 -0700317
class urlopen_HttpTests(unittest.TestCase, FakeHTTPMixin, FakeFTPMixin):
    """Test urlopen() opening a fake http connection."""

    def check_read(self, ver):
        canned = b"HTTP/" + ver + b" 200 OK\r\n\r\nHello!"
        self.fakehttp(canned)
        try:
            response = urlopen("http://python.org/")
            self.assertEqual(response.readline(), b"Hello!")
            self.assertEqual(response.readline(), b"")
            self.assertEqual(response.geturl(), 'http://python.org/')
            self.assertEqual(response.getcode(), 200)
        finally:
            self.unfakehttp()

    def test_url_fragment(self):
        # Issue #11703: geturl() omits fragments in the original URL.
        url = 'http://docs.python.org/library/urllib.html#OK'
        self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello!")
        try:
            response = urllib.request.urlopen(url)
            self.assertEqual(response.geturl(), url)
        finally:
            self.unfakehttp()

    def test_willclose(self):
        self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello!")
        try:
            resp = urlopen("http://www.python.org")
            self.assertTrue(resp.fp.will_close)
        finally:
            self.unfakehttp()

    @unittest.skipUnless(ssl, "ssl module required")
    def test_url_with_control_char_rejected(self):
        for char_no in [*range(0, 0x21), 0x7f]:
            char = chr(char_no)
            schemeless_url = f"//localhost:7777/test{char}/"
            self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello.")
            try:
                # We explicitly test urllib.request.urlopen() instead of the top
                # level 'def urlopen()' function defined in this... (quite ugly)
                # test suite.  They use different url opening codepaths.  Plain
                # urlopen uses FancyURLOpener which goes via a codepath that
                # calls urllib.parse.quote() on the URL which makes all of the
                # above attempts at injection within the url _path_ safe.
                escaped_char_repr = repr(char).replace('\\', r'\\')
                pattern = f"contain control.*{escaped_char_repr}"
                for scheme in ("http", "https"):
                    with self.assertRaisesRegex(http.client.InvalidURL,
                                                pattern):
                        urllib.request.urlopen(f"{scheme}:{schemeless_url}")
                # This code path quotes the URL so there is no injection.
                resp = urlopen(f"http:{schemeless_url}")
                self.assertNotIn(char, resp.geturl())
            finally:
                self.unfakehttp()

    @unittest.skipUnless(ssl, "ssl module required")
    def test_url_with_newline_header_injection_rejected(self):
        self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello.")
        host = "localhost:7777?a=1 HTTP/1.1\r\nX-injected: header\r\nTEST: 123"
        schemeless_url = "//" + host + ":8080/test/?test=a"
        try:
            # We explicitly test urllib.request.urlopen() instead of the top
            # level 'def urlopen()' function defined in this... (quite ugly)
            # test suite.  They use different url opening codepaths.  Plain
            # urlopen uses FancyURLOpener which goes via a codepath that
            # calls urllib.parse.quote() on the URL which makes all of the
            # above attempts at injection within the url _path_ safe.
            InvalidURL = http.client.InvalidURL
            http_pattern = r"contain control.*\\r.*(found at least . .)"
            https_pattern = r"contain control.*\\n"
            with self.assertRaisesRegex(InvalidURL, http_pattern):
                urllib.request.urlopen(f"http:{schemeless_url}")
            with self.assertRaisesRegex(InvalidURL, https_pattern):
                urllib.request.urlopen(f"https:{schemeless_url}")
            # This code path quotes the URL so there is no injection.
            resp = urlopen(f"http:{schemeless_url}")
            self.assertNotIn(' ', resp.geturl())
            self.assertNotIn('\r', resp.geturl())
            self.assertNotIn('\n', resp.geturl())
        finally:
            self.unfakehttp()

    def test_read_0_9(self):
        # "0.9" response accepted (but not "simple responses" without
        # a status line)
        self.check_read(b"0.9")

    def test_read_1_0(self):
        self.check_read(b"1.0")

    def test_read_1_1(self):
        self.check_read(b"1.1")

    def test_read_bogus(self):
        # urlopen() should raise OSError for many error codes.
        self.fakehttp(
            b"HTTP/1.1 401 Authentication Required\n"
            b"Date: Wed, 02 Jan 2008 03:03:54 GMT\n"
            b"Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e\n"
            b"Connection: close\n"
            b"Content-Type: text/html; charset=iso-8859-1\n",
            mock_close=True)
        try:
            with self.assertRaises(OSError):
                urlopen("http://python.org/")
        finally:
            self.unfakehttp()

    def test_invalid_redirect(self):
        # urlopen() should raise OSError for many error codes.
        self.fakehttp(
            b"HTTP/1.1 302 Found\n"
            b"Date: Wed, 02 Jan 2008 03:03:54 GMT\n"
            b"Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e\n"
            b"Location: file://guidocomputer.athome.com:/python/license\n"
            b"Connection: close\n"
            b"Content-Type: text/html; charset=iso-8859-1\n",
            mock_close=True)
        try:
            msg = "Redirection to url 'file:"
            with self.assertRaisesRegex(urllib.error.HTTPError, msg):
                urlopen("http://python.org/")
        finally:
            self.unfakehttp()

    def test_redirect_limit_independent(self):
        # Ticket #12923: make sure independent requests each use their
        # own retry limit.
        for _ in range(FancyURLopener().maxtries):
            self.fakehttp(
                b"HTTP/1.1 302 Found\n"
                b"Location: file://guidocomputer.athome.com:/python/license\n"
                b"Connection: close\n",
                mock_close=True)
            try:
                with self.assertRaises(urllib.error.HTTPError):
                    urlopen("http://something")
            finally:
                self.unfakehttp()

    def test_empty_socket(self):
        # urlopen() raises OSError if the underlying socket does not send any
        # data. (#1680230)
        self.fakehttp(b'')
        try:
            with self.assertRaises(OSError):
                urlopen("http://something")
        finally:
            self.unfakehttp()

    def test_missing_localfile(self):
        # Test for #10836
        with self.assertRaises(urllib.error.URLError) as caught:
            urlopen('file://localhost/a/file/which/doesnot/exists.py')
        self.assertTrue(caught.exception.filename)
        self.assertTrue(caught.exception.reason)

    def test_file_notexists(self):
        fd, tmp_file = tempfile.mkstemp()
        tmp_fileurl = 'file://localhost/' + tmp_file.replace(os.path.sep, '/')
        try:
            self.assertTrue(os.path.exists(tmp_file))
            with urlopen(tmp_fileurl) as fobj:
                self.assertTrue(fobj)
        finally:
            os.close(fd)
            os.unlink(tmp_file)
        # Once the file is gone the same URL must fail.
        self.assertFalse(os.path.exists(tmp_file))
        with self.assertRaises(urllib.error.URLError):
            urlopen(tmp_fileurl)

    def test_ftp_nohost(self):
        test_ftp_url = 'ftp:///path'
        with self.assertRaises(urllib.error.URLError) as caught:
            urlopen(test_ftp_url)
        self.assertFalse(caught.exception.filename)
        self.assertTrue(caught.exception.reason)

    def test_ftp_nonexisting(self):
        with self.assertRaises(urllib.error.URLError) as caught:
            urlopen('ftp://localhost/a/file/which/doesnot/exists.py')
        self.assertFalse(caught.exception.filename)
        self.assertTrue(caught.exception.reason)

    @patch.object(urllib.request, 'MAXFTPCACHE', 0)
    def test_ftp_cache_pruning(self):
        self.fakeftp()
        try:
            stale = urllib.request.ftpwrapper('user', 'pass', 'localhost', 21, [])
            urllib.request.ftpcache['test'] = stale
            urlopen('ftp://localhost')
        finally:
            self.unfakeftp()

    def test_userpass_inurl(self):
        self.fakehttp(b"HTTP/1.0 200 OK\r\n\r\nHello!")
        try:
            response = urlopen("http://user:pass@python.org/")
            self.assertEqual(response.readline(), b"Hello!")
            self.assertEqual(response.readline(), b"")
            self.assertEqual(response.geturl(), 'http://user:pass@python.org/')
            self.assertEqual(response.getcode(), 200)
        finally:
            self.unfakehttp()

    def test_userpass_inurl_w_spaces(self):
        self.fakehttp(b"HTTP/1.0 200 OK\r\n\r\nHello!")
        try:
            userpass = "a b:c d"
            url = f"http://{userpass}@python.org/"
            fakehttp_wrapper = http.client.HTTPConnection
            encoded_credentials = b64encode(userpass.encode("ASCII"))
            authorization = ("Authorization: Basic %s\r\n" %
                             encoded_credentials.decode("ASCII"))
            response = urlopen(url)
            # The authorization header must be in place
            sent_request = fakehttp_wrapper.buf.decode("UTF-8")
            self.assertIn(authorization, sent_request)
            self.assertEqual(response.readline(), b"Hello!")
            self.assertEqual(response.readline(), b"")
            # the spaces are quoted in URL so no match
            self.assertNotEqual(response.geturl(), url)
            self.assertEqual(response.getcode(), 200)
        finally:
            self.unfakehttp()

    def test_URLopener_deprecation(self):
        with support.check_warnings(('', DeprecationWarning)):
            urllib.request.URLopener()

    @unittest.skipUnless(ssl, "ssl module required")
    def test_cafile_and_context(self):
        context = ssl.create_default_context()
        with support.check_warnings(('', DeprecationWarning)), \
                self.assertRaises(ValueError):
            urllib.request.urlopen(
                "https://localhost", cafile="/nonexistent/path", context=context
            )
Senthil Kumarana5c85b32014-09-19 15:23:30 +0800551
Senthil Kumaranefbd4ea2017-04-01 23:47:35 -0700552
class urlopen_DataTests(unittest.TestCase):
    """Test urlopen() opening a data URL."""

    def setUp(self):
        # clear _opener global variable
        self.addCleanup(urllib.request.urlcleanup)

        # text containing URL special- and unicode-characters
        self.text = "test data URLs :;,%=& \u00f6 \u00c4 "
        # 2x1 pixel RGB PNG image with one black and one white pixel
        self.image = (
            b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\x02\x00\x00\x00'
            b'\x01\x08\x02\x00\x00\x00{@\xe8\xdd\x00\x00\x00\x01sRGB\x00\xae'
            b'\xce\x1c\xe9\x00\x00\x00\x0fIDAT\x08\xd7c```\xf8\xff\xff?\x00'
            b'\x06\x01\x02\xfe\no/\x1e\x00\x00\x00\x00IEND\xaeB`\x82')

        self.text_url = (
            "data:text/plain;charset=UTF-8,test%20data%20URLs%20%3A%3B%2C%25%3"
            "D%26%20%C3%B6%20%C3%84%20")
        self.text_url_base64 = (
            "data:text/plain;charset=ISO-8859-1;base64,dGVzdCBkYXRhIFVSTHMgOjs"
            "sJT0mIPYgxCA%3D")
        # base64 encoded data URL that contains ignorable spaces,
        # such as "\n", " ", "%0A", and "%20".
        self.image_url = (
            "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAIAAAABCAIAAAB7\n"
            "QOjdAAAAAXNSR0IArs4c6QAAAA9JREFUCNdj%0AYGBg%2BP//PwAGAQL%2BCm8 "
            "vHgAAAABJRU5ErkJggg%3D%3D%0A%20")

        # Open all three URLs up front; the tests only inspect the responses.
        opener = urllib.request.urlopen
        self.text_url_resp = opener(self.text_url)
        self.text_url_base64_resp = opener(self.text_url_base64)
        self.image_url_resp = opener(self.image_url)

    def test_interface(self):
        # Make sure object returned by urlopen() has the specified methods
        expected_attrs = ("read", "readline", "readlines",
                          "close", "info", "geturl", "getcode", "__iter__")
        for attr in expected_attrs:
            self.assertTrue(hasattr(self.text_url_resp, attr),
                            "object returned by urlopen() lacks %s attribute" %
                            attr)

    def test_info(self):
        self.assertIsInstance(self.text_url_resp.info(), email.message.Message)
        base64_params = self.text_url_base64_resp.info().get_params()
        self.assertEqual(base64_params,
                         [('text/plain', ''), ('charset', 'ISO-8859-1')])
        declared_length = self.image_url_resp.info()['content-length']
        self.assertEqual(declared_length, str(len(self.image)))
        default_params = urllib.request.urlopen("data:,").info().get_params()
        self.assertEqual(default_params,
                         [('text/plain', ''), ('charset', 'US-ASCII')])

    def test_geturl(self):
        pairs = (
            (self.text_url_resp, self.text_url),
            (self.text_url_base64_resp, self.text_url_base64),
            (self.image_url_resp, self.image_url),
        )
        for response, url in pairs:
            self.assertEqual(response.geturl(), url)

    def test_read_text(self):
        response = self.text_url_resp
        charset = dict(response.info().get_params())['charset']
        self.assertEqual(response.read().decode(charset), self.text)

    def test_read_text_base64(self):
        response = self.text_url_base64_resp
        charset = dict(response.info().get_params())['charset']
        self.assertEqual(response.read().decode(charset), self.text)

    def test_read_image(self):
        self.assertEqual(self.image_url_resp.read(), self.image)

    def test_missing_comma(self):
        with self.assertRaises(ValueError):
            urllib.request.urlopen('data:text/plain')

    def test_invalid_base64_data(self):
        # missing padding character
        with self.assertRaises(ValueError):
            urllib.request.urlopen('data:;base64,Cg=')
628
Senthil Kumaranefbd4ea2017-04-01 23:47:35 -0700629
class urlretrieve_FileTests(unittest.TestCase):
    """Test urllib.urlretrieve() on local files"""

    def setUp(self):
        # clear _opener global variable
        self.addCleanup(urllib.request.urlcleanup)

        # Create a list of temporary files. Each item in the list is a file
        # name (absolute path or relative to the current working directory).
        # All files in this list will be deleted in the tearDown method. Note,
        # this only helps to make sure temporary files get deleted, but it
        # does nothing about trying to close files that may still be open. It
        # is the responsibility of the developer to properly close files even
        # when exceptional conditions occur.
        self.tempFiles = []

        # Create a temporary file.
        self.registerFileForCleanUp(support.TESTFN)
        self.text = b'testing urllib.urlretrieve'
        with open(support.TESTFN, 'wb') as source:
            source.write(self.text)

    def tearDown(self):
        # Delete the temporary files.  Removal is best effort: a test may
        # never have created a file it registered.
        for each in self.tempFiles:
            try:
                os.remove(each)
            except OSError:
                pass

    def constructLocalFileUrl(self, filePath):
        """Return a file:// URL for filePath, skipping the test if the
        absolute path cannot be encoded to UTF-8."""
        filePath = os.path.abspath(filePath)
        try:
            filePath.encode("utf-8")
        except UnicodeEncodeError:
            raise unittest.SkipTest("filePath is not encodable to utf8")
        return "file://%s" % urllib.request.pathname2url(filePath)

    def createNewTempFile(self, data=b""):
        """Creates a new temporary file containing the specified data,
        registers the file for deletion during the test fixture tear down, and
        returns the absolute path of the file."""

        newFd, newFilePath = tempfile.mkstemp()
        # Register before writing so the file is removed even if the write
        # below fails.
        self.registerFileForCleanUp(newFilePath)
        with os.fdopen(newFd, "wb") as newFile:
            newFile.write(data)
        return newFilePath

    def registerFileForCleanUp(self, fileName):
        """Remember fileName so that tearDown() deletes it."""
        self.tempFiles.append(fileName)

    def _retrieveWithReport(self, data):
        """Retrieve a new temporary file holding data into support.TESTFN and
        return the list of (block_count, block_read_size, file_size) tuples
        the reporthook was called with."""
        report = []

        def hooktester(block_count, block_read_size, file_size):
            report.append((block_count, block_read_size, file_size))

        srcFileName = self.createNewTempFile(data)
        urllib.request.urlretrieve(self.constructLocalFileUrl(srcFileName),
                                   support.TESTFN, hooktester)
        return report

    def test_basic(self):
        # Make sure that a local file just gets its own location returned and
        # a headers value is returned.
        result = urllib.request.urlretrieve("file:%s" % support.TESTFN)
        self.assertEqual(result[0], support.TESTFN)
        self.assertIsInstance(result[1], email.message.Message,
                              "did not get an email.message.Message instance "
                              "as second returned value")

    def test_copy(self):
        # Test that setting the filename argument works.
        second_temp = "%s.2" % support.TESTFN
        self.registerFileForCleanUp(second_temp)
        result = urllib.request.urlretrieve(
            self.constructLocalFileUrl(support.TESTFN), second_temp)
        self.assertEqual(second_temp, result[0])
        self.assertTrue(os.path.exists(second_temp),
                        "copy of the file was not made")
        with open(second_temp, 'rb') as copied:
            text = copied.read()
        self.assertEqual(self.text, text)

    def test_reporthook(self):
        # Make sure that the reporthook works: it must receive integers and
        # the block count must increase by one on every call, starting at 0.
        expected_block_count = 0

        def hooktester(block_count, block_read_size, file_size):
            nonlocal expected_block_count
            self.assertIsInstance(block_count, int)
            self.assertIsInstance(block_read_size, int)
            self.assertIsInstance(file_size, int)
            self.assertEqual(block_count, expected_block_count)
            expected_block_count += 1

        second_temp = "%s.2" % support.TESTFN
        self.registerFileForCleanUp(second_temp)
        urllib.request.urlretrieve(
            self.constructLocalFileUrl(support.TESTFN),
            second_temp, hooktester)

    def test_reporthook_0_bytes(self):
        # Test on zero length file. Should call reporthook only 1 time.
        report = self._retrieveWithReport(b"")
        self.assertEqual(len(report), 1)
        self.assertEqual(report[0][2], 0)

    def test_reporthook_5_bytes(self):
        # Test on 5 byte file. Should call reporthook only 2 times (once when
        # the "network connection" is established and once when the block is
        # read).
        report = self._retrieveWithReport(b"x" * 5)
        self.assertEqual(len(report), 2)
        self.assertEqual(report[0][2], 5)
        self.assertEqual(report[1][2], 5)

    def test_reporthook_8193_bytes(self):
        # Test on 8193 byte file. Should call reporthook only 3 times (once
        # when the "network connection" is established, once for the next 8192
        # bytes, and once for the last byte).
        report = self._retrieveWithReport(b"x" * 8193)
        self.assertEqual(len(report), 3)
        self.assertEqual(report[0][2], 8193)
        self.assertEqual(report[0][1], 8192)
        self.assertEqual(report[1][1], 8192)
        self.assertEqual(report[2][1], 8192)
Skip Montanaro080c9972001-01-28 21:12:22 +0000771
Senthil Kumarance260142011-11-01 01:35:17 +0800772
class urlretrieve_HttpTests(unittest.TestCase, FakeHTTPMixin):
    """Test urllib.urlretrieve() using fake http connections"""

    # Canned response shared by the tests below: the Content-Length header
    # announces 100 bytes but the body that follows is only "FF\n".
    _SHORT_RESPONSE = b'''HTTP/1.1 200 OK
Date: Wed, 02 Jan 2008 03:03:54 GMT
Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
Connection: close
Content-Length: 100
Content-Type: text/html; charset=iso-8859-1

FF
'''

    def test_short_content_raises_ContentTooShortError(self):
        self.addCleanup(urllib.request.urlcleanup)

        self.fakehttp(self._SHORT_RESPONSE)

        def _reporthook(par1, par2, par3):
            pass

        try:
            with self.assertRaises(urllib.error.ContentTooShortError):
                urllib.request.urlretrieve(support.TEST_HTTP_URL,
                                           reporthook=_reporthook)
        finally:
            # Always undo the fake connection, even if the assertion fails.
            self.unfakehttp()

    def test_short_content_raises_ContentTooShortError_without_reporthook(self):
        self.addCleanup(urllib.request.urlcleanup)

        self.fakehttp(self._SHORT_RESPONSE)
        try:
            with self.assertRaises(urllib.error.ContentTooShortError):
                urllib.request.urlretrieve(support.TEST_HTTP_URL)
        finally:
            # Always undo the fake connection, even if the assertion fails.
            self.unfakehttp()
816
817
class QuotingTests(unittest.TestCase):
    r"""Tests for urllib.quote() and urllib.quote_plus()

    According to RFC 3986 (Uniform Resource Identifiers), to escape a
    character you write it as '%' + <2 character US-ASCII hex value>.
    The Python code of ``'%' + hex(ord(<character>))[2:]`` escapes a
    character properly. Case does not matter on the hex letters.

    The various character sets specified are:

    Reserved characters : ";/?:@&=+$,"
        Have special meaning in URIs and must be escaped if not being used for
        their special meaning
    Data characters : letters, digits, and "-_.!~*'()"
        Unreserved and do not need to be escaped; can be, though, if desired
    Control characters : 0x00 - 0x1F, 0x7F
        Have no use in URIs so must be escaped
    space : 0x20
        Must be escaped
    Delimiters : '<>#%"'
        Must be escaped
    Unwise : "{}|\^[]`"
        Must be escaped

    """

    def test_never_quote(self):
        # Make sure quote() does not quote letters, digits, and "_.-~"
        do_not_quote = ''.join(["ABCDEFGHIJKLMNOPQRSTUVWXYZ",
                                "abcdefghijklmnopqrstuvwxyz",
                                "0123456789",
                                "_.-~"])
        result = urllib.parse.quote(do_not_quote)
        self.assertEqual(do_not_quote, result,
                         "using quote(): %r != %r" % (do_not_quote, result))
        result = urllib.parse.quote_plus(do_not_quote)
        self.assertEqual(do_not_quote, result,
                         "using quote_plus(): %r != %r" %
                         (do_not_quote, result))

    def test_default_safe(self):
        # Test '/' is default value for 'safe' parameter
        self.assertEqual(urllib.parse.quote.__defaults__[0], '/')

    def test_safe(self):
        # Test setting 'safe' parameter does what it should do
        quote_by_default = "<>"
        result = urllib.parse.quote(quote_by_default, safe=quote_by_default)
        self.assertEqual(quote_by_default, result,
                         "using quote(): %r != %r" %
                         (quote_by_default, result))
        result = urllib.parse.quote_plus(quote_by_default,
                                         safe=quote_by_default)
        self.assertEqual(quote_by_default, result,
                         "using quote_plus(): %r != %r" %
                         (quote_by_default, result))
        # Safe expressed as bytes rather than str
        result = urllib.parse.quote(quote_by_default, safe=b"<>")
        self.assertEqual(quote_by_default, result,
                         "using quote(): %r != %r" %
                         (quote_by_default, result))
        # "Safe" non-ASCII characters should have no effect
        # (Since URIs are not allowed to have non-ASCII characters)
        result = urllib.parse.quote("a\xfcb", encoding="latin-1", safe="\xfc")
        expect = urllib.parse.quote("a\xfcb", encoding="latin-1", safe="")
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" %
                         (expect, result))
        # Same as above, but using a bytes rather than str
        result = urllib.parse.quote("a\xfcb", encoding="latin-1",
                                    safe=b"\xfc")
        expect = urllib.parse.quote("a\xfcb", encoding="latin-1", safe="")
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" %
                         (expect, result))

    def test_default_quoting(self):
        # Make sure all characters that should be quoted are by default sans
        # space (separate test for that).
        should_quote = [chr(num) for num in range(32)]  # For 0x00 - 0x1F
        should_quote.append(r'<>#%"{}|\^[]`')
        should_quote.append(chr(127))  # For 0x7F
        should_quote = ''.join(should_quote)
        for char in should_quote:
            result = urllib.parse.quote(char)
            self.assertEqual(hexescape(char), result,
                             "using quote(): "
                             "%s should be escaped to %s, not %s" %
                             (char, hexescape(char), result))
            result = urllib.parse.quote_plus(char)
            self.assertEqual(hexescape(char), result,
                             "using quote_plus(): "
                             "%s should be escaped to %s, not %s" %
                             (char, hexescape(char), result))
        # Only the characters that need it are escaped inside a longer string.
        partial_quote = "ab[]cd"
        expected = "ab%5B%5Dcd"
        result = urllib.parse.quote(partial_quote)
        self.assertEqual(expected, result,
                         "using quote(): %r != %r" % (expected, result))
        result = urllib.parse.quote_plus(partial_quote)
        self.assertEqual(expected, result,
                         "using quote_plus(): %r != %r" % (expected, result))

    def test_quoting_space(self):
        # Make sure quote() and quote_plus() handle spaces as specified in
        # their unique way
        result = urllib.parse.quote(' ')
        self.assertEqual(result, hexescape(' '),
                         "using quote(): %r != %r" % (result, hexescape(' ')))
        result = urllib.parse.quote_plus(' ')
        self.assertEqual(result, '+',
                         "using quote_plus(): %r != +" % result)
        given = "a b cd e f"
        expect = given.replace(' ', hexescape(' '))
        result = urllib.parse.quote(given)
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        expect = given.replace(' ', '+')
        result = urllib.parse.quote_plus(given)
        self.assertEqual(expect, result,
                         "using quote_plus(): %r != %r" % (expect, result))

    def test_quoting_plus(self):
        self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma'),
                         'alpha%2Bbeta+gamma')
        self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma', '+'),
                         'alpha+beta+gamma')
        # Test with bytes
        self.assertEqual(urllib.parse.quote_plus(b'alpha+beta gamma'),
                         'alpha%2Bbeta+gamma')
        # Test with safe bytes
        self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma', b'+'),
                         'alpha+beta+gamma')

    def test_quote_bytes(self):
        # Bytes should quote directly to percent-encoded values
        given = b"\xa2\xd8ab\xff"
        expect = "%A2%D8ab%FF"
        result = urllib.parse.quote(given)
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        # Encoding argument should raise type error on bytes input
        self.assertRaises(TypeError, urllib.parse.quote, given,
                          encoding="latin-1")
        # quote_from_bytes should work the same
        result = urllib.parse.quote_from_bytes(given)
        self.assertEqual(expect, result,
                         "using quote_from_bytes(): %r != %r"
                         % (expect, result))

    def test_quote_with_unicode(self):
        # Characters in Latin-1 range, encoded by default in UTF-8
        given = "\xa2\xd8ab\xff"
        expect = "%C2%A2%C3%98ab%C3%BF"
        result = urllib.parse.quote(given)
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        # Characters in Latin-1 range, encoded with None (default)
        result = urllib.parse.quote(given, encoding=None, errors=None)
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        # Characters in Latin-1 range, encoded with Latin-1
        given = "\xa2\xd8ab\xff"
        expect = "%A2%D8ab%FF"
        result = urllib.parse.quote(given, encoding="latin-1")
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        # Characters in BMP, encoded by default in UTF-8
        given = "\u6f22\u5b57"  # "Kanji"
        expect = "%E6%BC%A2%E5%AD%97"
        result = urllib.parse.quote(given)
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        # Characters in BMP, encoded with Latin-1
        given = "\u6f22\u5b57"
        self.assertRaises(UnicodeEncodeError, urllib.parse.quote, given,
                          encoding="latin-1")
        # Characters in BMP, encoded with Latin-1, with replace error handling
        given = "\u6f22\u5b57"
        expect = "%3F%3F"  # "??"
        result = urllib.parse.quote(given, encoding="latin-1",
                                    errors="replace")
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        # Characters in BMP, Latin-1, with xmlcharref error handling
        given = "\u6f22\u5b57"
        expect = "%26%2328450%3B%26%2323383%3B"  # "&#28450;&#23383;"
        result = urllib.parse.quote(given, encoding="latin-1",
                                    errors="xmlcharrefreplace")
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))

    def test_quote_plus_with_unicode(self):
        # Encoding (latin-1) test for quote_plus
        given = "\xa2\xd8 \xff"
        expect = "%A2%D8+%FF"
        result = urllib.parse.quote_plus(given, encoding="latin-1")
        self.assertEqual(expect, result,
                         "using quote_plus(): %r != %r" % (expect, result))
        # Errors test for quote_plus
        given = "ab\u6f22\u5b57 cd"
        expect = "ab%3F%3F+cd"
        result = urllib.parse.quote_plus(given, encoding="latin-1",
                                         errors="replace")
        self.assertEqual(expect, result,
                         "using quote_plus(): %r != %r" % (expect, result))
1021
Senthil Kumarand496c4c2010-07-30 19:34:36 +00001022
class UnquotingTests(unittest.TestCase):
    """Tests for unquote() and unquote_plus()

    See the doc string for QuotingTests for details on quoting and such.

    """

    def test_unquoting(self):
        # Make sure unquoting of all ASCII values works
        escape_list = []
        for num in range(128):
            given = hexescape(chr(num))
            expect = chr(num)
            result = urllib.parse.unquote(given)
            self.assertEqual(expect, result,
                             "using unquote(): %r != %r" % (expect, result))
            result = urllib.parse.unquote_plus(given)
            self.assertEqual(expect, result,
                             "using unquote_plus(): %r != %r" %
                             (expect, result))
            escape_list.append(given)
        escape_string = ''.join(escape_list)
        result = urllib.parse.unquote(escape_string)
        # The only '%' left must be the one decoded from the escaped '%'.
        self.assertEqual(result.count('%'), 1,
                         "using unquote(): not all characters escaped: "
                         "%s" % result)
        self.assertRaises((TypeError, AttributeError),
                          urllib.parse.unquote, None)
        self.assertRaises((TypeError, AttributeError),
                          urllib.parse.unquote, ())

    def test_unquoting_badpercent(self):
        # Test unquoting on bad percent-escapes: they are left untouched.
        bad_escapes = ('%xab', '%x', '%')
        for given in bad_escapes:
            with self.subTest(function="unquote", given=given):
                expect = given
                result = urllib.parse.unquote(given)
                self.assertEqual(expect, result, "using unquote(): %r != %r"
                                 % (expect, result))
        # unquote_to_bytes
        for given in bad_escapes:
            with self.subTest(function="unquote_to_bytes", given=given):
                expect = bytes(given, 'ascii')
                result = urllib.parse.unquote_to_bytes(given)
                self.assertEqual(expect, result,
                                 "using unquote_to_bytes(): %r != %r"
                                 % (expect, result))
        self.assertRaises((TypeError, AttributeError),
                          urllib.parse.unquote_to_bytes, None)
        self.assertRaises((TypeError, AttributeError),
                          urllib.parse.unquote_to_bytes, ())

    def test_unquoting_mixed_case(self):
        # Test unquoting on mixed-case hex digits in the percent-escapes
        given = '%Ab%eA'
        expect = b'\xab\xea'
        result = urllib.parse.unquote_to_bytes(given)
        self.assertEqual(expect, result,
                         "using unquote_to_bytes(): %r != %r"
                         % (expect, result))

    def test_unquoting_parts(self):
        # Make sure unquoting works when have non-quoted characters
        # interspersed
        given = 'ab%sd' % hexescape('c')
        expect = "abcd"
        result = urllib.parse.unquote(given)
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))
        result = urllib.parse.unquote_plus(given)
        self.assertEqual(expect, result,
                         "using unquote_plus(): %r != %r" % (expect, result))

    def test_unquoting_plus(self):
        # Test difference between unquote() and unquote_plus()
        given = "are+there+spaces..."
        expect = given
        result = urllib.parse.unquote(given)
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))
        expect = given.replace('+', ' ')
        result = urllib.parse.unquote_plus(given)
        self.assertEqual(expect, result,
                         "using unquote_plus(): %r != %r" % (expect, result))

    def test_unquote_to_bytes(self):
        given = 'br%C3%BCckner_sapporo_20050930.doc'
        expect = b'br\xc3\xbcckner_sapporo_20050930.doc'
        result = urllib.parse.unquote_to_bytes(given)
        self.assertEqual(expect, result,
                         "using unquote_to_bytes(): %r != %r"
                         % (expect, result))
        # Test on a string with unescaped non-ASCII characters
        # (Technically an invalid URI; expect those characters to be UTF-8
        # encoded).
        result = urllib.parse.unquote_to_bytes("\u6f22%C3%BC")
        expect = b'\xe6\xbc\xa2\xc3\xbc'  # UTF-8 for "\u6f22\u00fc"
        self.assertEqual(expect, result,
                         "using unquote_to_bytes(): %r != %r"
                         % (expect, result))
        # Test with a bytes as input
        given = b'%A2%D8ab%FF'
        expect = b'\xa2\xd8ab\xff'
        result = urllib.parse.unquote_to_bytes(given)
        self.assertEqual(expect, result,
                         "using unquote_to_bytes(): %r != %r"
                         % (expect, result))
        # Test with a bytes as input, with unescaped non-ASCII bytes
        # (Technically an invalid URI; expect those bytes to be preserved)
        given = b'%A2\xd8ab%FF'
        expect = b'\xa2\xd8ab\xff'
        result = urllib.parse.unquote_to_bytes(given)
        self.assertEqual(expect, result,
                         "using unquote_to_bytes(): %r != %r"
                         % (expect, result))

    def test_unquote_with_unicode(self):
        # Characters in the Latin-1 range, encoded with UTF-8
        given = 'br%C3%BCckner_sapporo_20050930.doc'
        expect = 'br\u00fcckner_sapporo_20050930.doc'
        result = urllib.parse.unquote(given)
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))
        # Characters in the Latin-1 range, encoded with None (default)
        result = urllib.parse.unquote(given, encoding=None, errors=None)
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))

        # Characters in the Latin-1 range, encoded with Latin-1
        result = urllib.parse.unquote('br%FCckner_sapporo_20050930.doc',
                                      encoding="latin-1")
        expect = 'br\u00fcckner_sapporo_20050930.doc'
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))

        # Characters in BMP, encoded with UTF-8
        given = "%E6%BC%A2%E5%AD%97"
        expect = "\u6f22\u5b57"  # "Kanji"
        result = urllib.parse.unquote(given)
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))

        # Decode with UTF-8, invalid sequence
        given = "%F3%B1"
        expect = "\ufffd"  # Replacement character
        result = urllib.parse.unquote(given)
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))

        # Decode with UTF-8, invalid sequence, replace errors
        result = urllib.parse.unquote(given, errors="replace")
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))

        # Decode with UTF-8, invalid sequence, ignoring errors
        given = "%F3%B1"
        expect = ""
        result = urllib.parse.unquote(given, errors="ignore")
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))

        # A mix of non-ASCII and percent-encoded characters, UTF-8
        result = urllib.parse.unquote("\u6f22%C3%BC")
        expect = '\u6f22\u00fc'
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))

        # A mix of non-ASCII and percent-encoded characters, Latin-1
        # (Note, the string contains non-Latin-1-representable characters)
        result = urllib.parse.unquote("\u6f22%FC", encoding="latin-1")
        expect = '\u6f22\u00fc'
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))

    def test_unquoting_with_bytes_input(self):
        # ASCII characters decoded to a string
        given = b'blueberryjam'
        expect = 'blueberryjam'
        result = urllib.parse.unquote(given)
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))

        # A mix of non-ASCII hex-encoded characters and ASCII characters
        given = b'bl\xc3\xa5b\xc3\xa6rsyltet\xc3\xb8y'
        expect = 'bl\u00e5b\u00e6rsyltet\u00f8y'
        result = urllib.parse.unquote(given)
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))

        # A mix of non-ASCII percent-encoded characters and ASCII characters
        given = b'bl%c3%a5b%c3%a6rsyltet%c3%b8j'
        expect = 'bl\u00e5b\u00e6rsyltet\u00f8j'
        result = urllib.parse.unquote(given)
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))
1232
1233
Brett Cannon74bfd702003-04-25 09:39:47 +00001234class urlencode_Tests(unittest.TestCase):
1235 """Tests for urlencode()"""
1236
1237 def help_inputtype(self, given, test_type):
1238 """Helper method for testing different input types.
Tim Petersc2659cf2003-05-12 20:19:37 +00001239
Brett Cannon74bfd702003-04-25 09:39:47 +00001240 'given' must lead to only the pairs:
1241 * 1st, 1
1242 * 2nd, 2
1243 * 3rd, 3
Tim Petersc2659cf2003-05-12 20:19:37 +00001244
Brett Cannon74bfd702003-04-25 09:39:47 +00001245 Test cannot assume anything about order. Docs make no guarantee and
1246 have possible dictionary input.
Tim Petersc2659cf2003-05-12 20:19:37 +00001247
Brett Cannon74bfd702003-04-25 09:39:47 +00001248 """
1249 expect_somewhere = ["1st=1", "2nd=2", "3rd=3"]
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001250 result = urllib.parse.urlencode(given)
Brett Cannon74bfd702003-04-25 09:39:47 +00001251 for expected in expect_somewhere:
Ezio Melottib58e0bd2010-01-23 15:40:09 +00001252 self.assertIn(expected, result,
Brett Cannon74bfd702003-04-25 09:39:47 +00001253 "testing %s: %s not found in %s" %
1254 (test_type, expected, result))
1255 self.assertEqual(result.count('&'), 2,
1256 "testing %s: expected 2 '&'s; got %s" %
1257 (test_type, result.count('&')))
1258 amp_location = result.index('&')
1259 on_amp_left = result[amp_location - 1]
1260 on_amp_right = result[amp_location + 1]
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00001261 self.assertTrue(on_amp_left.isdigit() and on_amp_right.isdigit(),
Brett Cannon74bfd702003-04-25 09:39:47 +00001262 "testing %s: '&' not located in proper place in %s" %
1263 (test_type, result))
1264 self.assertEqual(len(result), (5 * 3) + 2, #5 chars per thing and amps
1265 "testing %s: "
1266 "unexpected number of characters: %s != %s" %
1267 (test_type, len(result), (5 * 3) + 2))
1268
1269 def test_using_mapping(self):
1270 # Test passing in a mapping object as an argument.
1271 self.help_inputtype({"1st":'1', "2nd":'2', "3rd":'3'},
1272 "using dict as input type")
1273
1274 def test_using_sequence(self):
1275 # Test passing in a sequence of two-item sequences as an argument.
1276 self.help_inputtype([('1st', '1'), ('2nd', '2'), ('3rd', '3')],
1277 "using sequence of two-item tuples as input")
1278
1279 def test_quoting(self):
1280 # Make sure keys and values are quoted using quote_plus()
1281 given = {"&":"="}
1282 expect = "%s=%s" % (hexescape('&'), hexescape('='))
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001283 result = urllib.parse.urlencode(given)
Brett Cannon74bfd702003-04-25 09:39:47 +00001284 self.assertEqual(expect, result)
1285 given = {"key name":"A bunch of pluses"}
1286 expect = "key+name=A+bunch+of+pluses"
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001287 result = urllib.parse.urlencode(given)
Brett Cannon74bfd702003-04-25 09:39:47 +00001288 self.assertEqual(expect, result)
1289
1290 def test_doseq(self):
1291 # Test that passing True for 'doseq' parameter works correctly
1292 given = {'sequence':['1', '2', '3']}
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001293 expect = "sequence=%s" % urllib.parse.quote_plus(str(['1', '2', '3']))
1294 result = urllib.parse.urlencode(given)
Brett Cannon74bfd702003-04-25 09:39:47 +00001295 self.assertEqual(expect, result)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001296 result = urllib.parse.urlencode(given, True)
Brett Cannon74bfd702003-04-25 09:39:47 +00001297 for value in given["sequence"]:
1298 expect = "sequence=%s" % value
Ezio Melottib58e0bd2010-01-23 15:40:09 +00001299 self.assertIn(expect, result)
Brett Cannon74bfd702003-04-25 09:39:47 +00001300 self.assertEqual(result.count('&'), 2,
1301 "Expected 2 '&'s, got %s" % result.count('&'))
1302
Jeremy Hylton1ef7c6b2009-03-26 16:57:30 +00001303 def test_empty_sequence(self):
1304 self.assertEqual("", urllib.parse.urlencode({}))
1305 self.assertEqual("", urllib.parse.urlencode([]))
1306
1307 def test_nonstring_values(self):
1308 self.assertEqual("a=1", urllib.parse.urlencode({"a": 1}))
1309 self.assertEqual("a=None", urllib.parse.urlencode({"a": None}))
1310
1311 def test_nonstring_seq_values(self):
1312 self.assertEqual("a=1&a=2", urllib.parse.urlencode({"a": [1, 2]}, True))
1313 self.assertEqual("a=None&a=a",
1314 urllib.parse.urlencode({"a": [None, "a"]}, True))
Georg Brandl2daf6ae2012-02-20 19:54:16 +01001315 data = collections.OrderedDict([("a", 1), ("b", 1)])
Jeremy Hylton1ef7c6b2009-03-26 16:57:30 +00001316 self.assertEqual("a=a&a=b",
Georg Brandl2daf6ae2012-02-20 19:54:16 +01001317 urllib.parse.urlencode({"a": data}, True))
Jeremy Hylton1ef7c6b2009-03-26 16:57:30 +00001318
Senthil Kumarandf022da2010-07-03 17:48:22 +00001319 def test_urlencode_encoding(self):
1320 # ASCII encoding. Expect %3F with errors="replace'
1321 given = (('\u00a0', '\u00c1'),)
1322 expect = '%3F=%3F'
1323 result = urllib.parse.urlencode(given, encoding="ASCII", errors="replace")
1324 self.assertEqual(expect, result)
1325
1326 # Default is UTF-8 encoding.
1327 given = (('\u00a0', '\u00c1'),)
1328 expect = '%C2%A0=%C3%81'
1329 result = urllib.parse.urlencode(given)
1330 self.assertEqual(expect, result)
1331
1332 # Latin-1 encoding.
1333 given = (('\u00a0', '\u00c1'),)
1334 expect = '%A0=%C1'
1335 result = urllib.parse.urlencode(given, encoding="latin-1")
1336 self.assertEqual(expect, result)
1337
1338 def test_urlencode_encoding_doseq(self):
1339 # ASCII Encoding. Expect %3F with errors="replace'
1340 given = (('\u00a0', '\u00c1'),)
1341 expect = '%3F=%3F'
1342 result = urllib.parse.urlencode(given, doseq=True,
1343 encoding="ASCII", errors="replace")
1344 self.assertEqual(expect, result)
1345
1346 # ASCII Encoding. On a sequence of values.
1347 given = (("\u00a0", (1, "\u00c1")),)
1348 expect = '%3F=1&%3F=%3F'
1349 result = urllib.parse.urlencode(given, True,
1350 encoding="ASCII", errors="replace")
1351 self.assertEqual(expect, result)
1352
1353 # Utf-8
1354 given = (("\u00a0", "\u00c1"),)
1355 expect = '%C2%A0=%C3%81'
1356 result = urllib.parse.urlencode(given, True)
1357 self.assertEqual(expect, result)
1358
1359 given = (("\u00a0", (42, "\u00c1")),)
1360 expect = '%C2%A0=42&%C2%A0=%C3%81'
1361 result = urllib.parse.urlencode(given, True)
1362 self.assertEqual(expect, result)
1363
1364 # latin-1
1365 given = (("\u00a0", "\u00c1"),)
1366 expect = '%A0=%C1'
1367 result = urllib.parse.urlencode(given, True, encoding="latin-1")
1368 self.assertEqual(expect, result)
1369
1370 given = (("\u00a0", (42, "\u00c1")),)
1371 expect = '%A0=42&%A0=%C1'
1372 result = urllib.parse.urlencode(given, True, encoding="latin-1")
1373 self.assertEqual(expect, result)
1374
1375 def test_urlencode_bytes(self):
1376 given = ((b'\xa0\x24', b'\xc1\x24'),)
1377 expect = '%A0%24=%C1%24'
1378 result = urllib.parse.urlencode(given)
1379 self.assertEqual(expect, result)
1380 result = urllib.parse.urlencode(given, True)
1381 self.assertEqual(expect, result)
1382
1383 # Sequence of values
1384 given = ((b'\xa0\x24', (42, b'\xc1\x24')),)
1385 expect = '%A0%24=42&%A0%24=%C1%24'
1386 result = urllib.parse.urlencode(given, True)
1387 self.assertEqual(expect, result)
1388
1389 def test_urlencode_encoding_safe_parameter(self):
1390
1391 # Send '$' (\x24) as safe character
1392 # Default utf-8 encoding
1393
1394 given = ((b'\xa0\x24', b'\xc1\x24'),)
1395 result = urllib.parse.urlencode(given, safe=":$")
1396 expect = '%A0$=%C1$'
1397 self.assertEqual(expect, result)
1398
1399 given = ((b'\xa0\x24', b'\xc1\x24'),)
1400 result = urllib.parse.urlencode(given, doseq=True, safe=":$")
1401 expect = '%A0$=%C1$'
1402 self.assertEqual(expect, result)
1403
1404 # Safe parameter in sequence
1405 given = ((b'\xa0\x24', (b'\xc1\x24', 0xd, 42)),)
1406 expect = '%A0$=%C1$&%A0$=13&%A0$=42'
1407 result = urllib.parse.urlencode(given, True, safe=":$")
1408 self.assertEqual(expect, result)
1409
1410 # Test all above in latin-1 encoding
1411
1412 given = ((b'\xa0\x24', b'\xc1\x24'),)
1413 result = urllib.parse.urlencode(given, safe=":$",
1414 encoding="latin-1")
1415 expect = '%A0$=%C1$'
1416 self.assertEqual(expect, result)
1417
1418 given = ((b'\xa0\x24', b'\xc1\x24'),)
1419 expect = '%A0$=%C1$'
1420 result = urllib.parse.urlencode(given, doseq=True, safe=":$",
1421 encoding="latin-1")
1422
1423 given = ((b'\xa0\x24', (b'\xc1\x24', 0xd, 42)),)
1424 expect = '%A0$=%C1$&%A0$=13&%A0$=42'
1425 result = urllib.parse.urlencode(given, True, safe=":$",
1426 encoding="latin-1")
1427 self.assertEqual(expect, result)
1428
class Pathname_Tests(unittest.TestCase):
    """Test pathname2url() and url2pathname()"""

    def test_basic(self):
        # Make sure simple tests pass
        expected_path = os.path.join("parts", "of", "a", "path")
        expected_url = "parts/of/a/path"
        result = urllib.request.pathname2url(expected_path)
        self.assertEqual(expected_url, result,
                         "pathname2url() failed; %s != %s" %
                         (result, expected_url))
        result = urllib.request.url2pathname(expected_url)
        self.assertEqual(expected_path, result,
                         "url2pathname() failed; %s != %s" %
                         (result, expected_path))

    def test_quoting(self):
        # Test automatic quoting and unquoting works for pathname2url() and
        # url2pathname() respectively
        given = os.path.join("needs", "quot=ing", "here")
        expect = "needs/%s/here" % urllib.parse.quote("quot=ing")
        result = urllib.request.pathname2url(given)
        self.assertEqual(expect, result,
                         "pathname2url() failed; %s != %s" %
                         (expect, result))
        # Feeding the quoted URL back must give the original path.
        expect = given
        result = urllib.request.url2pathname(result)
        self.assertEqual(expect, result,
                         "url2pathname() failed; %s != %s" %
                         (expect, result))
        given = os.path.join("make sure", "using_quote")
        expect = "%s/using_quote" % urllib.parse.quote("make sure")
        result = urllib.request.pathname2url(given)
        self.assertEqual(expect, result,
                         "pathname2url() failed; %s != %s" %
                         (expect, result))
        # '+' must survive unquoting unchanged (it is not decoded to a space).
        given = "make+sure/using_unquote"
        expect = os.path.join("make+sure", "using_unquote")
        result = urllib.request.url2pathname(given)
        self.assertEqual(expect, result,
                         "url2pathname() failed; %s != %s" %
                         (expect, result))

    @unittest.skipUnless(sys.platform == 'win32',
                         'test specific to the urllib.url2path function.')
    def test_ntpath(self):
        # Several spellings of a drive-root URL must map to the same path.
        given = ('/C:/', '///C:/', '/C|//')
        expect = 'C:\\'
        for url in given:
            result = urllib.request.url2pathname(url)
            self.assertEqual(expect, result,
                             'urllib.request.url2pathname() failed; %s != %s' %
                             (expect, result))
        given = '///C|/path'
        expect = 'C:\\path'
        result = urllib.request.url2pathname(given)
        self.assertEqual(expect, result,
                         'urllib.request.url2pathname() failed; %s != %s' %
                         (expect, result))
1488
class Utility_Tests(unittest.TestCase):
    """Testcase to test the various utility functions in the urllib."""

    def test_thishost(self):
        """Test the urllib.request.thishost utility function returns a tuple"""
        self.assertIsInstance(urllib.request.thishost(), tuple)
1495
Senthil Kumaran690ce9b2009-05-05 18:41:13 +00001496
class URLopener_Tests(FakeHTTPMixin, unittest.TestCase):
    """Testcase to test the open method of URLopener class."""

    def test_quoted_open(self):
        # A subclass with an open_<scheme> hook that echoes the URL it is
        # handed, so the test can see how open() quoted it.
        class DummyURLopener(urllib.request.URLopener):
            def open_spam(self, url):
                return url
        with support.check_warnings(
                ('DummyURLopener style of invoking requests is deprecated.',
                 DeprecationWarning)):
            self.assertEqual(DummyURLopener().open(
                'spam://example/ /'), '//example/%20/')

            # test the safe characters are not quoted by urlopen
            self.assertEqual(DummyURLopener().open(
                "spam://c:|windows%/:=&?~#+!$,;'@()*[]|/path/"),
                "//c:|windows%/:=&?~#+!$,;'@()*[]|/path/")

    @support.ignore_warnings(category=DeprecationWarning)
    def test_urlopener_retrieve_file(self):
        with support.temp_dir() as tmpdir:
            fd, tmpfile = tempfile.mkstemp(dir=tmpdir)
            os.close(fd)
            fileurl = "file:" + urllib.request.pathname2url(tmpfile)
            filename, _ = urllib.request.URLopener().retrieve(fileurl)
            # Some buildbots have TEMP folder that uses a lowercase drive letter.
            self.assertEqual(os.path.normcase(filename), os.path.normcase(tmpfile))

    @support.ignore_warnings(category=DeprecationWarning)
    def test_urlopener_retrieve_remote(self):
        url = "http://www.python.org/file.txt"
        # Serve a canned response through the mixin instead of the network.
        self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello!")
        self.addCleanup(self.unfakehttp)
        filename, _ = urllib.request.URLopener().retrieve(url)
        self.assertEqual(os.path.splitext(filename)[1], ".txt")

    @support.ignore_warnings(category=DeprecationWarning)
    def test_local_file_open(self):
        # bpo-35907, CVE-2019-9948: urllib must reject local_file:// scheme
        class DummyURLopener(urllib.request.URLopener):
            def open_local_file(self, url):
                return url
        for url in ('local_file://example', 'local-file://example'):
            self.assertRaises(OSError, urllib.request.urlopen, url)
            self.assertRaises(OSError, urllib.request.URLopener().open, url)
            self.assertRaises(OSError, urllib.request.URLopener().retrieve, url)
            self.assertRaises(OSError, DummyURLopener().open, url)
            self.assertRaises(OSError, DummyURLopener().retrieve, url)
1545
Xtreakc661b302019-05-19 19:10:06 +05301546
class RequestTests(unittest.TestCase):
    """Unit tests for urllib.request.Request."""

    def test_default_values(self):
        Request = urllib.request.Request
        # No data: the method defaults to GET.
        request = Request("http://www.python.org")
        self.assertEqual(request.get_method(), 'GET')
        # Any non-None data, even an empty dict, switches the default to POST.
        request = Request("http://www.python.org", {})
        self.assertEqual(request.get_method(), 'POST')

    def test_with_method_arg(self):
        Request = urllib.request.Request
        request = Request("http://www.python.org", method='HEAD')
        self.assertEqual(request.method, 'HEAD')
        self.assertEqual(request.get_method(), 'HEAD')
        # An explicit method wins over the data-based default.
        request = Request("http://www.python.org", {}, method='HEAD')
        self.assertEqual(request.method, 'HEAD')
        self.assertEqual(request.get_method(), 'HEAD')
        request = Request("http://www.python.org", method='GET')
        self.assertEqual(request.get_method(), 'GET')
        # Reassigning the attribute afterwards is honoured by get_method().
        request.method = 'HEAD'
        self.assertEqual(request.get_method(), 'HEAD')
Skip Montanaro080c9972001-01-28 21:12:22 +00001569
1570
class URL2PathNameTests(unittest.TestCase):
    """Tests for the url2pathname() function imported from nturl2path."""

    def test_converting_drive_letter(self):
        self.assertEqual(url2pathname("///C|"), 'C:')
        self.assertEqual(url2pathname("///C:"), 'C:')
        self.assertEqual(url2pathname("///C|/"), 'C:\\')

    def test_converting_when_no_drive_letter(self):
        # cannot end a raw string in \
        self.assertEqual(url2pathname("///C/test/"), r'\\\C\test' '\\')
        self.assertEqual(url2pathname("////C/test/"), r'\\C\test' '\\')

    def test_simple_compare(self):
        self.assertEqual(url2pathname("///C|/foo/bar/spam.foo"),
                         r'C:\foo\bar\spam.foo')

    def test_non_ascii_drive_letter(self):
        self.assertRaises(IOError, url2pathname, "///\u00e8|/")

    def test_roundtrip_url2pathname(self):
        # Converting a path to a URL and back must give the same path.
        list_of_paths = ['C:',
                         r'\\\C\test\\',
                         r'C:\foo\bar\spam.foo'
                         ]
        for path in list_of_paths:
            self.assertEqual(url2pathname(pathname2url(path)), path)
Senthil Kumaran277e9092013-04-10 20:51:19 -07001597
class PathName2URLTests(unittest.TestCase):
    """Tests for the pathname2url() function imported from nturl2path."""

    def test_converting_drive_letter(self):
        self.assertEqual(pathname2url("C:"), '///C:')
        self.assertEqual(pathname2url("C:\\"), '///C:')

    def test_converting_when_no_drive_letter(self):
        # The trailing backslash is a separate literal because a raw string
        # cannot end in a backslash.
        self.assertEqual(pathname2url(r"\\\folder\test" "\\"),
                         '/////folder/test/')
        self.assertEqual(pathname2url(r"\\folder\test" "\\"),
                         '////folder/test/')
        self.assertEqual(pathname2url(r"\folder\test" "\\"),
                         '/folder/test/')

    def test_simple_compare(self):
        self.assertEqual(pathname2url(r'C:\foo\bar\spam.foo'),
                         "///C:/foo/bar/spam.foo")

    def test_long_drive_letter(self):
        self.assertRaises(IOError, pathname2url, "XX:\\")

    def test_roundtrip_pathname2url(self):
        # Converting a URL to a path and back must give the same URL.
        list_of_paths = ['///C:',
                         '/////folder/test/',
                         '///C:/foo/bar/spam.foo']
        for path in list_of_paths:
            self.assertEqual(pathname2url(url2pathname(path)), path)
Brett Cannon74bfd702003-04-25 09:39:47 +00001625
# Run every test in this module when the file is executed as a script.
if __name__ == '__main__':
    unittest.main()