blob: 8895421f97fc38a2a6e1d6d5d3320590fd2e3f10 [file] [log] [blame]
"""Regression tests for what was in Python 2's "urllib" module"""
Brett Cannon74bfd702003-04-25 09:39:47 +00002
Jeremy Hylton1afc1692008-06-18 20:49:58 +00003import urllib.parse
4import urllib.request
guido@google.coma119df92011-03-29 11:41:02 -07005import urllib.error
Georg Brandl24420152008-05-26 16:32:26 +00006import http.client
Barry Warsaw820c1202008-06-12 04:06:45 +00007import email.message
Jeremy Hylton66dc8c52007-08-04 03:42:26 +00008import io
Brett Cannon74bfd702003-04-25 09:39:47 +00009import unittest
Benjamin Peterson3c2dca62014-06-07 15:08:04 -070010from unittest.mock import patch
Benjamin Petersonee8712c2008-05-20 21:35:26 +000011from test import support
Brett Cannon74bfd702003-04-25 09:39:47 +000012import os
Antoine Pitrou07df6552014-11-02 17:23:14 +010013try:
14 import ssl
15except ImportError:
16 ssl = None
Senthil Kumaran2d2ea1b2011-04-14 13:16:30 +080017import sys
Georg Brandl5a650a22005-08-26 08:51:34 +000018import tempfile
Senthil Kumaran277e9092013-04-10 20:51:19 -070019from nturl2path import url2pathname, pathname2url
Jeremy Hylton6102e292000-08-31 15:48:10 +000020
Senthil Kumaranc5c5a142012-01-14 19:09:04 +080021from base64 import b64encode
Georg Brandl2daf6ae2012-02-20 19:54:16 +010022import collections
Senthil Kumaranc5c5a142012-01-14 19:09:04 +080023
Senthil Kumaran8b081b72013-04-10 20:53:12 -070024
def hexescape(char):
    """Escape char as RFC 2396 specifies.

    Return "%" followed by the upper-case hexadecimal code point of the
    single character *char*, zero-padded to at least two digits.
    """
    # %02X pads to two digits and upper-cases in one step, replacing the
    # manual hex()/slice/pad sequence.
    return "%%%02X" % ord(char)
Jeremy Hylton6102e292000-08-31 15:48:10 +000031
# Shortcut for testing FancyURLopener: the opener built by the first
# proxy-less urlopen() call is cached here and reused by later calls.
_urlopener = None


def urlopen(url, data=None, proxies=None):
    """urlopen(url [, data]) -> open file-like object

    Open *url* through a FancyURLopener.  When *proxies* is given, a fresh
    opener configured with those proxies is used for this call only;
    otherwise the module-level cached opener is created on first use and
    shared afterwards.  *data* is forwarded to the opener only when it is
    not None.
    """
    global _urlopener
    if proxies is not None:
        opener = urllib.request.FancyURLopener(proxies=proxies)
    elif _urlopener is None:
        # Lazily build the shared opener through the warning-checking
        # factory defined in this file.
        opener = FancyURLopener()
        _urlopener = opener
    else:
        opener = _urlopener
    if data is None:
        return opener.open(url)
    return opener.open(url, data)
50
Senthil Kumarance260142011-11-01 01:35:17 +080051
def FancyURLopener():
    """Return a new urllib.request.FancyURLopener instance.

    The instance is created inside support.check_warnings() with a filter
    for the DeprecationWarning message that the constructor is expected to
    emit.
    """
    with support.check_warnings(
            ('FancyURLopener style of invoking requests is deprecated.',
             DeprecationWarning)):
        return urllib.request.FancyURLopener()
57
58
def fakehttp(fakedata, mock_close=False):
    """Return an HTTPConnection subclass that replays *fakedata*.

    The returned class never touches the network: connect() installs an
    in-memory FakeSocket pre-loaded with *fakedata* (bytes), and whatever
    the client sends is stored on the class attribute ``buf`` so tests can
    inspect the outgoing request.  When *mock_close* is true the class also
    overrides close() with a no-op.
    """
    class FakeSocket(io.BytesIO):
        # Number of live references to this "socket": one for the socket
        # itself plus one per makefile() call.  The underlying buffer is
        # only really closed when the count drops to zero.
        io_refs = 1

        def sendall(self, data):
            # Keep the most recently sent chunk for later verification.
            FakeHTTPConnection.buf = data

        def makefile(self, *args, **kwds):
            self.io_refs += 1
            return self

        def read(self, amt=None):
            # Reading after close yields EOF instead of raising ValueError.
            if self.closed:
                return b""
            return super().read(amt)

        def readline(self, length=None):
            if self.closed:
                return b""
            return super().readline(length)

        def close(self):
            self.io_refs -= 1
            if self.io_refs == 0:
                super().close()

    class FakeHTTPConnection(http.client.HTTPConnection):

        # buffer to store data for verification in urlopen tests.
        buf = None

        def connect(self):
            self.sock = FakeSocket(self.fakedata)
            # Expose the socket on the class so tests can reach it.
            type(self).fakesock = self.sock

        if mock_close:
            # bpo-36918: HTTPConnection destructor calls close() which calls
            # flush(). Problem: flush() calls self.fp.flush() which raises
            # "ValueError: I/O operation on closed file" which is logged as an
            # "Exception ignored in". Override close() to silence this error.
            def close(self):
                pass
    # Assigned after the class statement: a class body cannot read the
    # enclosing function's variable under the same name it is binding.
    FakeHTTPConnection.fakedata = fakedata

    return FakeHTTPConnection
104
105
class FakeHTTPMixin:
    """Mixin that swaps http.client.HTTPConnection for a canned fake.

    Call fakehttp() to install the fake and unfakehttp() to put back the
    class that was in place when fakehttp() was called.
    """

    def fakehttp(self, fakedata, mock_close=False):
        """Install a fake connection class that replays *fakedata*."""
        # The bare name resolves to the module-level fakehttp() factory,
        # not to this method.
        fake_http_class = fakehttp(fakedata, mock_close=mock_close)
        self._connection_class = http.client.HTTPConnection
        http.client.HTTPConnection = fake_http_class

    def unfakehttp(self):
        """Restore the connection class saved by fakehttp()."""
        http.client.HTTPConnection = self._connection_class
114
115
class FakeFTPMixin:
    """Mixin that swaps urllib.request.ftpwrapper for an inert stand-in.

    Call fakeftp() to install the stand-in and unfakeftp() to put back the
    class that was in place when fakeftp() was called.
    """

    def fakeftp(self):
        """Replace urllib.request.ftpwrapper with a do-nothing wrapper."""
        class FakeFtpWrapper:
            def __init__(self, user, passwd, host, port, dirs, timeout=None,
                         persistent=True):
                pass

            def retrfile(self, file, type):
                # An empty payload and a length of 0.
                return io.BytesIO(), 0

            def close(self):
                pass

        self._ftpwrapper_class = urllib.request.ftpwrapper
        urllib.request.ftpwrapper = FakeFtpWrapper

    def unfakeftp(self):
        """Restore the ftpwrapper class saved by fakeftp()."""
        urllib.request.ftpwrapper = self._ftpwrapper_class
134
135
class urlopen_FileTests(unittest.TestCase):
    """Test urlopen() opening a temporary file.

    Try to test as much functionality as possible so as to cut down on reliance
    on connecting to the Net for testing.

    """

    def setUp(self):
        # Create a temp file to use for testing
        self.text = bytes("test_urllib: %s\n" % self.__class__.__name__,
                          "ascii")
        with open(support.TESTFN, 'wb') as f:
            f.write(self.text)
        self.pathname = support.TESTFN
        self.returned_obj = urlopen("file:%s" % self.pathname)

    def tearDown(self):
        """Shut down the open object"""
        self.returned_obj.close()
        os.remove(support.TESTFN)

    def test_interface(self):
        # Make sure object returned by urlopen() has the specified methods
        for attr in ("read", "readline", "readlines", "fileno",
                     "close", "info", "geturl", "getcode", "__iter__"):
            self.assertTrue(hasattr(self.returned_obj, attr),
                            "object returned by urlopen() lacks %s attribute" %
                            attr)

    def test_read(self):
        self.assertEqual(self.text, self.returned_obj.read())

    def test_readline(self):
        self.assertEqual(self.text, self.returned_obj.readline())
        self.assertEqual(b'', self.returned_obj.readline(),
                         "calling readline() after exhausting the file did not"
                         " return an empty string")

    def test_readlines(self):
        lines_list = self.returned_obj.readlines()
        self.assertEqual(len(lines_list), 1,
                         "readlines() returned the wrong number of lines")
        self.assertEqual(lines_list[0], self.text,
                         "readlines() returned improper text")

    def test_fileno(self):
        file_num = self.returned_obj.fileno()
        self.assertIsInstance(file_num, int, "fileno() did not return an int")
        self.assertEqual(os.read(file_num, len(self.text)), self.text,
                         "Reading on the file descriptor returned by fileno() "
                         "did not return the expected text")

    def test_close(self):
        # Test close() by calling it here and then having it be called again
        # by the tearDown() method for the test
        self.returned_obj.close()

    def test_info(self):
        self.assertIsInstance(self.returned_obj.info(), email.message.Message)

    def test_geturl(self):
        self.assertEqual(self.returned_obj.geturl(), self.pathname)

    def test_getcode(self):
        self.assertIsNone(self.returned_obj.getcode())

    def test_iter(self):
        # Test iterator
        # Don't need to count number of iterations since test would fail the
        # instant it returned anything beyond the first line from the
        # comparison.
        # Use the iterator in the usual implicit way to test for ticket #4608.
        for line in self.returned_obj:
            self.assertEqual(line, self.text)

    def test_relativelocalfile(self):
        # A relative path without a scheme is not a valid URL.
        self.assertRaises(ValueError, urllib.request.urlopen,
                          './' + self.pathname)
217
Senthil Kumaranefbd4ea2017-04-01 23:47:35 -0700218
class ProxyTests(unittest.TestCase):
    """Test proxy discovery and bypass rules read from the environment."""

    def setUp(self):
        # Records changes to env vars
        self.env = support.EnvironmentVarGuard()
        # Delete all proxy related env vars
        for k in list(os.environ):
            if 'proxy' in k.lower():
                self.env.unset(k)

    def tearDown(self):
        # Restore all proxy related env vars
        self.env.__exit__()
        del self.env

    def test_getproxies_environment_keep_no_proxies(self):
        self.env.set('NO_PROXY', 'localhost')
        proxies = urllib.request.getproxies_environment()
        # getproxies_environment use lowered case truncated (no '_proxy') keys
        self.assertEqual('localhost', proxies['no'])
        # List of no_proxies with space.
        self.env.set('NO_PROXY',
                     'localhost, anotherdomain.com, newdomain.com:1234')
        self.assertTrue(
            urllib.request.proxy_bypass_environment('anotherdomain.com'))
        self.assertTrue(
            urllib.request.proxy_bypass_environment('anotherdomain.com:8888'))
        self.assertTrue(
            urllib.request.proxy_bypass_environment('newdomain.com:1234'))

    def test_proxy_cgi_ignore(self):
        # With REQUEST_METHOD set, HTTP_PROXY must no longer be reported.
        try:
            self.env.set('HTTP_PROXY', 'http://somewhere:3128')
            proxies = urllib.request.getproxies_environment()
            self.assertEqual('http://somewhere:3128', proxies['http'])
            self.env.set('REQUEST_METHOD', 'GET')
            proxies = urllib.request.getproxies_environment()
            self.assertNotIn('http', proxies)
        finally:
            self.env.unset('REQUEST_METHOD')
            self.env.unset('HTTP_PROXY')

    def test_proxy_bypass_environment_host_match(self):
        bypass = urllib.request.proxy_bypass_environment
        self.env.set('NO_PROXY',
                     'localhost, anotherdomain.com, newdomain.com:1234, .d.o.t')
        self.assertTrue(bypass('localhost'))
        self.assertTrue(bypass('LocalHost'))                 # MixedCase
        self.assertTrue(bypass('LOCALHOST'))                 # UPPERCASE
        self.assertTrue(bypass('newdomain.com:1234'))
        self.assertTrue(bypass('foo.d.o.t'))                 # issue 29142
        self.assertTrue(bypass('anotherdomain.com:8888'))
        self.assertTrue(bypass('www.newdomain.com:1234'))
        self.assertFalse(bypass('prelocalhost'))
        self.assertFalse(bypass('newdomain.com'))            # no port
        self.assertFalse(bypass('newdomain.com:1235'))       # wrong port
Senthil Kumarana7c0ff22016-04-25 08:16:23 -0700271
Senthil Kumaranefbd4ea2017-04-01 23:47:35 -0700272
class ProxyTests_withOrderedEnv(unittest.TestCase):
    """Test proxy handling where the order of env variables matters.

    os.environ is replaced by an initially empty OrderedDict for the
    duration of each test so that insertion order is under test control.
    """

    def setUp(self):
        # We need to test conditions, where variable order _is_ significant
        self._saved_env = os.environ
        # Monkey patch os.environ, start with empty fake environment
        os.environ = collections.OrderedDict()

    def tearDown(self):
        os.environ = self._saved_env

    def test_getproxies_environment_prefer_lowercase(self):
        # Test lowercase preference with removal
        os.environ['no_proxy'] = ''
        os.environ['No_Proxy'] = 'localhost'
        self.assertFalse(urllib.request.proxy_bypass_environment('localhost'))
        self.assertFalse(urllib.request.proxy_bypass_environment('arbitrary'))
        os.environ['http_proxy'] = ''
        os.environ['HTTP_PROXY'] = 'http://somewhere:3128'
        proxies = urllib.request.getproxies_environment()
        self.assertEqual({}, proxies)
        # Test lowercase preference of proxy bypass and correct matching
        # including ports
        os.environ['no_proxy'] = 'localhost, noproxy.com, my.proxy:1234'
        os.environ['No_Proxy'] = 'xyz.com'
        self.assertTrue(urllib.request.proxy_bypass_environment('localhost'))
        self.assertTrue(
            urllib.request.proxy_bypass_environment('noproxy.com:5678'))
        self.assertTrue(
            urllib.request.proxy_bypass_environment('my.proxy:1234'))
        self.assertFalse(urllib.request.proxy_bypass_environment('my.proxy'))
        self.assertFalse(urllib.request.proxy_bypass_environment('arbitrary'))
        # Test lowercase preference with replacement
        os.environ['http_proxy'] = 'http://somewhere:3128'
        os.environ['Http_Proxy'] = 'http://somewhereelse:3128'
        proxies = urllib.request.getproxies_environment()
        self.assertEqual('http://somewhere:3128', proxies['http'])
Benjamin Peterson9bc93512008-09-22 22:10:59 +0000307
Senthil Kumaranefbd4ea2017-04-01 23:47:35 -0700308
class urlopen_HttpTests(unittest.TestCase, FakeHTTPMixin, FakeFTPMixin):
    """Test urlopen() opening a fake http connection."""

    def check_read(self, ver):
        """Check a minimal 200 response for HTTP version *ver* (bytes)."""
        self.fakehttp(b"HTTP/" + ver + b" 200 OK\r\n\r\nHello!")
        try:
            fp = urlopen("http://python.org/")
            self.assertEqual(fp.readline(), b"Hello!")
            self.assertEqual(fp.readline(), b"")
            self.assertEqual(fp.geturl(), 'http://python.org/')
            self.assertEqual(fp.getcode(), 200)
        finally:
            self.unfakehttp()

    def test_url_fragment(self):
        # Issue #11703: geturl() omits fragments in the original URL.
        url = 'http://docs.python.org/library/urllib.html#OK'
        self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello!")
        try:
            fp = urllib.request.urlopen(url)
            self.assertEqual(fp.geturl(), url)
        finally:
            self.unfakehttp()

    def test_willclose(self):
        self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello!")
        try:
            resp = urlopen("http://www.python.org")
            self.assertTrue(resp.fp.will_close)
        finally:
            self.unfakehttp()

    @unittest.skipUnless(ssl, "ssl module required")
    def test_url_with_control_char_rejected(self):
        for char_no in list(range(0, 0x21)) + [0x7f]:
            char = chr(char_no)
            schemeless_url = f"//localhost:7777/test{char}/"
            self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello.")
            try:
                # We explicitly test urllib.request.urlopen() instead of the top
                # level 'def urlopen()' function defined in this... (quite ugly)
                # test suite.  They use different url opening codepaths.  Plain
                # urlopen uses FancyURLOpener which goes via a codepath that
                # calls urllib.parse.quote() on the URL which makes all of the
                # above attempts at injection within the url _path_ safe.
                escaped_char_repr = repr(char).replace('\\', r'\\')
                InvalidURL = http.client.InvalidURL
                with self.assertRaisesRegex(
                        InvalidURL, f"contain control.*{escaped_char_repr}"):
                    urllib.request.urlopen(f"http:{schemeless_url}")
                with self.assertRaisesRegex(
                        InvalidURL, f"contain control.*{escaped_char_repr}"):
                    urllib.request.urlopen(f"https:{schemeless_url}")
                # This code path quotes the URL so there is no injection.
                resp = urlopen(f"http:{schemeless_url}")
                self.assertNotIn(char, resp.geturl())
            finally:
                self.unfakehttp()

    @unittest.skipUnless(ssl, "ssl module required")
    def test_url_with_newline_header_injection_rejected(self):
        self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello.")
        host = "localhost:7777?a=1 HTTP/1.1\r\nX-injected: header\r\nTEST: 123"
        schemeless_url = "//" + host + ":8080/test/?test=a"
        try:
            # We explicitly test urllib.request.urlopen() instead of the top
            # level 'def urlopen()' function defined in this... (quite ugly)
            # test suite.  They use different url opening codepaths.  Plain
            # urlopen uses FancyURLOpener which goes via a codepath that
            # calls urllib.parse.quote() on the URL which makes all of the
            # above attempts at injection within the url _path_ safe.
            InvalidURL = http.client.InvalidURL
            with self.assertRaisesRegex(
                    InvalidURL, r"contain control.*\\r.*(found at least . .)"):
                urllib.request.urlopen(f"http:{schemeless_url}")
            with self.assertRaisesRegex(InvalidURL, r"contain control.*\\n"):
                urllib.request.urlopen(f"https:{schemeless_url}")
            # This code path quotes the URL so there is no injection.
            resp = urlopen(f"http:{schemeless_url}")
            self.assertNotIn(' ', resp.geturl())
            self.assertNotIn('\r', resp.geturl())
            self.assertNotIn('\n', resp.geturl())
        finally:
            self.unfakehttp()

    def test_read_0_9(self):
        # "0.9" response accepted (but not "simple responses" without
        # a status line)
        self.check_read(b"0.9")

    def test_read_1_0(self):
        self.check_read(b"1.0")

    def test_read_1_1(self):
        self.check_read(b"1.1")

    def test_read_bogus(self):
        # urlopen() should raise OSError for many error codes.
        # NOTE: the response literal must stay at column 0; leading
        # whitespace would become part of the header lines.
        self.fakehttp(b'''HTTP/1.1 401 Authentication Required
Date: Wed, 02 Jan 2008 03:03:54 GMT
Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
Connection: close
Content-Type: text/html; charset=iso-8859-1
''', mock_close=True)
        try:
            self.assertRaises(OSError, urlopen, "http://python.org/")
        finally:
            self.unfakehttp()

    def test_invalid_redirect(self):
        # urlopen() should raise OSError for many error codes.
        self.fakehttp(b'''HTTP/1.1 302 Found
Date: Wed, 02 Jan 2008 03:03:54 GMT
Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
Location: file://guidocomputer.athome.com:/python/license
Connection: close
Content-Type: text/html; charset=iso-8859-1
''', mock_close=True)
        try:
            msg = "Redirection to url 'file:"
            with self.assertRaisesRegex(urllib.error.HTTPError, msg):
                urlopen("http://python.org/")
        finally:
            self.unfakehttp()

    def test_redirect_limit_independent(self):
        # Ticket #12923: make sure independent requests each use their
        # own retry limit.
        for _ in range(FancyURLopener().maxtries):
            self.fakehttp(b'''HTTP/1.1 302 Found
Location: file://guidocomputer.athome.com:/python/license
Connection: close
''', mock_close=True)
            try:
                self.assertRaises(urllib.error.HTTPError, urlopen,
                                  "http://something")
            finally:
                self.unfakehttp()

    def test_empty_socket(self):
        # urlopen() raises OSError if the underlying socket does not send any
        # data. (#1680230)
        self.fakehttp(b'')
        try:
            self.assertRaises(OSError, urlopen, "http://something")
        finally:
            self.unfakehttp()

    def test_missing_localfile(self):
        # Test for #10836
        with self.assertRaises(urllib.error.URLError) as e:
            urlopen('file://localhost/a/file/which/doesnot/exists.py')
        self.assertTrue(e.exception.filename)
        self.assertTrue(e.exception.reason)

    def test_file_notexists(self):
        fd, tmp_file = tempfile.mkstemp()
        tmp_fileurl = 'file://localhost/' + tmp_file.replace(os.path.sep, '/')
        try:
            self.assertTrue(os.path.exists(tmp_file))
            with urlopen(tmp_fileurl) as fobj:
                self.assertTrue(fobj)
        finally:
            os.close(fd)
            os.unlink(tmp_file)
        # Once the file is gone the same URL must fail.
        self.assertFalse(os.path.exists(tmp_file))
        with self.assertRaises(urllib.error.URLError):
            urlopen(tmp_fileurl)

    def test_ftp_nohost(self):
        test_ftp_url = 'ftp:///path'
        with self.assertRaises(urllib.error.URLError) as e:
            urlopen(test_ftp_url)
        self.assertFalse(e.exception.filename)
        self.assertTrue(e.exception.reason)

    def test_ftp_nonexisting(self):
        with self.assertRaises(urllib.error.URLError) as e:
            urlopen('ftp://localhost/a/file/which/doesnot/exists.py')
        self.assertFalse(e.exception.filename)
        self.assertTrue(e.exception.reason)

    @patch.object(urllib.request, 'MAXFTPCACHE', 0)
    def test_ftp_cache_pruning(self):
        self.fakeftp()
        try:
            urllib.request.ftpcache['test'] = urllib.request.ftpwrapper(
                'user', 'pass', 'localhost', 21, [])
            urlopen('ftp://localhost')
        finally:
            self.unfakeftp()

    def test_userpass_inurl(self):
        self.fakehttp(b"HTTP/1.0 200 OK\r\n\r\nHello!")
        try:
            fp = urlopen("http://user:pass@python.org/")
            self.assertEqual(fp.readline(), b"Hello!")
            self.assertEqual(fp.readline(), b"")
            self.assertEqual(fp.geturl(), 'http://user:pass@python.org/')
            self.assertEqual(fp.getcode(), 200)
        finally:
            self.unfakehttp()

    def test_userpass_inurl_w_spaces(self):
        self.fakehttp(b"HTTP/1.0 200 OK\r\n\r\nHello!")
        try:
            userpass = "a b:c d"
            url = "http://{}@python.org/".format(userpass)
            # After fakehttp() this name refers to the fake class, whose
            # ``buf`` attribute records what was sent.
            fakehttp_wrapper = http.client.HTTPConnection
            authorization = ("Authorization: Basic %s\r\n" %
                             b64encode(userpass.encode("ASCII")).decode("ASCII"))
            fp = urlopen(url)
            # The authorization header must be in place
            self.assertIn(authorization, fakehttp_wrapper.buf.decode("UTF-8"))
            self.assertEqual(fp.readline(), b"Hello!")
            self.assertEqual(fp.readline(), b"")
            # the spaces are quoted in URL so no match
            self.assertNotEqual(fp.geturl(), url)
            self.assertEqual(fp.getcode(), 200)
        finally:
            self.unfakehttp()

    def test_URLopener_deprecation(self):
        with support.check_warnings(('', DeprecationWarning)):
            urllib.request.URLopener()

    @unittest.skipUnless(ssl, "ssl module required")
    def test_cafile_and_context(self):
        context = ssl.create_default_context()
        with support.check_warnings(('', DeprecationWarning)):
            with self.assertRaises(ValueError):
                urllib.request.urlopen(
                    "https://localhost", cafile="/nonexistent/path",
                    context=context
                )
Senthil Kumarana5c85b32014-09-19 15:23:30 +0800542
Senthil Kumaranefbd4ea2017-04-01 23:47:35 -0700543
class urlopen_DataTests(unittest.TestCase):
    """Test urlopen() opening a data URL."""

    def setUp(self):
        # clear _opener global variable
        self.addCleanup(urllib.request.urlcleanup)

        # text containing URL special- and unicode-characters
        self.text = "test data URLs :;,%=& \u00f6 \u00c4 "
        # 2x1 pixel RGB PNG image with one black and one white pixel
        self.image = (
            b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\x02\x00\x00\x00'
            b'\x01\x08\x02\x00\x00\x00{@\xe8\xdd\x00\x00\x00\x01sRGB\x00\xae'
            b'\xce\x1c\xe9\x00\x00\x00\x0fIDAT\x08\xd7c```\xf8\xff\xff?\x00'
            b'\x06\x01\x02\xfe\no/\x1e\x00\x00\x00\x00IEND\xaeB`\x82')

        self.text_url = (
            "data:text/plain;charset=UTF-8,test%20data%20URLs%20%3A%3B%2C%25%3"
            "D%26%20%C3%B6%20%C3%84%20")
        self.text_url_base64 = (
            "data:text/plain;charset=ISO-8859-1;base64,dGVzdCBkYXRhIFVSTHMgOjs"
            "sJT0mIPYgxCA%3D")
        # base64 encoded data URL that contains ignorable spaces,
        # such as "\n", " ", "%0A", and "%20".
        # The first fragment carries the "data:" scheme, the media type and
        # the base64 of the first 30 bytes of self.image (PNG signature and
        # IHDR start); without it the value is not a URL at all.
        self.image_url = (
            "data:image/png;base64,%0A%0AiVBORw0KGgoAAAANSUhEUgAAAAIAAAABCAIAAAB7\n"
            "QOjdAAAAAXNSR0IArs4c6QAAAA9JREFUCNdj%0AYGBg%2BP//PwAGAQL%2BCm8 "
            "vHgAAAABJRU5ErkJggg%3D%3D%0A%20")

        self.text_url_resp = urllib.request.urlopen(self.text_url)
        self.text_url_base64_resp = urllib.request.urlopen(
            self.text_url_base64)
        self.image_url_resp = urllib.request.urlopen(self.image_url)

    def test_interface(self):
        # Make sure object returned by urlopen() has the specified methods
        for attr in ("read", "readline", "readlines",
                     "close", "info", "geturl", "getcode", "__iter__"):
            self.assertTrue(hasattr(self.text_url_resp, attr),
                            "object returned by urlopen() lacks %s attribute" %
                            attr)

    def test_info(self):
        self.assertIsInstance(self.text_url_resp.info(), email.message.Message)
        self.assertEqual(self.text_url_base64_resp.info().get_params(),
                         [('text/plain', ''), ('charset', 'ISO-8859-1')])
        self.assertEqual(self.image_url_resp.info()['content-length'],
                         str(len(self.image)))
        self.assertEqual(urllib.request.urlopen("data:,").info().get_params(),
                         [('text/plain', ''), ('charset', 'US-ASCII')])

    def test_geturl(self):
        self.assertEqual(self.text_url_resp.geturl(), self.text_url)
        self.assertEqual(self.text_url_base64_resp.geturl(),
                         self.text_url_base64)
        self.assertEqual(self.image_url_resp.geturl(), self.image_url)

    def test_read_text(self):
        # Decode with the charset the response itself advertises.
        self.assertEqual(self.text_url_resp.read().decode(
            dict(self.text_url_resp.info().get_params())['charset']),
            self.text)

    def test_read_text_base64(self):
        self.assertEqual(self.text_url_base64_resp.read().decode(
            dict(self.text_url_base64_resp.info().get_params())['charset']),
            self.text)

    def test_read_image(self):
        self.assertEqual(self.image_url_resp.read(), self.image)

    def test_missing_comma(self):
        self.assertRaises(ValueError, urllib.request.urlopen,
                          'data:text/plain')

    def test_invalid_base64_data(self):
        # missing padding character
        self.assertRaises(ValueError, urllib.request.urlopen,
                          'data:;base64,Cg=')
619
Senthil Kumaranefbd4ea2017-04-01 23:47:35 -0700620
Brett Cannon19691362003-04-29 05:08:06 +0000621class urlretrieve_FileTests(unittest.TestCase):
Brett Cannon74bfd702003-04-25 09:39:47 +0000622 """Test urllib.urlretrieve() on local files"""
Skip Montanaro080c9972001-01-28 21:12:22 +0000623
Brett Cannon19691362003-04-29 05:08:06 +0000624 def setUp(self):
Victor Stinner7cb92042019-07-02 14:50:19 +0200625 # clear _opener global variable
626 self.addCleanup(urllib.request.urlcleanup)
627
Georg Brandl5a650a22005-08-26 08:51:34 +0000628 # Create a list of temporary files. Each item in the list is a file
629 # name (absolute path or relative to the current working directory).
630 # All files in this list will be deleted in the tearDown method. Note,
631 # this only helps to makes sure temporary files get deleted, but it
632 # does nothing about trying to close files that may still be open. It
633 # is the responsibility of the developer to properly close files even
634 # when exceptional conditions occur.
635 self.tempFiles = []
636
Brett Cannon19691362003-04-29 05:08:06 +0000637 # Create a temporary file.
Benjamin Petersonee8712c2008-05-20 21:35:26 +0000638 self.registerFileForCleanUp(support.TESTFN)
Guido van Rossuma0982942007-07-10 08:30:03 +0000639 self.text = b'testing urllib.urlretrieve'
Georg Brandl5a650a22005-08-26 08:51:34 +0000640 try:
Benjamin Petersonee8712c2008-05-20 21:35:26 +0000641 FILE = open(support.TESTFN, 'wb')
Georg Brandl5a650a22005-08-26 08:51:34 +0000642 FILE.write(self.text)
643 FILE.close()
644 finally:
645 try: FILE.close()
646 except: pass
Brett Cannon19691362003-04-29 05:08:06 +0000647
648 def tearDown(self):
Georg Brandl5a650a22005-08-26 08:51:34 +0000649 # Delete the temporary files.
650 for each in self.tempFiles:
651 try: os.remove(each)
652 except: pass
653
654 def constructLocalFileUrl(self, filePath):
Victor Stinner6c6f8512010-08-07 10:09:35 +0000655 filePath = os.path.abspath(filePath)
656 try:
Marc-André Lemburg8f36af72011-02-25 15:42:01 +0000657 filePath.encode("utf-8")
Victor Stinner6c6f8512010-08-07 10:09:35 +0000658 except UnicodeEncodeError:
659 raise unittest.SkipTest("filePath is not encodable to utf8")
660 return "file://%s" % urllib.request.pathname2url(filePath)
Georg Brandl5a650a22005-08-26 08:51:34 +0000661
Guido van Rossum70d0dda2007-08-29 01:53:26 +0000662 def createNewTempFile(self, data=b""):
Georg Brandl5a650a22005-08-26 08:51:34 +0000663 """Creates a new temporary file containing the specified data,
664 registers the file for deletion during the test fixture tear down, and
665 returns the absolute path of the file."""
666
667 newFd, newFilePath = tempfile.mkstemp()
668 try:
669 self.registerFileForCleanUp(newFilePath)
670 newFile = os.fdopen(newFd, "wb")
671 newFile.write(data)
672 newFile.close()
673 finally:
674 try: newFile.close()
675 except: pass
676 return newFilePath
677
678 def registerFileForCleanUp(self, fileName):
679 self.tempFiles.append(fileName)
Brett Cannon19691362003-04-29 05:08:06 +0000680
681 def test_basic(self):
682 # Make sure that a local file just gets its own location returned and
683 # a headers value is returned.
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000684 result = urllib.request.urlretrieve("file:%s" % support.TESTFN)
Benjamin Petersonee8712c2008-05-20 21:35:26 +0000685 self.assertEqual(result[0], support.TESTFN)
Ezio Melottie9615932010-01-24 19:26:24 +0000686 self.assertIsInstance(result[1], email.message.Message,
Martin Panter7462b6492015-11-02 03:37:02 +0000687 "did not get an email.message.Message instance "
Ezio Melottie9615932010-01-24 19:26:24 +0000688 "as second returned value")
Brett Cannon19691362003-04-29 05:08:06 +0000689
690 def test_copy(self):
691 # Test that setting the filename argument works.
Benjamin Petersonee8712c2008-05-20 21:35:26 +0000692 second_temp = "%s.2" % support.TESTFN
Georg Brandl5a650a22005-08-26 08:51:34 +0000693 self.registerFileForCleanUp(second_temp)
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000694 result = urllib.request.urlretrieve(self.constructLocalFileUrl(
Benjamin Petersonee8712c2008-05-20 21:35:26 +0000695 support.TESTFN), second_temp)
Brett Cannon19691362003-04-29 05:08:06 +0000696 self.assertEqual(second_temp, result[0])
Benjamin Petersonc9c0f202009-06-30 23:06:06 +0000697 self.assertTrue(os.path.exists(second_temp), "copy of the file was not "
Brett Cannon19691362003-04-29 05:08:06 +0000698 "made")
Alex Martelli01c77c62006-08-24 02:58:11 +0000699 FILE = open(second_temp, 'rb')
Brett Cannon19691362003-04-29 05:08:06 +0000700 try:
701 text = FILE.read()
Brett Cannon19691362003-04-29 05:08:06 +0000702 FILE.close()
Georg Brandl5a650a22005-08-26 08:51:34 +0000703 finally:
704 try: FILE.close()
705 except: pass
Brett Cannon19691362003-04-29 05:08:06 +0000706 self.assertEqual(self.text, text)
707
708 def test_reporthook(self):
709 # Make sure that the reporthook works.
Senthil Kumarane24f96a2012-03-13 19:29:33 -0700710 def hooktester(block_count, block_read_size, file_size, count_holder=[0]):
711 self.assertIsInstance(block_count, int)
712 self.assertIsInstance(block_read_size, int)
713 self.assertIsInstance(file_size, int)
714 self.assertEqual(block_count, count_holder[0])
Brett Cannon19691362003-04-29 05:08:06 +0000715 count_holder[0] = count_holder[0] + 1
Benjamin Petersonee8712c2008-05-20 21:35:26 +0000716 second_temp = "%s.2" % support.TESTFN
Georg Brandl5a650a22005-08-26 08:51:34 +0000717 self.registerFileForCleanUp(second_temp)
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000718 urllib.request.urlretrieve(
719 self.constructLocalFileUrl(support.TESTFN),
Georg Brandl5a650a22005-08-26 08:51:34 +0000720 second_temp, hooktester)
721
722 def test_reporthook_0_bytes(self):
723 # Test on zero length file. Should call reporthook only 1 time.
724 report = []
Senthil Kumarane24f96a2012-03-13 19:29:33 -0700725 def hooktester(block_count, block_read_size, file_size, _report=report):
726 _report.append((block_count, block_read_size, file_size))
Georg Brandl5a650a22005-08-26 08:51:34 +0000727 srcFileName = self.createNewTempFile()
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000728 urllib.request.urlretrieve(self.constructLocalFileUrl(srcFileName),
Benjamin Petersonee8712c2008-05-20 21:35:26 +0000729 support.TESTFN, hooktester)
Georg Brandl5a650a22005-08-26 08:51:34 +0000730 self.assertEqual(len(report), 1)
731 self.assertEqual(report[0][2], 0)
732
733 def test_reporthook_5_bytes(self):
734 # Test on 5 byte file. Should call reporthook only 2 times (once when
735 # the "network connection" is established and once when the block is
Senthil Kumarane24f96a2012-03-13 19:29:33 -0700736 # read).
Georg Brandl5a650a22005-08-26 08:51:34 +0000737 report = []
Senthil Kumarane24f96a2012-03-13 19:29:33 -0700738 def hooktester(block_count, block_read_size, file_size, _report=report):
739 _report.append((block_count, block_read_size, file_size))
Guido van Rossum70d0dda2007-08-29 01:53:26 +0000740 srcFileName = self.createNewTempFile(b"x" * 5)
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000741 urllib.request.urlretrieve(self.constructLocalFileUrl(srcFileName),
Benjamin Petersonee8712c2008-05-20 21:35:26 +0000742 support.TESTFN, hooktester)
Georg Brandl5a650a22005-08-26 08:51:34 +0000743 self.assertEqual(len(report), 2)
Gregory P. Smith6d9388f2012-11-10 15:12:55 -0800744 self.assertEqual(report[0][2], 5)
745 self.assertEqual(report[1][2], 5)
Georg Brandl5a650a22005-08-26 08:51:34 +0000746
747 def test_reporthook_8193_bytes(self):
748 # Test on 8193 byte file. Should call reporthook only 3 times (once
749 # when the "network connection" is established, once for the next 8192
750 # bytes, and once for the last byte).
751 report = []
Senthil Kumarane24f96a2012-03-13 19:29:33 -0700752 def hooktester(block_count, block_read_size, file_size, _report=report):
753 _report.append((block_count, block_read_size, file_size))
Guido van Rossum70d0dda2007-08-29 01:53:26 +0000754 srcFileName = self.createNewTempFile(b"x" * 8193)
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000755 urllib.request.urlretrieve(self.constructLocalFileUrl(srcFileName),
Benjamin Petersonee8712c2008-05-20 21:35:26 +0000756 support.TESTFN, hooktester)
Georg Brandl5a650a22005-08-26 08:51:34 +0000757 self.assertEqual(len(report), 3)
Gregory P. Smith6d9388f2012-11-10 15:12:55 -0800758 self.assertEqual(report[0][2], 8193)
759 self.assertEqual(report[0][1], 8192)
Senthil Kumarane24f96a2012-03-13 19:29:33 -0700760 self.assertEqual(report[1][1], 8192)
Gregory P. Smith6d9388f2012-11-10 15:12:55 -0800761 self.assertEqual(report[2][1], 8192)
Skip Montanaro080c9972001-01-28 21:12:22 +0000762
Senthil Kumarance260142011-11-01 01:35:17 +0800763
class urlretrieve_HttpTests(unittest.TestCase, FakeHTTPMixin):
    """Test urllib.urlretrieve() using fake http connections"""

    # Canned reply that announces Content-Length: 100 but carries only a
    # three-byte body ("FF" plus a newline).
    _SHORT_REPLY = (
        b"HTTP/1.1 200 OK\n"
        b"Date: Wed, 02 Jan 2008 03:03:54 GMT\n"
        b"Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e\n"
        b"Connection: close\n"
        b"Content-Length: 100\n"
        b"Content-Type: text/html; charset=iso-8859-1\n"
        b"\n"
        b"FF\n"
    )

    def _assert_too_short(self, **retrieve_kwargs):
        # Serve the canned reply and expect urlretrieve() to raise
        # ContentTooShortError; the fake connection is always undone.
        self.fakehttp(self._SHORT_REPLY)
        with self.assertRaises(urllib.error.ContentTooShortError):
            try:
                urllib.request.urlretrieve(support.TEST_HTTP_URL,
                                           **retrieve_kwargs)
            finally:
                self.unfakehttp()

    def test_short_content_raises_ContentTooShortError(self):
        self.addCleanup(urllib.request.urlcleanup)

        def _reporthook(par1, par2, par3):
            pass

        self._assert_too_short(reporthook=_reporthook)

    def test_short_content_raises_ContentTooShortError_without_reporthook(self):
        self.addCleanup(urllib.request.urlcleanup)
        self._assert_too_short()
807
808
Brett Cannon74bfd702003-04-25 09:39:47 +0000809class QuotingTests(unittest.TestCase):
R David Murray44b548d2016-09-08 13:59:53 -0400810 r"""Tests for urllib.quote() and urllib.quote_plus()
Tim Petersc2659cf2003-05-12 20:19:37 +0000811
Ratnadeep Debnath21024f02017-02-25 14:30:28 +0530812 According to RFC 3986 (Uniform Resource Identifiers), to escape a
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000813 character you write it as '%' + <2 character US-ASCII hex value>.
814 The Python code of ``'%' + hex(ord(<character>))[2:]`` escapes a
815 character properly. Case does not matter on the hex letters.
Brett Cannon74bfd702003-04-25 09:39:47 +0000816
817 The various character sets specified are:
Tim Petersc2659cf2003-05-12 20:19:37 +0000818
Brett Cannon74bfd702003-04-25 09:39:47 +0000819 Reserved characters : ";/?:@&=+$,"
820 Have special meaning in URIs and must be escaped if not being used for
821 their special meaning
822 Data characters : letters, digits, and "-_.!~*'()"
823 Unreserved and do not need to be escaped; can be, though, if desired
824 Control characters : 0x00 - 0x1F, 0x7F
825 Have no use in URIs so must be escaped
826 space : 0x20
827 Must be escaped
828 Delimiters : '<>#%"'
829 Must be escaped
830 Unwise : "{}|\^[]`"
831 Must be escaped
Tim Petersc2659cf2003-05-12 20:19:37 +0000832
Brett Cannon74bfd702003-04-25 09:39:47 +0000833 """
834
835 def test_never_quote(self):
836 # Make sure quote() does not quote letters, digits, and "_,.-"
837 do_not_quote = '' .join(["ABCDEFGHIJKLMNOPQRSTUVWXYZ",
838 "abcdefghijklmnopqrstuvwxyz",
839 "0123456789",
Ratnadeep Debnath21024f02017-02-25 14:30:28 +0530840 "_.-~"])
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000841 result = urllib.parse.quote(do_not_quote)
Brett Cannon74bfd702003-04-25 09:39:47 +0000842 self.assertEqual(do_not_quote, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000843 "using quote(): %r != %r" % (do_not_quote, result))
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000844 result = urllib.parse.quote_plus(do_not_quote)
Brett Cannon74bfd702003-04-25 09:39:47 +0000845 self.assertEqual(do_not_quote, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000846 "using quote_plus(): %r != %r" % (do_not_quote, result))
Brett Cannon74bfd702003-04-25 09:39:47 +0000847
848 def test_default_safe(self):
849 # Test '/' is default value for 'safe' parameter
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000850 self.assertEqual(urllib.parse.quote.__defaults__[0], '/')
Brett Cannon74bfd702003-04-25 09:39:47 +0000851
852 def test_safe(self):
853 # Test setting 'safe' parameter does what it should do
854 quote_by_default = "<>"
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000855 result = urllib.parse.quote(quote_by_default, safe=quote_by_default)
Brett Cannon74bfd702003-04-25 09:39:47 +0000856 self.assertEqual(quote_by_default, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000857 "using quote(): %r != %r" % (quote_by_default, result))
Jeremy Hylton1ef7c6b2009-03-26 16:57:30 +0000858 result = urllib.parse.quote_plus(quote_by_default,
859 safe=quote_by_default)
Brett Cannon74bfd702003-04-25 09:39:47 +0000860 self.assertEqual(quote_by_default, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000861 "using quote_plus(): %r != %r" %
Brett Cannon74bfd702003-04-25 09:39:47 +0000862 (quote_by_default, result))
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000863 # Safe expressed as bytes rather than str
864 result = urllib.parse.quote(quote_by_default, safe=b"<>")
865 self.assertEqual(quote_by_default, result,
866 "using quote(): %r != %r" % (quote_by_default, result))
867 # "Safe" non-ASCII characters should have no effect
868 # (Since URIs are not allowed to have non-ASCII characters)
869 result = urllib.parse.quote("a\xfcb", encoding="latin-1", safe="\xfc")
870 expect = urllib.parse.quote("a\xfcb", encoding="latin-1", safe="")
871 self.assertEqual(expect, result,
872 "using quote(): %r != %r" %
873 (expect, result))
874 # Same as above, but using a bytes rather than str
875 result = urllib.parse.quote("a\xfcb", encoding="latin-1", safe=b"\xfc")
876 expect = urllib.parse.quote("a\xfcb", encoding="latin-1", safe="")
877 self.assertEqual(expect, result,
878 "using quote(): %r != %r" %
879 (expect, result))
Brett Cannon74bfd702003-04-25 09:39:47 +0000880
881 def test_default_quoting(self):
882 # Make sure all characters that should be quoted are by default sans
883 # space (separate test for that).
884 should_quote = [chr(num) for num in range(32)] # For 0x00 - 0x1F
R David Murray44b548d2016-09-08 13:59:53 -0400885 should_quote.append(r'<>#%"{}|\^[]`')
Brett Cannon74bfd702003-04-25 09:39:47 +0000886 should_quote.append(chr(127)) # For 0x7F
887 should_quote = ''.join(should_quote)
888 for char in should_quote:
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000889 result = urllib.parse.quote(char)
Brett Cannon74bfd702003-04-25 09:39:47 +0000890 self.assertEqual(hexescape(char), result,
Jeremy Hylton1ef7c6b2009-03-26 16:57:30 +0000891 "using quote(): "
892 "%s should be escaped to %s, not %s" %
Brett Cannon74bfd702003-04-25 09:39:47 +0000893 (char, hexescape(char), result))
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000894 result = urllib.parse.quote_plus(char)
Brett Cannon74bfd702003-04-25 09:39:47 +0000895 self.assertEqual(hexescape(char), result,
896 "using quote_plus(): "
Tim Petersc2659cf2003-05-12 20:19:37 +0000897 "%s should be escapes to %s, not %s" %
Brett Cannon74bfd702003-04-25 09:39:47 +0000898 (char, hexescape(char), result))
899 del should_quote
900 partial_quote = "ab[]cd"
901 expected = "ab%5B%5Dcd"
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000902 result = urllib.parse.quote(partial_quote)
Brett Cannon74bfd702003-04-25 09:39:47 +0000903 self.assertEqual(expected, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000904 "using quote(): %r != %r" % (expected, result))
Senthil Kumaran305a68e2011-09-13 06:40:27 +0800905 result = urllib.parse.quote_plus(partial_quote)
Brett Cannon74bfd702003-04-25 09:39:47 +0000906 self.assertEqual(expected, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000907 "using quote_plus(): %r != %r" % (expected, result))
Brett Cannon74bfd702003-04-25 09:39:47 +0000908
909 def test_quoting_space(self):
910 # Make sure quote() and quote_plus() handle spaces as specified in
911 # their unique way
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000912 result = urllib.parse.quote(' ')
Brett Cannon74bfd702003-04-25 09:39:47 +0000913 self.assertEqual(result, hexescape(' '),
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000914 "using quote(): %r != %r" % (result, hexescape(' ')))
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000915 result = urllib.parse.quote_plus(' ')
Brett Cannon74bfd702003-04-25 09:39:47 +0000916 self.assertEqual(result, '+',
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000917 "using quote_plus(): %r != +" % result)
Brett Cannon74bfd702003-04-25 09:39:47 +0000918 given = "a b cd e f"
919 expect = given.replace(' ', hexescape(' '))
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000920 result = urllib.parse.quote(given)
Brett Cannon74bfd702003-04-25 09:39:47 +0000921 self.assertEqual(expect, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000922 "using quote(): %r != %r" % (expect, result))
Brett Cannon74bfd702003-04-25 09:39:47 +0000923 expect = given.replace(' ', '+')
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000924 result = urllib.parse.quote_plus(given)
Brett Cannon74bfd702003-04-25 09:39:47 +0000925 self.assertEqual(expect, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000926 "using quote_plus(): %r != %r" % (expect, result))
Brett Cannon74bfd702003-04-25 09:39:47 +0000927
Raymond Hettinger2bdec7b2005-09-10 14:30:09 +0000928 def test_quoting_plus(self):
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000929 self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma'),
Raymond Hettinger2bdec7b2005-09-10 14:30:09 +0000930 'alpha%2Bbeta+gamma')
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000931 self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma', '+'),
Raymond Hettinger2bdec7b2005-09-10 14:30:09 +0000932 'alpha+beta+gamma')
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000933 # Test with bytes
934 self.assertEqual(urllib.parse.quote_plus(b'alpha+beta gamma'),
935 'alpha%2Bbeta+gamma')
936 # Test with safe bytes
937 self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma', b'+'),
938 'alpha+beta+gamma')
939
940 def test_quote_bytes(self):
941 # Bytes should quote directly to percent-encoded values
942 given = b"\xa2\xd8ab\xff"
943 expect = "%A2%D8ab%FF"
944 result = urllib.parse.quote(given)
945 self.assertEqual(expect, result,
946 "using quote(): %r != %r" % (expect, result))
947 # Encoding argument should raise type error on bytes input
948 self.assertRaises(TypeError, urllib.parse.quote, given,
949 encoding="latin-1")
950 # quote_from_bytes should work the same
951 result = urllib.parse.quote_from_bytes(given)
952 self.assertEqual(expect, result,
953 "using quote_from_bytes(): %r != %r"
954 % (expect, result))
955
956 def test_quote_with_unicode(self):
957 # Characters in Latin-1 range, encoded by default in UTF-8
958 given = "\xa2\xd8ab\xff"
959 expect = "%C2%A2%C3%98ab%C3%BF"
960 result = urllib.parse.quote(given)
961 self.assertEqual(expect, result,
962 "using quote(): %r != %r" % (expect, result))
963 # Characters in Latin-1 range, encoded by with None (default)
964 result = urllib.parse.quote(given, encoding=None, errors=None)
965 self.assertEqual(expect, result,
966 "using quote(): %r != %r" % (expect, result))
967 # Characters in Latin-1 range, encoded with Latin-1
968 given = "\xa2\xd8ab\xff"
969 expect = "%A2%D8ab%FF"
970 result = urllib.parse.quote(given, encoding="latin-1")
971 self.assertEqual(expect, result,
972 "using quote(): %r != %r" % (expect, result))
973 # Characters in BMP, encoded by default in UTF-8
974 given = "\u6f22\u5b57" # "Kanji"
975 expect = "%E6%BC%A2%E5%AD%97"
976 result = urllib.parse.quote(given)
977 self.assertEqual(expect, result,
978 "using quote(): %r != %r" % (expect, result))
979 # Characters in BMP, encoded with Latin-1
980 given = "\u6f22\u5b57"
981 self.assertRaises(UnicodeEncodeError, urllib.parse.quote, given,
982 encoding="latin-1")
983 # Characters in BMP, encoded with Latin-1, with replace error handling
984 given = "\u6f22\u5b57"
985 expect = "%3F%3F" # "??"
986 result = urllib.parse.quote(given, encoding="latin-1",
987 errors="replace")
988 self.assertEqual(expect, result,
989 "using quote(): %r != %r" % (expect, result))
990 # Characters in BMP, Latin-1, with xmlcharref error handling
991 given = "\u6f22\u5b57"
992 expect = "%26%2328450%3B%26%2323383%3B" # "&#28450;&#23383;"
993 result = urllib.parse.quote(given, encoding="latin-1",
994 errors="xmlcharrefreplace")
995 self.assertEqual(expect, result,
996 "using quote(): %r != %r" % (expect, result))
Raymond Hettinger2bdec7b2005-09-10 14:30:09 +0000997
Georg Brandlfaf41492009-05-26 18:31:11 +0000998 def test_quote_plus_with_unicode(self):
999 # Encoding (latin-1) test for quote_plus
1000 given = "\xa2\xd8 \xff"
1001 expect = "%A2%D8+%FF"
1002 result = urllib.parse.quote_plus(given, encoding="latin-1")
1003 self.assertEqual(expect, result,
1004 "using quote_plus(): %r != %r" % (expect, result))
1005 # Errors test for quote_plus
1006 given = "ab\u6f22\u5b57 cd"
1007 expect = "ab%3F%3F+cd"
1008 result = urllib.parse.quote_plus(given, encoding="latin-1",
1009 errors="replace")
1010 self.assertEqual(expect, result,
1011 "using quote_plus(): %r != %r" % (expect, result))
1012
Senthil Kumarand496c4c2010-07-30 19:34:36 +00001013
Brett Cannon74bfd702003-04-25 09:39:47 +00001014class UnquotingTests(unittest.TestCase):
1015 """Tests for unquote() and unquote_plus()
Tim Petersc2659cf2003-05-12 20:19:37 +00001016
Brett Cannon74bfd702003-04-25 09:39:47 +00001017 See the doc string for quoting_Tests for details on quoting and such.
1018
1019 """
1020
1021 def test_unquoting(self):
1022 # Make sure unquoting of all ASCII values works
1023 escape_list = []
1024 for num in range(128):
1025 given = hexescape(chr(num))
1026 expect = chr(num)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001027 result = urllib.parse.unquote(given)
Brett Cannon74bfd702003-04-25 09:39:47 +00001028 self.assertEqual(expect, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +00001029 "using unquote(): %r != %r" % (expect, result))
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001030 result = urllib.parse.unquote_plus(given)
Brett Cannon74bfd702003-04-25 09:39:47 +00001031 self.assertEqual(expect, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +00001032 "using unquote_plus(): %r != %r" %
Brett Cannon74bfd702003-04-25 09:39:47 +00001033 (expect, result))
1034 escape_list.append(given)
1035 escape_string = ''.join(escape_list)
1036 del escape_list
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001037 result = urllib.parse.unquote(escape_string)
Brett Cannon74bfd702003-04-25 09:39:47 +00001038 self.assertEqual(result.count('%'), 1,
Brett Cannon74bfd702003-04-25 09:39:47 +00001039 "using unquote(): not all characters escaped: "
1040 "%s" % result)
Georg Brandl604ef372010-07-31 08:20:02 +00001041 self.assertRaises((TypeError, AttributeError), urllib.parse.unquote, None)
1042 self.assertRaises((TypeError, AttributeError), urllib.parse.unquote, ())
Florent Xicluna62829dc2010-08-14 20:51:58 +00001043 with support.check_warnings(('', BytesWarning), quiet=True):
1044 self.assertRaises((TypeError, AttributeError), urllib.parse.unquote, b'')
Brett Cannon74bfd702003-04-25 09:39:47 +00001045
Guido van Rossum52dbbb92008-08-18 21:44:30 +00001046 def test_unquoting_badpercent(self):
1047 # Test unquoting on bad percent-escapes
1048 given = '%xab'
1049 expect = given
1050 result = urllib.parse.unquote(given)
1051 self.assertEqual(expect, result, "using unquote(): %r != %r"
1052 % (expect, result))
1053 given = '%x'
1054 expect = given
1055 result = urllib.parse.unquote(given)
1056 self.assertEqual(expect, result, "using unquote(): %r != %r"
1057 % (expect, result))
1058 given = '%'
1059 expect = given
1060 result = urllib.parse.unquote(given)
1061 self.assertEqual(expect, result, "using unquote(): %r != %r"
1062 % (expect, result))
1063 # unquote_to_bytes
1064 given = '%xab'
1065 expect = bytes(given, 'ascii')
1066 result = urllib.parse.unquote_to_bytes(given)
1067 self.assertEqual(expect, result, "using unquote_to_bytes(): %r != %r"
1068 % (expect, result))
1069 given = '%x'
1070 expect = bytes(given, 'ascii')
1071 result = urllib.parse.unquote_to_bytes(given)
1072 self.assertEqual(expect, result, "using unquote_to_bytes(): %r != %r"
1073 % (expect, result))
1074 given = '%'
1075 expect = bytes(given, 'ascii')
1076 result = urllib.parse.unquote_to_bytes(given)
1077 self.assertEqual(expect, result, "using unquote_to_bytes(): %r != %r"
1078 % (expect, result))
Georg Brandl604ef372010-07-31 08:20:02 +00001079 self.assertRaises((TypeError, AttributeError), urllib.parse.unquote_to_bytes, None)
1080 self.assertRaises((TypeError, AttributeError), urllib.parse.unquote_to_bytes, ())
Senthil Kumaran79e17f62010-07-19 18:17:19 +00001081
Guido van Rossum52dbbb92008-08-18 21:44:30 +00001082 def test_unquoting_mixed_case(self):
1083 # Test unquoting on mixed-case hex digits in the percent-escapes
1084 given = '%Ab%eA'
1085 expect = b'\xab\xea'
1086 result = urllib.parse.unquote_to_bytes(given)
1087 self.assertEqual(expect, result,
1088 "using unquote_to_bytes(): %r != %r"
1089 % (expect, result))
1090
Brett Cannon74bfd702003-04-25 09:39:47 +00001091 def test_unquoting_parts(self):
1092 # Make sure unquoting works when have non-quoted characters
1093 # interspersed
1094 given = 'ab%sd' % hexescape('c')
1095 expect = "abcd"
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001096 result = urllib.parse.unquote(given)
Brett Cannon74bfd702003-04-25 09:39:47 +00001097 self.assertEqual(expect, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +00001098 "using quote(): %r != %r" % (expect, result))
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001099 result = urllib.parse.unquote_plus(given)
Brett Cannon74bfd702003-04-25 09:39:47 +00001100 self.assertEqual(expect, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +00001101 "using unquote_plus(): %r != %r" % (expect, result))
Tim Petersc2659cf2003-05-12 20:19:37 +00001102
Brett Cannon74bfd702003-04-25 09:39:47 +00001103 def test_unquoting_plus(self):
1104 # Test difference between unquote() and unquote_plus()
1105 given = "are+there+spaces..."
1106 expect = given
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001107 result = urllib.parse.unquote(given)
Brett Cannon74bfd702003-04-25 09:39:47 +00001108 self.assertEqual(expect, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +00001109 "using unquote(): %r != %r" % (expect, result))
Brett Cannon74bfd702003-04-25 09:39:47 +00001110 expect = given.replace('+', ' ')
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001111 result = urllib.parse.unquote_plus(given)
Brett Cannon74bfd702003-04-25 09:39:47 +00001112 self.assertEqual(expect, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +00001113 "using unquote_plus(): %r != %r" % (expect, result))
1114
1115 def test_unquote_to_bytes(self):
1116 given = 'br%C3%BCckner_sapporo_20050930.doc'
1117 expect = b'br\xc3\xbcckner_sapporo_20050930.doc'
1118 result = urllib.parse.unquote_to_bytes(given)
1119 self.assertEqual(expect, result,
1120 "using unquote_to_bytes(): %r != %r"
1121 % (expect, result))
1122 # Test on a string with unescaped non-ASCII characters
1123 # (Technically an invalid URI; expect those characters to be UTF-8
1124 # encoded).
1125 result = urllib.parse.unquote_to_bytes("\u6f22%C3%BC")
1126 expect = b'\xe6\xbc\xa2\xc3\xbc' # UTF-8 for "\u6f22\u00fc"
1127 self.assertEqual(expect, result,
1128 "using unquote_to_bytes(): %r != %r"
1129 % (expect, result))
1130 # Test with a bytes as input
1131 given = b'%A2%D8ab%FF'
1132 expect = b'\xa2\xd8ab\xff'
1133 result = urllib.parse.unquote_to_bytes(given)
1134 self.assertEqual(expect, result,
1135 "using unquote_to_bytes(): %r != %r"
1136 % (expect, result))
1137 # Test with a bytes as input, with unescaped non-ASCII bytes
1138 # (Technically an invalid URI; expect those bytes to be preserved)
1139 given = b'%A2\xd8ab%FF'
1140 expect = b'\xa2\xd8ab\xff'
1141 result = urllib.parse.unquote_to_bytes(given)
1142 self.assertEqual(expect, result,
1143 "using unquote_to_bytes(): %r != %r"
1144 % (expect, result))
Brett Cannon74bfd702003-04-25 09:39:47 +00001145
Raymond Hettinger4b0f20d2005-10-15 16:41:53 +00001146 def test_unquote_with_unicode(self):
Guido van Rossum52dbbb92008-08-18 21:44:30 +00001147 # Characters in the Latin-1 range, encoded with UTF-8
1148 given = 'br%C3%BCckner_sapporo_20050930.doc'
1149 expect = 'br\u00fcckner_sapporo_20050930.doc'
1150 result = urllib.parse.unquote(given)
1151 self.assertEqual(expect, result,
1152 "using unquote(): %r != %r" % (expect, result))
1153 # Characters in the Latin-1 range, encoded with None (default)
1154 result = urllib.parse.unquote(given, encoding=None, errors=None)
1155 self.assertEqual(expect, result,
1156 "using unquote(): %r != %r" % (expect, result))
1157
1158 # Characters in the Latin-1 range, encoded with Latin-1
1159 result = urllib.parse.unquote('br%FCckner_sapporo_20050930.doc',
1160 encoding="latin-1")
1161 expect = 'br\u00fcckner_sapporo_20050930.doc'
1162 self.assertEqual(expect, result,
1163 "using unquote(): %r != %r" % (expect, result))
1164
1165 # Characters in BMP, encoded with UTF-8
1166 given = "%E6%BC%A2%E5%AD%97"
1167 expect = "\u6f22\u5b57" # "Kanji"
1168 result = urllib.parse.unquote(given)
1169 self.assertEqual(expect, result,
1170 "using unquote(): %r != %r" % (expect, result))
1171
1172 # Decode with UTF-8, invalid sequence
1173 given = "%F3%B1"
1174 expect = "\ufffd" # Replacement character
1175 result = urllib.parse.unquote(given)
1176 self.assertEqual(expect, result,
1177 "using unquote(): %r != %r" % (expect, result))
1178
1179 # Decode with UTF-8, invalid sequence, replace errors
1180 result = urllib.parse.unquote(given, errors="replace")
1181 self.assertEqual(expect, result,
1182 "using unquote(): %r != %r" % (expect, result))
1183
1184 # Decode with UTF-8, invalid sequence, ignoring errors
1185 given = "%F3%B1"
1186 expect = ""
1187 result = urllib.parse.unquote(given, errors="ignore")
1188 self.assertEqual(expect, result,
1189 "using unquote(): %r != %r" % (expect, result))
1190
1191 # A mix of non-ASCII and percent-encoded characters, UTF-8
1192 result = urllib.parse.unquote("\u6f22%C3%BC")
1193 expect = '\u6f22\u00fc'
1194 self.assertEqual(expect, result,
1195 "using unquote(): %r != %r" % (expect, result))
1196
1197 # A mix of non-ASCII and percent-encoded characters, Latin-1
1198 # (Note, the string contains non-Latin-1-representable characters)
1199 result = urllib.parse.unquote("\u6f22%FC", encoding="latin-1")
1200 expect = '\u6f22\u00fc'
1201 self.assertEqual(expect, result,
1202 "using unquote(): %r != %r" % (expect, result))
Raymond Hettinger4b0f20d2005-10-15 16:41:53 +00001203
class urlencode_Tests(unittest.TestCase):
    """Tests for urlencode()"""

    def help_inputtype(self, given, test_type):
        """Helper method for testing different input types.

        'given' must lead to only the pairs:
            * 1st, 1
            * 2nd, 2
            * 3rd, 3

        Test cannot assume anything about order.  Docs make no guarantee and
        have possible dictionary input.

        'test_type' is a short description of the input kind; it is only
        used to label assertion failure messages.

        """
        expect_somewhere = ["1st=1", "2nd=2", "3rd=3"]
        result = urllib.parse.urlencode(given)
        for expected in expect_somewhere:
            self.assertIn(expected, result,
                          "testing %s: %s not found in %s" %
                          (test_type, expected, result))
        self.assertEqual(result.count('&'), 2,
                         "testing %s: expected 2 '&'s; got %s" %
                         (test_type, result.count('&')))
        # Every pair looks like "Nth=N", so the characters directly on both
        # sides of a separator must be digits.
        amp_location = result.index('&')
        on_amp_left = result[amp_location - 1]
        on_amp_right = result[amp_location + 1]
        self.assertTrue(on_amp_left.isdigit() and on_amp_right.isdigit(),
                        "testing %s: '&' not located in proper place in %s" %
                        (test_type, result))
        # 5 chars per pair, plus the two '&' separators.
        self.assertEqual(len(result), (5 * 3) + 2,
                         "testing %s: "
                         "unexpected number of characters: %s != %s" %
                         (test_type, len(result), (5 * 3) + 2))

    def test_using_mapping(self):
        # Test passing in a mapping object as an argument.
        self.help_inputtype({"1st":'1', "2nd":'2', "3rd":'3'},
                            "using dict as input type")

    def test_using_sequence(self):
        # Test passing in a sequence of two-item sequences as an argument.
        self.help_inputtype([('1st', '1'), ('2nd', '2'), ('3rd', '3')],
                            "using sequence of two-item tuples as input")

    def test_quoting(self):
        # Make sure keys and values are quoted using quote_plus()
        given = {"&":"="}
        expect = "%s=%s" % (hexescape('&'), hexescape('='))
        result = urllib.parse.urlencode(given)
        self.assertEqual(expect, result)
        given = {"key name":"A bunch of pluses"}
        expect = "key+name=A+bunch+of+pluses"
        result = urllib.parse.urlencode(given)
        self.assertEqual(expect, result)

    def test_doseq(self):
        # Test that passing True for 'doseq' parameter works correctly
        given = {'sequence':['1', '2', '3']}
        # Without doseq the list is stringified as a whole and then quoted.
        expect = "sequence=%s" % urllib.parse.quote_plus(str(['1', '2', '3']))
        result = urllib.parse.urlencode(given)
        self.assertEqual(expect, result)
        # With doseq every element becomes its own key=value pair.
        result = urllib.parse.urlencode(given, True)
        for value in given["sequence"]:
            expect = "sequence=%s" % value
            self.assertIn(expect, result)
        self.assertEqual(result.count('&'), 2,
                         "Expected 2 '&'s, got %s" % result.count('&'))

    def test_empty_sequence(self):
        self.assertEqual("", urllib.parse.urlencode({}))
        self.assertEqual("", urllib.parse.urlencode([]))

    def test_nonstring_values(self):
        self.assertEqual("a=1", urllib.parse.urlencode({"a": 1}))
        self.assertEqual("a=None", urllib.parse.urlencode({"a": None}))

    def test_nonstring_seq_values(self):
        self.assertEqual("a=1&a=2", urllib.parse.urlencode({"a": [1, 2]}, True))
        self.assertEqual("a=None&a=a",
                         urllib.parse.urlencode({"a": [None, "a"]}, True))
        # An ordered mapping used as a value is iterated, yielding its keys.
        data = collections.OrderedDict([("a", 1), ("b", 1)])
        self.assertEqual("a=a&a=b",
                         urllib.parse.urlencode({"a": data}, True))

    def test_urlencode_encoding(self):
        # ASCII encoding. Expect %3F with errors="replace"
        given = (('\u00a0', '\u00c1'),)
        expect = '%3F=%3F'
        result = urllib.parse.urlencode(given, encoding="ASCII", errors="replace")
        self.assertEqual(expect, result)

        # Default is UTF-8 encoding.
        given = (('\u00a0', '\u00c1'),)
        expect = '%C2%A0=%C3%81'
        result = urllib.parse.urlencode(given)
        self.assertEqual(expect, result)

        # Latin-1 encoding.
        given = (('\u00a0', '\u00c1'),)
        expect = '%A0=%C1'
        result = urllib.parse.urlencode(given, encoding="latin-1")
        self.assertEqual(expect, result)

    def test_urlencode_encoding_doseq(self):
        # ASCII Encoding. Expect %3F with errors="replace"
        given = (('\u00a0', '\u00c1'),)
        expect = '%3F=%3F'
        result = urllib.parse.urlencode(given, doseq=True,
                                        encoding="ASCII", errors="replace")
        self.assertEqual(expect, result)

        # ASCII Encoding. On a sequence of values.
        given = (("\u00a0", (1, "\u00c1")),)
        expect = '%3F=1&%3F=%3F'
        result = urllib.parse.urlencode(given, True,
                                        encoding="ASCII", errors="replace")
        self.assertEqual(expect, result)

        # Utf-8
        given = (("\u00a0", "\u00c1"),)
        expect = '%C2%A0=%C3%81'
        result = urllib.parse.urlencode(given, True)
        self.assertEqual(expect, result)

        given = (("\u00a0", (42, "\u00c1")),)
        expect = '%C2%A0=42&%C2%A0=%C3%81'
        result = urllib.parse.urlencode(given, True)
        self.assertEqual(expect, result)

        # latin-1
        given = (("\u00a0", "\u00c1"),)
        expect = '%A0=%C1'
        result = urllib.parse.urlencode(given, True, encoding="latin-1")
        self.assertEqual(expect, result)

        given = (("\u00a0", (42, "\u00c1")),)
        expect = '%A0=42&%A0=%C1'
        result = urllib.parse.urlencode(given, True, encoding="latin-1")
        self.assertEqual(expect, result)

    def test_urlencode_bytes(self):
        given = ((b'\xa0\x24', b'\xc1\x24'),)
        expect = '%A0%24=%C1%24'
        result = urllib.parse.urlencode(given)
        self.assertEqual(expect, result)
        result = urllib.parse.urlencode(given, True)
        self.assertEqual(expect, result)

        # Sequence of values
        given = ((b'\xa0\x24', (42, b'\xc1\x24')),)
        expect = '%A0%24=42&%A0%24=%C1%24'
        result = urllib.parse.urlencode(given, True)
        self.assertEqual(expect, result)

    def test_urlencode_encoding_safe_parameter(self):

        # Send '$' (\x24) as safe character
        # Default utf-8 encoding

        given = ((b'\xa0\x24', b'\xc1\x24'),)
        result = urllib.parse.urlencode(given, safe=":$")
        expect = '%A0$=%C1$'
        self.assertEqual(expect, result)

        given = ((b'\xa0\x24', b'\xc1\x24'),)
        result = urllib.parse.urlencode(given, doseq=True, safe=":$")
        expect = '%A0$=%C1$'
        self.assertEqual(expect, result)

        # Safe parameter in sequence
        given = ((b'\xa0\x24', (b'\xc1\x24', 0xd, 42)),)
        expect = '%A0$=%C1$&%A0$=13&%A0$=42'
        result = urllib.parse.urlencode(given, True, safe=":$")
        self.assertEqual(expect, result)

        # Test all above in latin-1 encoding

        given = ((b'\xa0\x24', b'\xc1\x24'),)
        result = urllib.parse.urlencode(given, safe=":$",
                                        encoding="latin-1")
        expect = '%A0$=%C1$'
        self.assertEqual(expect, result)

        given = ((b'\xa0\x24', b'\xc1\x24'),)
        expect = '%A0$=%C1$'
        result = urllib.parse.urlencode(given, doseq=True, safe=":$",
                                        encoding="latin-1")
        # This case previously computed 'result' without checking it.
        self.assertEqual(expect, result)

        given = ((b'\xa0\x24', (b'\xc1\x24', 0xd, 42)),)
        expect = '%A0$=%C1$&%A0$=13&%A0$=42'
        result = urllib.parse.urlencode(given, True, safe=":$",
                                        encoding="latin-1")
        self.assertEqual(expect, result)
1398
class Pathname_Tests(unittest.TestCase):
    """Test pathname2url() and url2pathname()"""

    def test_basic(self):
        # Make sure simple tests pass
        expected_path = os.path.join("parts", "of", "a", "path")
        expected_url = "parts/of/a/path"
        result = urllib.request.pathname2url(expected_path)
        self.assertEqual(expected_url, result,
                         "pathname2url() failed; %s != %s" %
                         (result, expected_url))
        result = urllib.request.url2pathname(expected_url)
        self.assertEqual(expected_path, result,
                         "url2pathname() failed; %s != %s" %
                         (result, expected_path))

    def test_quoting(self):
        # Test automatic quoting and unquoting works for pathname2url() and
        # url2pathname() respectively
        given = os.path.join("needs", "quot=ing", "here")
        expect = "needs/%s/here" % urllib.parse.quote("quot=ing")
        result = urllib.request.pathname2url(given)
        self.assertEqual(expect, result,
                         "pathname2url() failed; %s != %s" %
                         (expect, result))
        # Converting the quoted URL back must restore the original path.
        expect = given
        result = urllib.request.url2pathname(result)
        self.assertEqual(expect, result,
                         "url2pathname() failed; %s != %s" %
                         (expect, result))
        given = os.path.join("make sure", "using_quote")
        expect = "%s/using_quote" % urllib.parse.quote("make sure")
        result = urllib.request.pathname2url(given)
        self.assertEqual(expect, result,
                         "pathname2url() failed; %s != %s" %
                         (expect, result))
        # A '+' in the URL is expected to come back unchanged.
        given = "make+sure/using_unquote"
        expect = os.path.join("make+sure", "using_unquote")
        result = urllib.request.url2pathname(given)
        self.assertEqual(expect, result,
                         "url2pathname() failed; %s != %s" %
                         (expect, result))

    @unittest.skipUnless(sys.platform == 'win32',
                         'test specific to Windows pathnames.')
    def test_ntpath(self):
        given = ('/C:/', '///C:/', '/C|//')
        expect = 'C:\\'
        for url in given:
            # subTest reports which URL spelling failed.
            with self.subTest(url=url):
                result = urllib.request.url2pathname(url)
                self.assertEqual(expect, result,
                                 'urllib.request.url2pathname() failed; '
                                 '%s != %s' % (expect, result))
        given = '///C|/path'
        expect = 'C:\\path'
        result = urllib.request.url2pathname(given)
        self.assertEqual(expect, result,
                         'urllib.request.url2pathname() failed; %s != %s' %
                         (expect, result))
1458
class Utility_Tests(unittest.TestCase):
    """Tests for assorted urllib utility functions."""

    def test_thishost(self):
        """urllib.request.thishost() must return a tuple."""
        host_info = urllib.request.thishost()
        self.assertIsInstance(host_info, tuple)
1465
Senthil Kumaran690ce9b2009-05-05 18:41:13 +00001466
class URLopener_Tests(FakeHTTPMixin, unittest.TestCase):
    """Tests for the open() and retrieve() methods of URLopener."""

    def test_quoted_open(self):
        class DummyURLopener(urllib.request.URLopener):
            def open_spam(self, url):
                return url

        expected_warning = (
            'DummyURLopener style of invoking requests is deprecated.',
            DeprecationWarning)
        with support.check_warnings(expected_warning):
            opened = DummyURLopener().open('spam://example/ /')
            self.assertEqual(opened, '//example/%20/')

            # test the safe characters are not quoted by urlopen
            opened = DummyURLopener().open(
                "spam://c:|windows%/:=&?~#+!$,;'@()*[]|/path/")
            self.assertEqual(opened,
                             "//c:|windows%/:=&?~#+!$,;'@()*[]|/path/")

    @support.ignore_warnings(category=DeprecationWarning)
    def test_urlopener_retrieve_file(self):
        with support.temp_dir() as tmpdir:
            handle, local_path = tempfile.mkstemp(dir=tmpdir)
            os.close(handle)
            file_url = "file:" + urllib.request.pathname2url(local_path)
            opener = urllib.request.URLopener()
            retrieved_name, _headers = opener.retrieve(file_url)
            # Some buildbots have TEMP folder that uses a lowercase drive
            # letter, so compare case-normalized paths.
            self.assertEqual(os.path.normcase(retrieved_name),
                             os.path.normcase(local_path))

    @support.ignore_warnings(category=DeprecationWarning)
    def test_urlopener_retrieve_remote(self):
        remote_url = "http://www.python.org/file.txt"
        self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello!")
        self.addCleanup(self.unfakehttp)
        opener = urllib.request.URLopener()
        retrieved_name, _headers = opener.retrieve(remote_url)
        # The downloaded file must keep the extension of the URL's path.
        _root, extension = os.path.splitext(retrieved_name)
        self.assertEqual(extension, ".txt")

    @support.ignore_warnings(category=DeprecationWarning)
    def test_local_file_open(self):
        # bpo-35907, CVE-2019-9948: urllib must reject local_file:// scheme
        class DummyURLopener(urllib.request.URLopener):
            def open_local_file(self, url):
                return url

        for url in ('local_file://example', 'local-file://example'):
            with self.assertRaises(OSError):
                urllib.request.urlopen(url)
            with self.assertRaises(OSError):
                urllib.request.URLopener().open(url)
            with self.assertRaises(OSError):
                urllib.request.URLopener().retrieve(url)
            with self.assertRaises(OSError):
                DummyURLopener().open(url)
            with self.assertRaises(OSError):
                DummyURLopener().retrieve(url)
1515
Xtreakc661b302019-05-19 19:10:06 +05301516
# The tests below are commented out.
# It is unclear why they keep failing on Windows and SPARC.
# Everywhere else they work ok, but on those machines they sometimes
# fail in one of the tests, sometimes in another.  On my Linux machine
# the tests pass.
# If anybody has one of the problematic environments, please help!
# .   Facundo
1524#
1525# def server(evt):
Georg Brandlf78e02b2008-06-10 17:40:04 +00001526# import socket, time
Guido van Rossume7ba4952007-06-06 23:52:48 +00001527# serv = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
1528# serv.settimeout(3)
1529# serv.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
1530# serv.bind(("", 9093))
Charles-François Natali6e204602014-07-23 19:28:13 +01001531# serv.listen()
Guido van Rossume7ba4952007-06-06 23:52:48 +00001532# try:
1533# conn, addr = serv.accept()
1534# conn.send("1 Hola mundo\n")
1535# cantdata = 0
1536# while cantdata < 13:
1537# data = conn.recv(13-cantdata)
1538# cantdata += len(data)
1539# time.sleep(.3)
1540# conn.send("2 No more lines\n")
1541# conn.close()
1542# except socket.timeout:
1543# pass
1544# finally:
1545# serv.close()
1546# evt.set()
1547#
1548# class FTPWrapperTests(unittest.TestCase):
1549#
1550# def setUp(self):
Georg Brandlf78e02b2008-06-10 17:40:04 +00001551# import ftplib, time, threading
Guido van Rossume7ba4952007-06-06 23:52:48 +00001552# ftplib.FTP.port = 9093
1553# self.evt = threading.Event()
1554# threading.Thread(target=server, args=(self.evt,)).start()
1555# time.sleep(.1)
1556#
1557# def tearDown(self):
1558# self.evt.wait()
1559#
1560# def testBasic(self):
1561# # connects
1562# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [])
Georg Brandlf78e02b2008-06-10 17:40:04 +00001563# ftp.close()
Guido van Rossume7ba4952007-06-06 23:52:48 +00001564#
1565# def testTimeoutNone(self):
Georg Brandlf78e02b2008-06-10 17:40:04 +00001566# # global default timeout is ignored
1567# import socket
Serhiy Storchaka25d8aea2014-02-08 14:50:08 +02001568# self.assertIsNone(socket.getdefaulttimeout())
Guido van Rossume7ba4952007-06-06 23:52:48 +00001569# socket.setdefaulttimeout(30)
1570# try:
1571# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [])
1572# finally:
Georg Brandlf78e02b2008-06-10 17:40:04 +00001573# socket.setdefaulttimeout(None)
Guido van Rossume7ba4952007-06-06 23:52:48 +00001574# self.assertEqual(ftp.ftp.sock.gettimeout(), 30)
Georg Brandlf78e02b2008-06-10 17:40:04 +00001575# ftp.close()
Guido van Rossume7ba4952007-06-06 23:52:48 +00001576#
Georg Brandlf78e02b2008-06-10 17:40:04 +00001577# def testTimeoutDefault(self):
1578# # global default timeout is used
1579# import socket
Serhiy Storchaka25d8aea2014-02-08 14:50:08 +02001580# self.assertIsNone(socket.getdefaulttimeout())
Georg Brandlf78e02b2008-06-10 17:40:04 +00001581# socket.setdefaulttimeout(30)
1582# try:
1583# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [])
1584# finally:
1585# socket.setdefaulttimeout(None)
1586# self.assertEqual(ftp.ftp.sock.gettimeout(), 30)
1587# ftp.close()
1588#
1589# def testTimeoutValue(self):
1590# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [],
1591# timeout=30)
1592# self.assertEqual(ftp.ftp.sock.gettimeout(), 30)
1593# ftp.close()
Guido van Rossume7ba4952007-06-06 23:52:48 +00001594
Senthil Kumaran8b081b72013-04-10 20:53:12 -07001595
class RequestTests(unittest.TestCase):
    """Unit tests for urllib.request.Request."""

    def test_default_values(self):
        make_request = urllib.request.Request
        # Without data the method is GET.
        without_data = make_request("http://www.python.org")
        self.assertEqual(without_data.get_method(), 'GET')
        # Supplying a data argument (even an empty dict) gives POST.
        with_data = make_request("http://www.python.org", {})
        self.assertEqual(with_data.get_method(), 'POST')

    def test_with_method_arg(self):
        make_request = urllib.request.Request

        # An explicit method is reported by both the attribute and the getter.
        head_request = make_request("http://www.python.org", method='HEAD')
        self.assertEqual(head_request.method, 'HEAD')
        self.assertEqual(head_request.get_method(), 'HEAD')

        # The explicit method is still reported when data is supplied.
        head_with_data = make_request("http://www.python.org", {},
                                      method='HEAD')
        self.assertEqual(head_with_data.method, 'HEAD')
        self.assertEqual(head_with_data.get_method(), 'HEAD')

        # Reassigning the attribute afterwards changes what get_method() returns.
        mutable_request = make_request("http://www.python.org", method='GET')
        self.assertEqual(mutable_request.get_method(), 'GET')
        mutable_request.method = 'HEAD'
        self.assertEqual(mutable_request.get_method(), 'HEAD')
Skip Montanaro080c9972001-01-28 21:12:22 +00001618
1619
class URL2PathNameTests(unittest.TestCase):
    """Tests for nturl2path.url2pathname()."""

    def test_converting_drive_letter(self):
        # Both '|' and ':' are accepted after the drive letter.
        self.assertEqual(url2pathname("///C|"), 'C:')
        self.assertEqual(url2pathname("///C:"), 'C:')
        self.assertEqual(url2pathname("///C|/"), 'C:\\')

    def test_converting_when_no_drive_letter(self):
        # cannot end a raw string in \
        self.assertEqual(url2pathname("///C/test/"), r'\\\C\test' '\\')
        self.assertEqual(url2pathname("////C/test/"), r'\\C\test' '\\')

    def test_simple_compare(self):
        self.assertEqual(url2pathname("///C|/foo/bar/spam.foo"),
                         r'C:\foo\bar\spam.foo')

    def test_non_ascii_drive_letter(self):
        # OSError is the canonical name; IOError is only an alias for it.
        self.assertRaises(OSError, url2pathname, "///\u00e8|/")

    def test_roundtrip_url2pathname(self):
        list_of_paths = ['C:',
                         r'\\\C\test\\',
                         r'C:\foo\bar\spam.foo'
                         ]
        for path in list_of_paths:
            # subTest reports which path failed to round-trip.
            with self.subTest(path=path):
                self.assertEqual(url2pathname(pathname2url(path)), path)
Senthil Kumaran277e9092013-04-10 20:51:19 -07001646
class PathName2URLTests(unittest.TestCase):
    """Tests for nturl2path.pathname2url()."""

    def test_converting_drive_letter(self):
        self.assertEqual(pathname2url("C:"), '///C:')
        self.assertEqual(pathname2url("C:\\"), '///C:')

    def test_converting_when_no_drive_letter(self):
        # The trailing backslash is a separate literal because a raw string
        # cannot end in a backslash.
        self.assertEqual(pathname2url(r"\\\folder\test" "\\"),
                         '/////folder/test/')
        self.assertEqual(pathname2url(r"\\folder\test" "\\"),
                         '////folder/test/')
        self.assertEqual(pathname2url(r"\folder\test" "\\"),
                         '/folder/test/')

    def test_simple_compare(self):
        self.assertEqual(pathname2url(r'C:\foo\bar\spam.foo'),
                         "///C:/foo/bar/spam.foo")

    def test_long_drive_letter(self):
        # OSError is the canonical name; IOError is only an alias for it.
        self.assertRaises(OSError, pathname2url, "XX:\\")

    def test_roundtrip_pathname2url(self):
        list_of_paths = ['///C:',
                         '/////folder/test/',
                         '///C:/foo/bar/spam.foo']
        for path in list_of_paths:
            # subTest reports which URL failed to round-trip.
            with self.subTest(path=path):
                self.assertEqual(pathname2url(url2pathname(path)), path)
Brett Cannon74bfd702003-04-25 09:39:47 +00001674
# Run this module's tests when it is executed directly as a script.
if __name__ == "__main__":
    unittest.main()