blob: e9c656c583bbdbc53aec5fd75039072de8155b08 [file] [log] [blame]
"""Regression tests for what was in Python 2's "urllib" module"""
Brett Cannon74bfd702003-04-25 09:39:47 +00002
Jeremy Hylton1afc1692008-06-18 20:49:58 +00003import urllib.parse
4import urllib.request
guido@google.coma119df92011-03-29 11:41:02 -07005import urllib.error
Georg Brandl24420152008-05-26 16:32:26 +00006import http.client
Barry Warsaw820c1202008-06-12 04:06:45 +00007import email.message
Jeremy Hylton66dc8c52007-08-04 03:42:26 +00008import io
Brett Cannon74bfd702003-04-25 09:39:47 +00009import unittest
Benjamin Peterson3c2dca62014-06-07 15:08:04 -070010from unittest.mock import patch
Benjamin Petersonee8712c2008-05-20 21:35:26 +000011from test import support
Brett Cannon74bfd702003-04-25 09:39:47 +000012import os
Antoine Pitrou07df6552014-11-02 17:23:14 +010013try:
14 import ssl
15except ImportError:
16 ssl = None
Senthil Kumaran2d2ea1b2011-04-14 13:16:30 +080017import sys
Georg Brandl5a650a22005-08-26 08:51:34 +000018import tempfile
Senthil Kumaran277e9092013-04-10 20:51:19 -070019from nturl2path import url2pathname, pathname2url
Jeremy Hylton6102e292000-08-31 15:48:10 +000020
Senthil Kumaranc5c5a142012-01-14 19:09:04 +080021from base64 import b64encode
Georg Brandl2daf6ae2012-02-20 19:54:16 +010022import collections
Senthil Kumaranc5c5a142012-01-14 19:09:04 +080023
Senthil Kumaran8b081b72013-04-10 20:53:12 -070024
def hexescape(char):
    """Return *char* percent-escaped as RFC 2396 specifies.

    The result is ``%`` followed by the character's code point written in
    upper-case hexadecimal and zero-padded to at least two digits, e.g. a
    space becomes ``%20``.
    """
    return "%%%02X" % ord(char)
Jeremy Hylton6102e292000-08-31 15:48:10 +000031
# Shared FancyURLopener instance, created lazily by urlopen() below and
# reused by every later call that does not pass explicit proxies.
_urlopener = None


def urlopen(url, data=None, proxies=None):
    """urlopen(url [, data]) -> open file-like object

    When *proxies* is given, a dedicated FancyURLopener configured with
    those proxies is used for this call only.  Otherwise the module-level
    opener is created on first use and shared afterwards.  *data* is
    forwarded to the opener only when it is not None.
    """
    global _urlopener
    if proxies is not None:
        opener = urllib.request.FancyURLopener(proxies=proxies)
    else:
        if not _urlopener:
            _urlopener = FancyURLopener()
        opener = _urlopener
    if data is None:
        return opener.open(url)
    return opener.open(url, data)
50
Senthil Kumarance260142011-11-01 01:35:17 +080051
def FancyURLopener():
    """Return a new urllib.request.FancyURLopener.

    The opener is constructed inside a support.check_warnings() block
    registered for the DeprecationWarning that the constructor is expected
    to emit.
    """
    expected_warning = (
        'FancyURLopener style of invoking requests is deprecated.',
        DeprecationWarning,
    )
    with support.check_warnings(expected_warning):
        opener = urllib.request.FancyURLopener()
    return opener
57
58
def fakehttp(fakedata, mock_close=False):
    """Build an HTTPConnection subclass that replays *fakedata*.

    The returned class never touches the network: connect() installs an
    in-memory socket whose readable content is *fakedata*, and whatever the
    client sends is stored on the class as ``buf`` for later inspection.
    With *mock_close* true the connection's close() becomes a no-op.
    """
    class FakeSocket(io.BytesIO):
        # Number of live handles on this buffer: the socket itself plus one
        # for every makefile() call.  The buffer is really closed only when
        # the last handle is closed.
        io_refs = 1

        def sendall(self, data):
            # Remember the outgoing payload for verification by the tests.
            FakeHTTPConnection.buf = data

        def makefile(self, *args, **kwds):
            self.io_refs += 1
            return self

        def read(self, amt=None):
            # Reading after the final close yields EOF instead of raising.
            return b"" if self.closed else super().read(amt)

        def readline(self, length=None):
            return b"" if self.closed else super().readline(length)

        def close(self):
            self.io_refs -= 1
            if self.io_refs == 0:
                super().close()

    class FakeHTTPConnection(http.client.HTTPConnection):

        # buffer to store data for verification in urlopen tests.
        buf = None

        def connect(self):
            self.sock = FakeSocket(self.fakedata)
            # Expose the most recent socket on the class as well.
            type(self).fakesock = self.sock

        if mock_close:
            # bpo-36918: HTTPConnection destructor calls close() which calls
            # flush(). Problem: flush() calls self.fp.flush() which raises
            # "ValueError: I/O operation on closed file" which is logged as an
            # "Exception ignored in". Override close() to silence this error.
            def close(self):
                pass

    # Attached after class creation: a class body cannot rebind a name from
    # the enclosing function to itself.
    FakeHTTPConnection.fakedata = fakedata

    return FakeHTTPConnection
104
105
class FakeHTTPMixin(object):
    """Mixin that swaps http.client.HTTPConnection for a canned-data fake."""

    def fakehttp(self, fakedata, mock_close=False):
        """Install a fake connection class serving *fakedata*.

        The class being replaced is remembered so that unfakehttp() can put
        it back.
        """
        replacement = fakehttp(fakedata, mock_close=mock_close)
        self._connection_class = http.client.HTTPConnection
        http.client.HTTPConnection = replacement

    def unfakehttp(self):
        """Restore the connection class saved by the last fakehttp() call."""
        http.client.HTTPConnection = self._connection_class
114
115
class FakeFTPMixin(object):
    """Mixin that swaps urllib.request.ftpwrapper for an inert stand-in."""

    def fakeftp(self):
        """Install a stub ftpwrapper and remember the one it replaces."""
        class FakeFtpWrapper(object):
            # Accepts the same arguments as the class it replaces but
            # performs no work.
            def __init__(self, user, passwd, host, port, dirs, timeout=None,
                         persistent=True):
                pass

            def retrfile(self, file, type):
                # An empty in-memory file together with a length of 0.
                return io.BytesIO(), 0

            def close(self):
                pass

        self._ftpwrapper_class = urllib.request.ftpwrapper
        urllib.request.ftpwrapper = FakeFtpWrapper

    def unfakeftp(self):
        """Restore the ftpwrapper saved by the last fakeftp() call."""
        urllib.request.ftpwrapper = self._ftpwrapper_class
134
135
class urlopen_FileTests(unittest.TestCase):
    """Test urlopen() opening a temporary file.

    Covers as much of the returned object's interface as possible against a
    local file, to cut down on reliance on connecting to the Net for testing.
    """

    def setUp(self):
        # Write a one-line temp file, then open it through a file: URL.
        self.text = bytes("test_urllib: %s\n" % self.__class__.__name__,
                          "ascii")
        with open(support.TESTFN, 'wb') as tmp:
            tmp.write(self.text)
        self.pathname = support.TESTFN
        self.returned_obj = urlopen("file:%s" % self.pathname)

    def tearDown(self):
        """Close the opened object and delete the temp file."""
        self.returned_obj.close()
        os.remove(support.TESTFN)

    def test_interface(self):
        # The object returned by urlopen() must expose all of these names.
        required = ("read", "readline", "readlines", "fileno",
                    "close", "info", "geturl", "getcode", "__iter__")
        for attr in required:
            self.assertTrue(hasattr(self.returned_obj, attr),
                            "object returned by urlopen() lacks %s attribute" %
                            attr)

    def test_read(self):
        self.assertEqual(self.text, self.returned_obj.read())

    def test_readline(self):
        first = self.returned_obj.readline()
        self.assertEqual(self.text, first)
        second = self.returned_obj.readline()
        self.assertEqual(b'', second,
                         "calling readline() after exhausting the file did not"
                         " return an empty string")

    def test_readlines(self):
        lines_list = self.returned_obj.readlines()
        self.assertEqual(len(lines_list), 1,
                         "readlines() returned the wrong number of lines")
        self.assertEqual(lines_list[0], self.text,
                         "readlines() returned improper text")

    def test_fileno(self):
        file_num = self.returned_obj.fileno()
        self.assertIsInstance(file_num, int, "fileno() did not return an int")
        raw = os.read(file_num, len(self.text))
        self.assertEqual(raw, self.text,
                         "Reading on the file descriptor returned by fileno() "
                         "did not return the expected text")

    def test_close(self):
        # Close here; tearDown() closes again, so a second close() on the
        # same object is exercised too.
        self.returned_obj.close()

    def test_info(self):
        self.assertIsInstance(self.returned_obj.info(), email.message.Message)

    def test_geturl(self):
        self.assertEqual(self.returned_obj.geturl(), self.pathname)

    def test_getcode(self):
        self.assertIsNone(self.returned_obj.getcode())

    def test_iter(self):
        # No need to count iterations: any line beyond the first would fail
        # the comparison immediately.
        # Iterate in the usual implicit way to test for ticket #4608.
        for line in self.returned_obj:
            self.assertEqual(line, self.text)

    def test_relativelocalfile(self):
        with self.assertRaises(ValueError):
            urllib.request.urlopen('./' + self.pathname)
217
Senthil Kumaranefbd4ea2017-04-01 23:47:35 -0700218
class ProxyTests(unittest.TestCase):
    """Tests for proxy discovery and bypass driven by environment variables."""

    def setUp(self):
        # The guard records changes to env vars so tearDown can undo them.
        self.env = support.EnvironmentVarGuard()
        # Start every test without any proxy related env vars.
        for name in list(os.environ):
            if 'proxy' in name.lower():
                self.env.unset(name)

    def tearDown(self):
        # Restore all proxy related env vars
        self.env.__exit__()
        del self.env

    def test_getproxies_environment_keep_no_proxies(self):
        bypass = urllib.request.proxy_bypass_environment
        self.env.set('NO_PROXY', 'localhost')
        proxies = urllib.request.getproxies_environment()
        # getproxies_environment use lowered case truncated (no '_proxy') keys
        self.assertEqual('localhost', proxies['no'])
        # List of no_proxies with space.
        self.env.set('NO_PROXY',
                     'localhost, anotherdomain.com, newdomain.com:1234')
        self.assertTrue(bypass('anotherdomain.com'))
        self.assertTrue(bypass('anotherdomain.com:8888'))
        self.assertTrue(bypass('newdomain.com:1234'))

    def test_proxy_cgi_ignore(self):
        getproxies = urllib.request.getproxies_environment
        try:
            self.env.set('HTTP_PROXY', 'http://somewhere:3128')
            proxies = getproxies()
            self.assertEqual('http://somewhere:3128', proxies['http'])
            # With REQUEST_METHOD set, the 'http' entry must disappear.
            self.env.set('REQUEST_METHOD', 'GET')
            proxies = getproxies()
            self.assertNotIn('http', proxies)
        finally:
            self.env.unset('REQUEST_METHOD')
            self.env.unset('HTTP_PROXY')

    def test_proxy_bypass_environment_host_match(self):
        bypass = urllib.request.proxy_bypass_environment
        self.env.set('NO_PROXY',
                     'localhost, anotherdomain.com, newdomain.com:1234, .d.o.t')
        for host in (
                'localhost',
                'LocalHost',                # MixedCase
                'LOCALHOST',                # UPPERCASE
                '.localhost',
                'newdomain.com:1234',
                '.newdomain.com:1234',
                'foo.d.o.t',                # issue 29142
                'd.o.t',
                'anotherdomain.com:8888',
                '.anotherdomain.com:8888',
                'www.newdomain.com:1234',
        ):
            self.assertTrue(bypass(host))
        for host in (
                'prelocalhost',
                'newdomain.com',            # no port
                'newdomain.com:1235',       # wrong port
        ):
            self.assertFalse(bypass(host))

    def test_proxy_bypass_environment_always_match(self):
        bypass = urllib.request.proxy_bypass_environment
        # A lone '*' bypasses the proxy for every host.
        self.env.set('NO_PROXY', '*')
        self.assertTrue(bypass('newdomain.com'))
        self.assertTrue(bypass('newdomain.com:1234'))
        # As one entry among several, '*' is not treated as a wildcard.
        self.env.set('NO_PROXY', '*, anotherdomain.com')
        self.assertTrue(bypass('anotherdomain.com'))
        self.assertFalse(bypass('newdomain.com'))
        self.assertFalse(bypass('newdomain.com:1234'))

    def test_proxy_bypass_environment_newline(self):
        bypass = urllib.request.proxy_bypass_environment
        self.env.set('NO_PROXY',
                     'localhost, anotherdomain.com, newdomain.com:1234')
        # A trailing newline must prevent a match.
        for host in ('localhost\n',
                     'anotherdomain.com:8888\n',
                     'newdomain.com:1234\n'):
            self.assertFalse(bypass(host))
293
Senthil Kumaranefbd4ea2017-04-01 23:47:35 -0700294
class ProxyTests_withOrderedEnv(unittest.TestCase):
    """Proxy environment tests where the order of variables is significant."""

    def setUp(self):
        # Monkey patch os.environ with an empty, insertion-ordered fake
        # environment; the real mapping is put back in tearDown().
        self._saved_env = os.environ
        os.environ = collections.OrderedDict()

    def tearDown(self):
        os.environ = self._saved_env

    def test_getproxies_environment_prefer_lowercase(self):
        bypass = urllib.request.proxy_bypass_environment
        getproxies = urllib.request.getproxies_environment
        fake_env = os.environ

        # Test lowercase preference with removal
        fake_env['no_proxy'] = ''
        fake_env['No_Proxy'] = 'localhost'
        self.assertFalse(bypass('localhost'))
        self.assertFalse(bypass('arbitrary'))
        fake_env['http_proxy'] = ''
        fake_env['HTTP_PROXY'] = 'http://somewhere:3128'
        self.assertEqual({}, getproxies())

        # Test lowercase preference of proxy bypass and correct matching
        # including ports
        fake_env['no_proxy'] = 'localhost, noproxy.com, my.proxy:1234'
        fake_env['No_Proxy'] = 'xyz.com'
        self.assertTrue(bypass('localhost'))
        self.assertTrue(bypass('noproxy.com:5678'))
        self.assertTrue(bypass('my.proxy:1234'))
        self.assertFalse(bypass('my.proxy'))
        self.assertFalse(bypass('arbitrary'))

        # Test lowercase preference with replacement
        fake_env['http_proxy'] = 'http://somewhere:3128'
        fake_env['Http_Proxy'] = 'http://somewhereelse:3128'
        proxies = getproxies()
        self.assertEqual('http://somewhere:3128', proxies['http'])
Benjamin Peterson9bc93512008-09-22 22:10:59 +0000329
Senthil Kumaranefbd4ea2017-04-01 23:47:35 -0700330
class urlopen_HttpTests(unittest.TestCase, FakeHTTPMixin, FakeFTPMixin):
    """Test urlopen() opening a fake http connection."""

    def check_read(self, ver):
        """Serve a minimal 200 reply for HTTP version *ver* and read it."""
        self.fakehttp(b"HTTP/" + ver + b" 200 OK\r\n\r\nHello!")
        try:
            response = urlopen("http://python.org/")
            self.assertEqual(response.readline(), b"Hello!")
            self.assertEqual(response.readline(), b"")
            self.assertEqual(response.geturl(), 'http://python.org/')
            self.assertEqual(response.getcode(), 200)
        finally:
            self.unfakehttp()

    def test_url_fragment(self):
        # Issue #11703: geturl() omits fragments in the original URL.
        url = 'http://docs.python.org/library/urllib.html#OK'
        self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello!")
        try:
            response = urllib.request.urlopen(url)
            self.assertEqual(response.geturl(), url)
        finally:
            self.unfakehttp()

    def test_willclose(self):
        self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello!")
        try:
            response = urlopen("http://www.python.org")
            self.assertTrue(response.fp.will_close)
        finally:
            self.unfakehttp()

    @unittest.skipUnless(ssl, "ssl module required")
    def test_url_with_control_char_rejected(self):
        # Every code point from 0x00 through 0x20, plus DEL.
        for char_no in [*range(0x21), 0x7f]:
            char = chr(char_no)
            schemeless_url = f"//localhost:7777/test{char}/"
            self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello.")
            try:
                # urllib.request.urlopen() is tested explicitly rather than
                # this module's own urlopen() helper: the two take different
                # url opening codepaths.  The helper goes through
                # FancyURLOpener, which calls urllib.parse.quote() on the
                # URL and thereby makes injection attempts within the url
                # _path_ safe.
                escaped_char_repr = repr(char).replace('\\', r'\\')
                InvalidURL = http.client.InvalidURL
                pattern = f"contain control.*{escaped_char_repr}"
                with self.assertRaisesRegex(InvalidURL, pattern):
                    urllib.request.urlopen(f"http:{schemeless_url}")
                with self.assertRaisesRegex(InvalidURL, pattern):
                    urllib.request.urlopen(f"https:{schemeless_url}")
                # This code path quotes the URL so there is no injection.
                resp = urlopen(f"http:{schemeless_url}")
                self.assertNotIn(char, resp.geturl())
            finally:
                self.unfakehttp()

    @unittest.skipUnless(ssl, "ssl module required")
    def test_url_with_newline_header_injection_rejected(self):
        self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello.")
        host = "localhost:7777?a=1 HTTP/1.1\r\nX-injected: header\r\nTEST: 123"
        schemeless_url = "//" + host + ":8080/test/?test=a"
        try:
            # urllib.request.urlopen() is tested explicitly rather than this
            # module's own urlopen() helper: the two take different url
            # opening codepaths.  The helper goes through FancyURLOpener,
            # which calls urllib.parse.quote() on the URL and thereby makes
            # injection attempts within the url _path_ safe.
            InvalidURL = http.client.InvalidURL
            with self.assertRaisesRegex(
                    InvalidURL,
                    r"contain control.*\\r.*(found at least . .)"):
                urllib.request.urlopen(f"http:{schemeless_url}")
            with self.assertRaisesRegex(InvalidURL, r"contain control.*\\n"):
                urllib.request.urlopen(f"https:{schemeless_url}")
            # This code path quotes the URL so there is no injection.
            resp = urlopen(f"http:{schemeless_url}")
            final_url = resp.geturl()
            self.assertNotIn(' ', final_url)
            self.assertNotIn('\r', final_url)
            self.assertNotIn('\n', final_url)
        finally:
            self.unfakehttp()

    def test_read_0_9(self):
        # "0.9" response accepted (but not "simple responses" without
        # a status line)
        self.check_read(b"0.9")

    def test_read_1_0(self):
        self.check_read(b"1.0")

    def test_read_1_1(self):
        self.check_read(b"1.1")

    def test_read_bogus(self):
        # urlopen() should raise OSError for many error codes.
        self.fakehttp(b'''HTTP/1.1 401 Authentication Required
Date: Wed, 02 Jan 2008 03:03:54 GMT
Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
Connection: close
Content-Type: text/html; charset=iso-8859-1
''', mock_close=True)
        try:
            with self.assertRaises(OSError):
                urlopen("http://python.org/")
        finally:
            self.unfakehttp()

    def test_invalid_redirect(self):
        # A redirect whose Location is a file: URL must be refused with an
        # HTTPError.
        self.fakehttp(b'''HTTP/1.1 302 Found
Date: Wed, 02 Jan 2008 03:03:54 GMT
Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
Location: file://guidocomputer.athome.com:/python/license
Connection: close
Content-Type: text/html; charset=iso-8859-1
''', mock_close=True)
        try:
            msg = "Redirection to url 'file:"
            with self.assertRaisesRegex(urllib.error.HTTPError, msg):
                urlopen("http://python.org/")
        finally:
            self.unfakehttp()

    def test_redirect_limit_independent(self):
        # Ticket #12923: make sure independent requests each use their
        # own retry limit.
        for _ in range(FancyURLopener().maxtries):
            self.fakehttp(b'''HTTP/1.1 302 Found
Location: file://guidocomputer.athome.com:/python/license
Connection: close
''', mock_close=True)
            try:
                with self.assertRaises(urllib.error.HTTPError):
                    urlopen("http://something")
            finally:
                self.unfakehttp()

    def test_empty_socket(self):
        # urlopen() raises OSError if the underlying socket does not send any
        # data. (#1680230)
        self.fakehttp(b'')
        try:
            with self.assertRaises(OSError):
                urlopen("http://something")
        finally:
            self.unfakehttp()

    def test_missing_localfile(self):
        # Test for #10836
        with self.assertRaises(urllib.error.URLError) as caught:
            urlopen('file://localhost/a/file/which/doesnot/exists.py')
        self.assertTrue(caught.exception.filename)
        self.assertTrue(caught.exception.reason)

    def test_file_notexists(self):
        fd, tmp_file = tempfile.mkstemp()
        tmp_fileurl = 'file://localhost/' + tmp_file.replace(os.path.sep, '/')
        try:
            # While the file exists, opening its URL succeeds.
            self.assertTrue(os.path.exists(tmp_file))
            with urlopen(tmp_fileurl) as fobj:
                self.assertTrue(fobj)
        finally:
            os.close(fd)
            os.unlink(tmp_file)
        # Once it is gone, the same URL must raise URLError.
        self.assertFalse(os.path.exists(tmp_file))
        with self.assertRaises(urllib.error.URLError):
            urlopen(tmp_fileurl)

    def test_ftp_nohost(self):
        test_ftp_url = 'ftp:///path'
        with self.assertRaises(urllib.error.URLError) as caught:
            urlopen(test_ftp_url)
        self.assertFalse(caught.exception.filename)
        self.assertTrue(caught.exception.reason)

    def test_ftp_nonexisting(self):
        with self.assertRaises(urllib.error.URLError) as caught:
            urlopen('ftp://localhost/a/file/which/doesnot/exists.py')
        self.assertFalse(caught.exception.filename)
        self.assertTrue(caught.exception.reason)

    @patch.object(urllib.request, 'MAXFTPCACHE', 0)
    def test_ftp_cache_pruning(self):
        self.fakeftp()
        try:
            # Seed the cache with one entry before opening an FTP URL while
            # MAXFTPCACHE is patched to 0.
            stale = urllib.request.ftpwrapper(
                'user', 'pass', 'localhost', 21, [])
            urllib.request.ftpcache['test'] = stale
            urlopen('ftp://localhost')
        finally:
            self.unfakeftp()

    def test_userpass_inurl(self):
        self.fakehttp(b"HTTP/1.0 200 OK\r\n\r\nHello!")
        try:
            response = urlopen("http://user:pass@python.org/")
            self.assertEqual(response.readline(), b"Hello!")
            self.assertEqual(response.readline(), b"")
            self.assertEqual(response.geturl(),
                             'http://user:pass@python.org/')
            self.assertEqual(response.getcode(), 200)
        finally:
            self.unfakehttp()

    def test_userpass_inurl_w_spaces(self):
        self.fakehttp(b"HTTP/1.0 200 OK\r\n\r\nHello!")
        try:
            userpass = "a b:c d"
            url = "http://{}@python.org/".format(userpass)
            # The currently installed fake class; it records what was sent.
            fakehttp_wrapper = http.client.HTTPConnection
            encoded = b64encode(userpass.encode("ASCII")).decode("ASCII")
            authorization = "Authorization: Basic %s\r\n" % encoded
            response = urlopen(url)
            # The authorization header must be in place
            sent = fakehttp_wrapper.buf.decode("UTF-8")
            self.assertIn(authorization, sent)
            self.assertEqual(response.readline(), b"Hello!")
            self.assertEqual(response.readline(), b"")
            # the spaces are quoted in URL so no match
            self.assertNotEqual(response.geturl(), url)
            self.assertEqual(response.getcode(), 200)
        finally:
            self.unfakehttp()

    def test_URLopener_deprecation(self):
        with support.check_warnings(('', DeprecationWarning)):
            urllib.request.URLopener()

    @unittest.skipUnless(ssl, "ssl module required")
    def test_cafile_and_context(self):
        # Passing both cafile and context must be rejected with ValueError.
        context = ssl.create_default_context()
        with support.check_warnings(('', DeprecationWarning)):
            with self.assertRaises(ValueError):
                urllib.request.urlopen(
                    "https://localhost",
                    cafile="/nonexistent/path",
                    context=context,
                )
Senthil Kumarana5c85b32014-09-19 15:23:30 +0800564
Senthil Kumaranefbd4ea2017-04-01 23:47:35 -0700565
class urlopen_DataTests(unittest.TestCase):
    """Test urlopen() opening a data URL."""

    def setUp(self):
        # text containing URL special- and unicode-characters
        self.text = "test data URLs :;,%=& \u00f6 \u00c4 "
        # 2x1 pixel RGB PNG image with one black and one white pixel
        self.image = (
            b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\x02\x00\x00\x00'
            b'\x01\x08\x02\x00\x00\x00{@\xe8\xdd\x00\x00\x00\x01sRGB\x00\xae'
            b'\xce\x1c\xe9\x00\x00\x00\x0fIDAT\x08\xd7c```\xf8\xff\xff?\x00'
            b'\x06\x01\x02\xfe\no/\x1e\x00\x00\x00\x00IEND\xaeB`\x82')

        self.text_url = (
            "data:text/plain;charset=UTF-8,test%20data%20URLs%20%3A%3B%2C%25%3"
            "D%26%20%C3%B6%20%C3%84%20")
        self.text_url_base64 = (
            "data:text/plain;charset=ISO-8859-1;base64,dGVzdCBkYXRhIFVSTHMgOjs"
            "sJT0mIPYgxCA%3D")
        # base64 encoded data URL that contains ignorable spaces,
        # such as "\n", " ", "%0A", and "%20".
        self.image_url = (
            "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAIAAAABCAIAAAB7\n"
            "QOjdAAAAAXNSR0IArs4c6QAAAA9JREFUCNdj%0AYGBg%2BP//PwAGAQL%2BCm8 "
            "vHgAAAABJRU5ErkJggg%3D%3D%0A%20")

        # Open each URL once; the tests below inspect these responses.
        opener = urllib.request.urlopen
        self.text_url_resp = opener(self.text_url)
        self.text_url_base64_resp = opener(self.text_url_base64)
        self.image_url_resp = opener(self.image_url)

    def test_interface(self):
        # The object returned by urlopen() must expose all of these names.
        required = ("read", "readline", "readlines",
                    "close", "info", "geturl", "getcode", "__iter__")
        for attr in required:
            self.assertTrue(hasattr(self.text_url_resp, attr),
                            "object returned by urlopen() lacks %s attribute" %
                            attr)

    def test_info(self):
        self.assertIsInstance(self.text_url_resp.info(), email.message.Message)
        base64_params = self.text_url_base64_resp.info().get_params()
        self.assertEqual(base64_params,
                         [('text/plain', ''), ('charset', 'ISO-8859-1')])
        self.assertEqual(self.image_url_resp.info()['content-length'],
                         str(len(self.image)))
        # A bare "data:," URL reports text/plain with a US-ASCII charset.
        default_params = urllib.request.urlopen("data:,").info().get_params()
        self.assertEqual(default_params,
                         [('text/plain', ''), ('charset', 'US-ASCII')])

    def test_geturl(self):
        self.assertEqual(self.text_url_resp.geturl(), self.text_url)
        self.assertEqual(self.text_url_base64_resp.geturl(),
                         self.text_url_base64)
        self.assertEqual(self.image_url_resp.geturl(), self.image_url)

    def test_read_text(self):
        resp = self.text_url_resp
        # Decode the payload with the charset announced in the headers.
        charset = dict(resp.info().get_params())['charset']
        self.assertEqual(resp.read().decode(charset), self.text)

    def test_read_text_base64(self):
        resp = self.text_url_base64_resp
        charset = dict(resp.info().get_params())['charset']
        self.assertEqual(resp.read().decode(charset), self.text)

    def test_read_image(self):
        self.assertEqual(self.image_url_resp.read(), self.image)

    def test_missing_comma(self):
        with self.assertRaises(ValueError):
            urllib.request.urlopen('data:text/plain')

    def test_invalid_base64_data(self):
        # missing padding character
        with self.assertRaises(ValueError):
            urllib.request.urlopen('data:;base64,Cg=')
638
Senthil Kumaranefbd4ea2017-04-01 23:47:35 -0700639
class urlretrieve_FileTests(unittest.TestCase):
    """Test urllib.request.urlretrieve() on local files."""

    def setUp(self):
        # Names of the temporary files used by a test (absolute paths or
        # paths relative to the current working directory).  tearDown()
        # removes every file listed here.  This only takes care of deleting
        # the files; closing files that are still open remains the
        # responsibility of the code that opened them.
        self.tempFiles = []

        # Create the source file shared by most tests.
        self.registerFileForCleanUp(support.TESTFN)
        self.text = b'testing urllib.urlretrieve'
        with open(support.TESTFN, 'wb') as source:
            source.write(self.text)

    def tearDown(self):
        # Delete the temporary files.  Removal is best effort: a file that
        # was never created (or is already gone) is not an error.
        for each in self.tempFiles:
            try:
                os.remove(each)
            except OSError:
                pass

    def constructLocalFileUrl(self, filePath):
        """Return a file:// URL for the absolute form of filePath.

        Raises unittest.SkipTest when the absolute path cannot be encoded
        as UTF-8.
        """
        filePath = os.path.abspath(filePath)
        try:
            filePath.encode("utf-8")
        except UnicodeEncodeError:
            raise unittest.SkipTest("filePath is not encodable to utf8")
        return "file://%s" % urllib.request.pathname2url(filePath)

    def createNewTempFile(self, data=b""):
        """Create a new temporary file containing the specified data,
        register it for deletion during tearDown, and return its path."""
        newFd, newFilePath = tempfile.mkstemp()
        self.registerFileForCleanUp(newFilePath)
        # The file object takes ownership of the descriptor and closes it
        # when the with-block exits, even if the write fails.
        with os.fdopen(newFd, "wb") as newFile:
            newFile.write(data)
        return newFilePath

    def registerFileForCleanUp(self, fileName):
        """Remember fileName so that tearDown() deletes it."""
        self.tempFiles.append(fileName)

    def test_basic(self):
        # Make sure that a local file just gets its own location returned and
        # a headers value is returned.
        result = urllib.request.urlretrieve("file:%s" % support.TESTFN)
        self.assertEqual(result[0], support.TESTFN)
        self.assertIsInstance(result[1], email.message.Message,
                              "did not get an email.message.Message instance "
                              "as second returned value")

    def test_copy(self):
        # Test that setting the filename argument works.
        second_temp = "%s.2" % support.TESTFN
        self.registerFileForCleanUp(second_temp)
        result = urllib.request.urlretrieve(
            self.constructLocalFileUrl(support.TESTFN), second_temp)
        self.assertEqual(second_temp, result[0])
        self.assertTrue(os.path.exists(second_temp),
                        "copy of the file was not made")
        with open(second_temp, 'rb') as copied:
            text = copied.read()
        self.assertEqual(self.text, text)

    def test_reporthook(self):
        # Make sure that the reporthook works.
        calls = []

        def hooktester(block_count, block_read_size, file_size):
            self.assertIsInstance(block_count, int)
            self.assertIsInstance(block_read_size, int)
            self.assertIsInstance(file_size, int)
            # Block numbers must count up from zero, one per call.
            self.assertEqual(block_count, len(calls))
            calls.append(block_count)

        second_temp = "%s.2" % support.TESTFN
        self.registerFileForCleanUp(second_temp)
        urllib.request.urlretrieve(
            self.constructLocalFileUrl(support.TESTFN),
            second_temp, hooktester)
        # Without this the test would pass even if the hook never ran.
        self.assertTrue(calls, "reporthook was never called")

    def _retrieve_with_report(self, data):
        """Retrieve a fresh temp file holding data into support.TESTFN.

        Returns the list of (block_count, block_read_size, file_size)
        tuples passed to the reporthook, in call order.
        """
        report = []

        def hooktester(block_count, block_read_size, file_size):
            report.append((block_count, block_read_size, file_size))

        srcFileName = self.createNewTempFile(data)
        urllib.request.urlretrieve(self.constructLocalFileUrl(srcFileName),
                                   support.TESTFN, hooktester)
        return report

    def test_reporthook_0_bytes(self):
        # Test on zero length file. Should call reporthook only 1 time.
        report = self._retrieve_with_report(b"")
        self.assertEqual(len(report), 1)
        self.assertEqual(report[0][2], 0)

    def test_reporthook_5_bytes(self):
        # Test on 5 byte file. Should call reporthook only 2 times (once when
        # the "network connection" is established and once when the block is
        # read).
        report = self._retrieve_with_report(b"x" * 5)
        self.assertEqual(len(report), 2)
        self.assertEqual(report[0][2], 5)
        self.assertEqual(report[1][2], 5)

    def test_reporthook_8193_bytes(self):
        # Test on 8193 byte file. Should call reporthook only 3 times (once
        # when the "network connection" is established, once for the next 8192
        # bytes, and once for the last byte).
        report = self._retrieve_with_report(b"x" * 8193)
        self.assertEqual(len(report), 3)
        self.assertEqual(report[0][2], 8193)
        self.assertEqual(report[0][1], 8192)
        self.assertEqual(report[1][1], 8192)
        self.assertEqual(report[2][1], 8192)
Skip Montanaro080c9972001-01-28 21:12:22 +0000778
Senthil Kumarance260142011-11-01 01:35:17 +0800779
class urlretrieve_HttpTests(unittest.TestCase, FakeHTTPMixin):
    """Test urllib.urlretrieve() using fake http connections"""

    # Canned response whose Content-Length (100) is larger than the body
    # that actually follows ("FF\n").
    _SHORT_RESPONSE = (
        b"HTTP/1.1 200 OK\n"
        b"Date: Wed, 02 Jan 2008 03:03:54 GMT\n"
        b"Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 "
        b"OpenSSL/0.9.7e\n"
        b"Connection: close\n"
        b"Content-Length: 100\n"
        b"Content-Type: text/html; charset=iso-8859-1\n"
        b"\n"
        b"FF\n"
    )

    def test_short_content_raises_ContentTooShortError(self):
        def _reporthook(par1, par2, par3):
            pass

        self.fakehttp(self._SHORT_RESPONSE)
        try:
            with self.assertRaises(urllib.error.ContentTooShortError):
                urllib.request.urlretrieve(support.TEST_HTTP_URL,
                                           reporthook=_reporthook)
        finally:
            # Always restore the real HTTP connection class.
            self.unfakehttp()

    def test_short_content_raises_ContentTooShortError_without_reporthook(self):
        self.fakehttp(self._SHORT_RESPONSE)
        try:
            with self.assertRaises(urllib.error.ContentTooShortError):
                urllib.request.urlretrieve(support.TEST_HTTP_URL)
        finally:
            # Always restore the real HTTP connection class.
            self.unfakehttp()
819
820
class QuotingTests(unittest.TestCase):
    r"""Tests for urllib.quote() and urllib.quote_plus()

    According to RFC 3986 (Uniform Resource Identifiers), to escape a
    character you write it as '%' + <2 character US-ASCII hex value>.
    The Python code of ``'%' + hex(ord(<character>))[2:]`` escapes a
    character properly. Case does not matter on the hex letters.

    The various character sets specified are:

    Reserved characters : ";/?:@&=+$,"
        Have special meaning in URIs and must be escaped if not being used for
        their special meaning
    Data characters : letters, digits, and "-_.!~*'()"
        Unreserved and do not need to be escaped; can be, though, if desired
    Control characters : 0x00 - 0x1F, 0x7F
        Have no use in URIs so must be escaped
    space : 0x20
        Must be escaped
    Delimiters : '<>#%"'
        Must be escaped
    Unwise : "{}|\^[]`"
        Must be escaped

    """

    def _check(self, func, expect, *args, **kwargs):
        """Assert that func(*args, **kwargs) returns expect.

        The failure message names the function under test and shows both
        the expected and the actual value.
        """
        result = func(*args, **kwargs)
        self.assertEqual(expect, result,
                         "using %s(): %r != %r" %
                         (func.__name__, expect, result))

    def test_never_quote(self):
        # Make sure quote() does not quote letters, digits, and "_.-~"
        do_not_quote = ''.join(["ABCDEFGHIJKLMNOPQRSTUVWXYZ",
                                "abcdefghijklmnopqrstuvwxyz",
                                "0123456789",
                                "_.-~"])
        self._check(urllib.parse.quote, do_not_quote, do_not_quote)
        self._check(urllib.parse.quote_plus, do_not_quote, do_not_quote)

    def test_default_safe(self):
        # Test '/' is default value for 'safe' parameter
        self.assertEqual(urllib.parse.quote.__defaults__[0], '/')

    def test_safe(self):
        # Test setting 'safe' parameter does what it should do
        quote_by_default = "<>"
        self._check(urllib.parse.quote, quote_by_default,
                    quote_by_default, safe=quote_by_default)
        self._check(urllib.parse.quote_plus, quote_by_default,
                    quote_by_default, safe=quote_by_default)
        # Safe expressed as bytes rather than str
        self._check(urllib.parse.quote, quote_by_default,
                    quote_by_default, safe=b"<>")
        # "Safe" non-ASCII characters should have no effect
        # (Since URIs are not allowed to have non-ASCII characters)
        expect = urllib.parse.quote("a\xfcb", encoding="latin-1", safe="")
        self._check(urllib.parse.quote, expect,
                    "a\xfcb", encoding="latin-1", safe="\xfc")
        # Same as above, but using a bytes rather than str
        self._check(urllib.parse.quote, expect,
                    "a\xfcb", encoding="latin-1", safe=b"\xfc")

    def test_default_quoting(self):
        # Make sure all characters that should be quoted are by default sans
        # space (separate test for that).
        should_quote = ''.join(
            [chr(num) for num in range(32)]     # For 0x00 - 0x1F
            + [r'<>#%"{}|\^[]`']
            + [chr(127)])                       # For 0x7F
        for char in should_quote:
            # subTest reports every offending character instead of stopping
            # at the first one.
            with self.subTest(char=char):
                escaped = hexescape(char)
                for func in (urllib.parse.quote, urllib.parse.quote_plus):
                    result = func(char)
                    self.assertEqual(escaped, result,
                                     "using %s(): "
                                     "%s should be escaped to %s, not %s" %
                                     (func.__name__, char, escaped, result))
        partial_quote = "ab[]cd"
        expected = "ab%5B%5Dcd"
        self._check(urllib.parse.quote, expected, partial_quote)
        self._check(urllib.parse.quote_plus, expected, partial_quote)

    def test_quoting_space(self):
        # Make sure quote() and quote_plus() handle spaces as specified in
        # their unique way
        self._check(urllib.parse.quote, hexescape(' '), ' ')
        self._check(urllib.parse.quote_plus, '+', ' ')
        given = "a b cd e f"
        self._check(urllib.parse.quote,
                    given.replace(' ', hexescape(' ')), given)
        self._check(urllib.parse.quote_plus,
                    given.replace(' ', '+'), given)

    def test_quoting_plus(self):
        self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma'),
                         'alpha%2Bbeta+gamma')
        # 'safe' passed positionally
        self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma', '+'),
                         'alpha+beta+gamma')
        # Test with bytes
        self.assertEqual(urllib.parse.quote_plus(b'alpha+beta gamma'),
                         'alpha%2Bbeta+gamma')
        # Test with safe bytes
        self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma', b'+'),
                         'alpha+beta+gamma')

    def test_quote_bytes(self):
        # Bytes should quote directly to percent-encoded values
        given = b"\xa2\xd8ab\xff"
        expect = "%A2%D8ab%FF"
        self._check(urllib.parse.quote, expect, given)
        # Encoding argument should raise type error on bytes input
        self.assertRaises(TypeError, urllib.parse.quote, given,
                          encoding="latin-1")
        # quote_from_bytes should work the same
        self._check(urllib.parse.quote_from_bytes, expect, given)

    def test_quote_with_unicode(self):
        # Characters in Latin-1 range, encoded by default in UTF-8
        given = "\xa2\xd8ab\xff"
        expect = "%C2%A2%C3%98ab%C3%BF"
        self._check(urllib.parse.quote, expect, given)
        # Characters in Latin-1 range, encoded by with None (default)
        self._check(urllib.parse.quote, expect, given,
                    encoding=None, errors=None)
        # Characters in Latin-1 range, encoded with Latin-1
        self._check(urllib.parse.quote, "%A2%D8ab%FF", given,
                    encoding="latin-1")
        # Characters in BMP, encoded by default in UTF-8
        given = "\u6f22\u5b57"              # "Kanji"
        self._check(urllib.parse.quote, "%E6%BC%A2%E5%AD%97", given)
        # Characters in BMP, encoded with Latin-1
        self.assertRaises(UnicodeEncodeError, urllib.parse.quote, given,
                          encoding="latin-1")
        # Characters in BMP, encoded with Latin-1, with replace error handling
        self._check(urllib.parse.quote, "%3F%3F", given,    # "??"
                    encoding="latin-1", errors="replace")
        # Characters in BMP, Latin-1, with xmlcharref error handling
        # (expected value is the quoted form of "&#28450;&#23383;")
        self._check(urllib.parse.quote, "%26%2328450%3B%26%2323383%3B", given,
                    encoding="latin-1", errors="xmlcharrefreplace")

    def test_quote_plus_with_unicode(self):
        # Encoding (latin-1) test for quote_plus
        self._check(urllib.parse.quote_plus, "%A2%D8+%FF",
                    "\xa2\xd8 \xff", encoding="latin-1")
        # Errors test for quote_plus
        self._check(urllib.parse.quote_plus, "ab%3F%3F+cd",
                    "ab\u6f22\u5b57 cd", encoding="latin-1",
                    errors="replace")
1024
Senthil Kumarand496c4c2010-07-30 19:34:36 +00001025
class UnquotingTests(unittest.TestCase):
    """Tests for unquote() and unquote_plus()

    See the doc string for QuotingTests for details on quoting and such.

    """

    def _check(self, func, expect, *args, **kwargs):
        """Assert that func(*args, **kwargs) returns expect.

        The failure message names the function under test and shows both
        the expected and the actual value.
        """
        result = func(*args, **kwargs)
        self.assertEqual(expect, result,
                         "using %s(): %r != %r" %
                         (func.__name__, expect, result))

    def test_unquoting(self):
        # Make sure unquoting of all ASCII values works
        escape_list = []
        for num in range(128):
            given = hexescape(chr(num))
            expect = chr(num)
            self._check(urllib.parse.unquote, expect, given)
            self._check(urllib.parse.unquote_plus, expect, given)
            escape_list.append(given)
        escape_string = ''.join(escape_list)
        result = urllib.parse.unquote(escape_string)
        # Exactly one '%' must remain: the one produced by unquoting '%25'.
        self.assertEqual(result.count('%'), 1,
                         "using unquote(): not all characters unescaped: "
                         "%s" % result)
        self.assertRaises((TypeError, AttributeError),
                          urllib.parse.unquote, None)
        self.assertRaises((TypeError, AttributeError),
                          urllib.parse.unquote, ())
        # NOTE(review): urllib.parse.unquote() is documented to accept bytes
        # from Python 3.9 on; this check only holds for earlier versions --
        # confirm the interpreter version this file targets.
        with support.check_warnings(('', BytesWarning), quiet=True):
            self.assertRaises((TypeError, AttributeError),
                              urllib.parse.unquote, b'')

    def test_unquoting_badpercent(self):
        # Test unquoting on bad percent-escapes: they are left untouched
        malformed = ('%xab', '%x', '%')
        for given in malformed:
            self._check(urllib.parse.unquote, given, given)
        # unquote_to_bytes
        for given in malformed:
            self._check(urllib.parse.unquote_to_bytes,
                        bytes(given, 'ascii'), given)
        self.assertRaises((TypeError, AttributeError),
                          urllib.parse.unquote_to_bytes, None)
        self.assertRaises((TypeError, AttributeError),
                          urllib.parse.unquote_to_bytes, ())

    def test_unquoting_mixed_case(self):
        # Test unquoting on mixed-case hex digits in the percent-escapes
        self._check(urllib.parse.unquote_to_bytes, b'\xab\xea', '%Ab%eA')

    def test_unquoting_parts(self):
        # Make sure unquoting works when have non-quoted characters
        # interspersed
        given = 'ab%sd' % hexescape('c')
        expect = "abcd"
        self._check(urllib.parse.unquote, expect, given)
        self._check(urllib.parse.unquote_plus, expect, given)

    def test_unquoting_plus(self):
        # Test difference between unquote() and unquote_plus()
        given = "are+there+spaces..."
        self._check(urllib.parse.unquote, given, given)
        self._check(urllib.parse.unquote_plus,
                    given.replace('+', ' '), given)

    def test_unquote_to_bytes(self):
        self._check(urllib.parse.unquote_to_bytes,
                    b'br\xc3\xbcckner_sapporo_20050930.doc',
                    'br%C3%BCckner_sapporo_20050930.doc')
        # Test on a string with unescaped non-ASCII characters
        # (Technically an invalid URI; expect those characters to be UTF-8
        # encoded).
        self._check(urllib.parse.unquote_to_bytes,
                    b'\xe6\xbc\xa2\xc3\xbc',    # UTF-8 for "\u6f22\u00fc"
                    "\u6f22%C3%BC")
        # Test with a bytes as input
        self._check(urllib.parse.unquote_to_bytes,
                    b'\xa2\xd8ab\xff', b'%A2%D8ab%FF')
        # Test with a bytes as input, with unescaped non-ASCII bytes
        # (Technically an invalid URI; expect those bytes to be preserved)
        self._check(urllib.parse.unquote_to_bytes,
                    b'\xa2\xd8ab\xff', b'%A2\xd8ab%FF')

    def test_unquote_with_unicode(self):
        # Characters in the Latin-1 range, encoded with UTF-8
        given = 'br%C3%BCckner_sapporo_20050930.doc'
        expect = 'br\u00fcckner_sapporo_20050930.doc'
        self._check(urllib.parse.unquote, expect, given)
        # Characters in the Latin-1 range, encoded with None (default)
        self._check(urllib.parse.unquote, expect, given,
                    encoding=None, errors=None)

        # Characters in the Latin-1 range, encoded with Latin-1
        self._check(urllib.parse.unquote, expect,
                    'br%FCckner_sapporo_20050930.doc', encoding="latin-1")

        # Characters in BMP, encoded with UTF-8
        self._check(urllib.parse.unquote,
                    "\u6f22\u5b57",             # "Kanji"
                    "%E6%BC%A2%E5%AD%97")

        # Decode with UTF-8, invalid sequence
        given = "%F3%B1"
        expect = "\ufffd"                       # Replacement character
        self._check(urllib.parse.unquote, expect, given)

        # Decode with UTF-8, invalid sequence, replace errors
        self._check(urllib.parse.unquote, expect, given, errors="replace")

        # Decode with UTF-8, invalid sequence, ignoring errors
        self._check(urllib.parse.unquote, "", given, errors="ignore")

        # A mix of non-ASCII and percent-encoded characters, UTF-8
        self._check(urllib.parse.unquote, '\u6f22\u00fc', "\u6f22%C3%BC")

        # A mix of non-ASCII and percent-encoded characters, Latin-1
        # (Note, the string contains non-Latin-1-representable characters)
        self._check(urllib.parse.unquote, '\u6f22\u00fc', "\u6f22%FC",
                    encoding="latin-1")
Raymond Hettinger4b0f20d2005-10-15 16:41:53 +00001215
Brett Cannon74bfd702003-04-25 09:39:47 +00001216class urlencode_Tests(unittest.TestCase):
1217 """Tests for urlencode()"""
1218
def help_inputtype(self, given, test_type):
    """Check urlencode() output for one kind of input container.

    'given' must encode to exactly the three pairs 1st=1, 2nd=2 and
    3rd=3.  No particular order is assumed, since the input may be a
    dictionary.  'test_type' is only used to label failure messages.

    """
    encoded = urllib.parse.urlencode(given)
    for pair in ("1st=1", "2nd=2", "3rd=3"):
        self.assertIn(pair, encoded,
                      "testing %s: %s not found in %s" %
                      (test_type, pair, encoded))

    # Three pairs are joined by exactly two separators.
    amp_count = encoded.count('&')
    self.assertEqual(amp_count, 2,
                     "testing %s: expected 2 '&'s; got %s" %
                     (test_type, amp_count))

    # The first separator must sit between a value digit and the leading
    # digit of the next key.
    first_amp = encoded.index('&')
    before, after = encoded[first_amp - 1], encoded[first_amp + 1]
    self.assertTrue(before.isdigit() and after.isdigit(),
                    "testing %s: '&' not located in proper place in %s" %
                    (test_type, encoded))

    # 5 chars per pair, plus the two separators.
    expected_length = (5 * 3) + 2
    self.assertEqual(len(encoded), expected_length,
                     "testing %s: "
                     "unexpected number of characters: %s != %s" %
                     (test_type, len(encoded), expected_length))
1250
1251 def test_using_mapping(self):
1252 # Test passing in a mapping object as an argument.
1253 self.help_inputtype({"1st":'1', "2nd":'2', "3rd":'3'},
1254 "using dict as input type")
1255
1256 def test_using_sequence(self):
1257 # Test passing in a sequence of two-item sequences as an argument.
1258 self.help_inputtype([('1st', '1'), ('2nd', '2'), ('3rd', '3')],
1259 "using sequence of two-item tuples as input")
1260
1261 def test_quoting(self):
1262 # Make sure keys and values are quoted using quote_plus()
1263 given = {"&":"="}
1264 expect = "%s=%s" % (hexescape('&'), hexescape('='))
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001265 result = urllib.parse.urlencode(given)
Brett Cannon74bfd702003-04-25 09:39:47 +00001266 self.assertEqual(expect, result)
1267 given = {"key name":"A bunch of pluses"}
1268 expect = "key+name=A+bunch+of+pluses"
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001269 result = urllib.parse.urlencode(given)
Brett Cannon74bfd702003-04-25 09:39:47 +00001270 self.assertEqual(expect, result)
1271
1272 def test_doseq(self):
1273 # Test that passing True for 'doseq' parameter works correctly
1274 given = {'sequence':['1', '2', '3']}
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001275 expect = "sequence=%s" % urllib.parse.quote_plus(str(['1', '2', '3']))
1276 result = urllib.parse.urlencode(given)
Brett Cannon74bfd702003-04-25 09:39:47 +00001277 self.assertEqual(expect, result)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001278 result = urllib.parse.urlencode(given, True)
Brett Cannon74bfd702003-04-25 09:39:47 +00001279 for value in given["sequence"]:
1280 expect = "sequence=%s" % value
Ezio Melottib58e0bd2010-01-23 15:40:09 +00001281 self.assertIn(expect, result)
Brett Cannon74bfd702003-04-25 09:39:47 +00001282 self.assertEqual(result.count('&'), 2,
1283 "Expected 2 '&'s, got %s" % result.count('&'))
1284
Jeremy Hylton1ef7c6b2009-03-26 16:57:30 +00001285 def test_empty_sequence(self):
1286 self.assertEqual("", urllib.parse.urlencode({}))
1287 self.assertEqual("", urllib.parse.urlencode([]))
1288
1289 def test_nonstring_values(self):
1290 self.assertEqual("a=1", urllib.parse.urlencode({"a": 1}))
1291 self.assertEqual("a=None", urllib.parse.urlencode({"a": None}))
1292
1293 def test_nonstring_seq_values(self):
1294 self.assertEqual("a=1&a=2", urllib.parse.urlencode({"a": [1, 2]}, True))
1295 self.assertEqual("a=None&a=a",
1296 urllib.parse.urlencode({"a": [None, "a"]}, True))
Georg Brandl2daf6ae2012-02-20 19:54:16 +01001297 data = collections.OrderedDict([("a", 1), ("b", 1)])
Jeremy Hylton1ef7c6b2009-03-26 16:57:30 +00001298 self.assertEqual("a=a&a=b",
Georg Brandl2daf6ae2012-02-20 19:54:16 +01001299 urllib.parse.urlencode({"a": data}, True))
Jeremy Hylton1ef7c6b2009-03-26 16:57:30 +00001300
Senthil Kumarandf022da2010-07-03 17:48:22 +00001301 def test_urlencode_encoding(self):
1302 # ASCII encoding. Expect %3F with errors="replace'
1303 given = (('\u00a0', '\u00c1'),)
1304 expect = '%3F=%3F'
1305 result = urllib.parse.urlencode(given, encoding="ASCII", errors="replace")
1306 self.assertEqual(expect, result)
1307
1308 # Default is UTF-8 encoding.
1309 given = (('\u00a0', '\u00c1'),)
1310 expect = '%C2%A0=%C3%81'
1311 result = urllib.parse.urlencode(given)
1312 self.assertEqual(expect, result)
1313
1314 # Latin-1 encoding.
1315 given = (('\u00a0', '\u00c1'),)
1316 expect = '%A0=%C1'
1317 result = urllib.parse.urlencode(given, encoding="latin-1")
1318 self.assertEqual(expect, result)
1319
1320 def test_urlencode_encoding_doseq(self):
1321 # ASCII Encoding. Expect %3F with errors="replace'
1322 given = (('\u00a0', '\u00c1'),)
1323 expect = '%3F=%3F'
1324 result = urllib.parse.urlencode(given, doseq=True,
1325 encoding="ASCII", errors="replace")
1326 self.assertEqual(expect, result)
1327
1328 # ASCII Encoding. On a sequence of values.
1329 given = (("\u00a0", (1, "\u00c1")),)
1330 expect = '%3F=1&%3F=%3F'
1331 result = urllib.parse.urlencode(given, True,
1332 encoding="ASCII", errors="replace")
1333 self.assertEqual(expect, result)
1334
1335 # Utf-8
1336 given = (("\u00a0", "\u00c1"),)
1337 expect = '%C2%A0=%C3%81'
1338 result = urllib.parse.urlencode(given, True)
1339 self.assertEqual(expect, result)
1340
1341 given = (("\u00a0", (42, "\u00c1")),)
1342 expect = '%C2%A0=42&%C2%A0=%C3%81'
1343 result = urllib.parse.urlencode(given, True)
1344 self.assertEqual(expect, result)
1345
1346 # latin-1
1347 given = (("\u00a0", "\u00c1"),)
1348 expect = '%A0=%C1'
1349 result = urllib.parse.urlencode(given, True, encoding="latin-1")
1350 self.assertEqual(expect, result)
1351
1352 given = (("\u00a0", (42, "\u00c1")),)
1353 expect = '%A0=42&%A0=%C1'
1354 result = urllib.parse.urlencode(given, True, encoding="latin-1")
1355 self.assertEqual(expect, result)
1356
1357 def test_urlencode_bytes(self):
1358 given = ((b'\xa0\x24', b'\xc1\x24'),)
1359 expect = '%A0%24=%C1%24'
1360 result = urllib.parse.urlencode(given)
1361 self.assertEqual(expect, result)
1362 result = urllib.parse.urlencode(given, True)
1363 self.assertEqual(expect, result)
1364
1365 # Sequence of values
1366 given = ((b'\xa0\x24', (42, b'\xc1\x24')),)
1367 expect = '%A0%24=42&%A0%24=%C1%24'
1368 result = urllib.parse.urlencode(given, True)
1369 self.assertEqual(expect, result)
1370
1371 def test_urlencode_encoding_safe_parameter(self):
1372
1373 # Send '$' (\x24) as safe character
1374 # Default utf-8 encoding
1375
1376 given = ((b'\xa0\x24', b'\xc1\x24'),)
1377 result = urllib.parse.urlencode(given, safe=":$")
1378 expect = '%A0$=%C1$'
1379 self.assertEqual(expect, result)
1380
1381 given = ((b'\xa0\x24', b'\xc1\x24'),)
1382 result = urllib.parse.urlencode(given, doseq=True, safe=":$")
1383 expect = '%A0$=%C1$'
1384 self.assertEqual(expect, result)
1385
1386 # Safe parameter in sequence
1387 given = ((b'\xa0\x24', (b'\xc1\x24', 0xd, 42)),)
1388 expect = '%A0$=%C1$&%A0$=13&%A0$=42'
1389 result = urllib.parse.urlencode(given, True, safe=":$")
1390 self.assertEqual(expect, result)
1391
1392 # Test all above in latin-1 encoding
1393
1394 given = ((b'\xa0\x24', b'\xc1\x24'),)
1395 result = urllib.parse.urlencode(given, safe=":$",
1396 encoding="latin-1")
1397 expect = '%A0$=%C1$'
1398 self.assertEqual(expect, result)
1399
1400 given = ((b'\xa0\x24', b'\xc1\x24'),)
1401 expect = '%A0$=%C1$'
1402 result = urllib.parse.urlencode(given, doseq=True, safe=":$",
1403 encoding="latin-1")
1404
1405 given = ((b'\xa0\x24', (b'\xc1\x24', 0xd, 42)),)
1406 expect = '%A0$=%C1$&%A0$=13&%A0$=42'
1407 result = urllib.parse.urlencode(given, True, safe=":$",
1408 encoding="latin-1")
1409 self.assertEqual(expect, result)
1410
class Pathname_Tests(unittest.TestCase):
    """Test pathname2url() and url2pathname()"""

    def test_basic(self):
        # Make sure simple tests pass
        expected_path = os.path.join("parts", "of", "a", "path")
        expected_url = "parts/of/a/path"
        result = urllib.request.pathname2url(expected_path)
        self.assertEqual(expected_url, result,
                         "pathname2url() failed; %s != %s" %
                         (expected_url, result))
        result = urllib.request.url2pathname(expected_url)
        self.assertEqual(expected_path, result,
                         "url2pathname() failed; %s != %s" %
                         (expected_path, result))

    def test_quoting(self):
        # Test automatic quoting and unquoting works for pathname2url() and
        # url2pathname() respectively
        given = os.path.join("needs", "quot=ing", "here")
        expect = "needs/%s/here" % urllib.parse.quote("quot=ing")
        result = urllib.request.pathname2url(given)
        self.assertEqual(expect, result,
                         "pathname2url() failed; %s != %s" %
                         (expect, result))
        # Feed the quoted URL back in: the round trip must restore the path.
        expect = given
        result = urllib.request.url2pathname(result)
        self.assertEqual(expect, result,
                         "url2pathname() failed; %s != %s" %
                         (expect, result))
        given = os.path.join("make sure", "using_quote")
        expect = "%s/using_quote" % urllib.parse.quote("make sure")
        result = urllib.request.pathname2url(given)
        self.assertEqual(expect, result,
                         "pathname2url() failed; %s != %s" %
                         (expect, result))
        # A literal '+' in the URL is expected to come through unchanged
        # (i.e. unquote(), not unquote_plus(), semantics).
        given = "make+sure/using_unquote"
        expect = os.path.join("make+sure", "using_unquote")
        result = urllib.request.url2pathname(given)
        self.assertEqual(expect, result,
                         "url2pathname() failed; %s != %s" %
                         (expect, result))

    @unittest.skipUnless(sys.platform == 'win32',
                         'test specific to the Windows url2pathname() function.')
    def test_ntpath(self):
        # Several spellings of the drive root must all map to 'C:\'.
        given = ('/C:/', '///C:/', '/C|//')
        expect = 'C:\\'
        for url in given:
            result = urllib.request.url2pathname(url)
            self.assertEqual(expect, result,
                             'urllib.request.url2pathname() failed; %s != %s' %
                             (expect, result))
        given = '///C|/path'
        expect = 'C:\\path'
        result = urllib.request.url2pathname(given)
        self.assertEqual(expect, result,
                         'urllib.request.url2pathname() failed; %s != %s' %
                         (expect, result))
1470
class Utility_Tests(unittest.TestCase):
    """Checks for the small utility helpers shipped with urllib."""

    def test_thishost(self):
        """urllib.request.thishost() must hand back a tuple."""
        local_addresses = urllib.request.thishost()
        self.assertIsInstance(local_addresses, tuple)
1477
Senthil Kumaran690ce9b2009-05-05 18:41:13 +00001478
class URLopener_Tests(FakeHTTPMixin, unittest.TestCase):
    """Exercise the open() and retrieve() methods of the URLopener class."""

    def test_quoted_open(self):
        class DummyURLopener(urllib.request.URLopener):
            def open_spam(self, url):
                return url

        expected_warning = (
            'DummyURLopener style of invoking requests is deprecated.',
            DeprecationWarning,
        )
        with support.check_warnings(expected_warning):
            # A space in the URL gets percent-encoded before dispatch.
            opened = DummyURLopener().open('spam://example/ /')
            self.assertEqual(opened, '//example/%20/')

            # test the safe characters are not quoted by urlopen
            opened = DummyURLopener().open(
                "spam://c:|windows%/:=&?~#+!$,;'@()*[]|/path/")
            self.assertEqual(opened,
                             "//c:|windows%/:=&?~#+!$,;'@()*[]|/path/")

    @support.ignore_warnings(category=DeprecationWarning)
    def test_urlopener_retrieve_file(self):
        with support.temp_dir() as scratch_dir:
            handle, local_path = tempfile.mkstemp(dir=scratch_dir)
            os.close(handle)
            file_url = "file:" + urllib.request.pathname2url(local_path)
            retrieved = urllib.request.URLopener().retrieve(file_url)
            retrieved_path = retrieved[0]
            # Some buildbots have TEMP folder that uses a lowercase drive letter.
            self.assertEqual(os.path.normcase(retrieved_path),
                             os.path.normcase(local_path))

    @support.ignore_warnings(category=DeprecationWarning)
    def test_urlopener_retrieve_remote(self):
        remote_url = "http://www.python.org/file.txt"
        self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello!")
        self.addCleanup(self.unfakehttp)
        retrieved = urllib.request.URLopener().retrieve(remote_url)
        # The downloaded file keeps the extension of the remote resource.
        extension = os.path.splitext(retrieved[0])[1]
        self.assertEqual(extension, ".txt")

    @support.ignore_warnings(category=DeprecationWarning)
    def test_local_file_open(self):
        # bpo-35907, CVE-2019-9948: urllib must reject local_file:// scheme
        class DummyURLopener(urllib.request.URLopener):
            def open_local_file(self, url):
                return url

        for bad_url in ('local_file://example', 'local-file://example'):
            # Build the bound methods up front so that only the call itself
            # is what assertRaises() guards.
            entry_points = (
                urllib.request.urlopen,
                urllib.request.URLopener().open,
                urllib.request.URLopener().retrieve,
                DummyURLopener().open,
                DummyURLopener().retrieve,
            )
            for entry_point in entry_points:
                self.assertRaises(OSError, entry_point, bad_url)
1527
Xtreakc661b302019-05-19 19:10:06 +05301528
# The FTP wrapper tests below are deliberately commented out.
# It is unclear why they keep failing on Windows and SPARC.
# Everywhere else they work fine, but on those machines they sometimes
# fail in one of the tests and sometimes in another.  They pass on my
# Linux machine.
# If anybody has one of the problematic environments, please help!
# . Facundo
1536#
1537# def server(evt):
Georg Brandlf78e02b2008-06-10 17:40:04 +00001538# import socket, time
Guido van Rossume7ba4952007-06-06 23:52:48 +00001539# serv = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
1540# serv.settimeout(3)
1541# serv.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
1542# serv.bind(("", 9093))
Charles-François Natali6e204602014-07-23 19:28:13 +01001543# serv.listen()
Guido van Rossume7ba4952007-06-06 23:52:48 +00001544# try:
1545# conn, addr = serv.accept()
1546# conn.send("1 Hola mundo\n")
1547# cantdata = 0
1548# while cantdata < 13:
1549# data = conn.recv(13-cantdata)
1550# cantdata += len(data)
1551# time.sleep(.3)
1552# conn.send("2 No more lines\n")
1553# conn.close()
1554# except socket.timeout:
1555# pass
1556# finally:
1557# serv.close()
1558# evt.set()
1559#
1560# class FTPWrapperTests(unittest.TestCase):
1561#
1562# def setUp(self):
Georg Brandlf78e02b2008-06-10 17:40:04 +00001563# import ftplib, time, threading
Guido van Rossume7ba4952007-06-06 23:52:48 +00001564# ftplib.FTP.port = 9093
1565# self.evt = threading.Event()
1566# threading.Thread(target=server, args=(self.evt,)).start()
1567# time.sleep(.1)
1568#
1569# def tearDown(self):
1570# self.evt.wait()
1571#
1572# def testBasic(self):
1573# # connects
1574# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [])
Georg Brandlf78e02b2008-06-10 17:40:04 +00001575# ftp.close()
Guido van Rossume7ba4952007-06-06 23:52:48 +00001576#
1577# def testTimeoutNone(self):
Georg Brandlf78e02b2008-06-10 17:40:04 +00001578# # global default timeout is ignored
1579# import socket
Serhiy Storchaka25d8aea2014-02-08 14:50:08 +02001580# self.assertIsNone(socket.getdefaulttimeout())
Guido van Rossume7ba4952007-06-06 23:52:48 +00001581# socket.setdefaulttimeout(30)
1582# try:
1583# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [])
1584# finally:
Georg Brandlf78e02b2008-06-10 17:40:04 +00001585# socket.setdefaulttimeout(None)
Guido van Rossume7ba4952007-06-06 23:52:48 +00001586# self.assertEqual(ftp.ftp.sock.gettimeout(), 30)
Georg Brandlf78e02b2008-06-10 17:40:04 +00001587# ftp.close()
Guido van Rossume7ba4952007-06-06 23:52:48 +00001588#
Georg Brandlf78e02b2008-06-10 17:40:04 +00001589# def testTimeoutDefault(self):
1590# # global default timeout is used
1591# import socket
Serhiy Storchaka25d8aea2014-02-08 14:50:08 +02001592# self.assertIsNone(socket.getdefaulttimeout())
Georg Brandlf78e02b2008-06-10 17:40:04 +00001593# socket.setdefaulttimeout(30)
1594# try:
1595# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [])
1596# finally:
1597# socket.setdefaulttimeout(None)
1598# self.assertEqual(ftp.ftp.sock.gettimeout(), 30)
1599# ftp.close()
1600#
1601# def testTimeoutValue(self):
1602# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [],
1603# timeout=30)
1604# self.assertEqual(ftp.ftp.sock.gettimeout(), 30)
1605# ftp.close()
Guido van Rossume7ba4952007-06-06 23:52:48 +00001606
Senthil Kumaran8b081b72013-04-10 20:53:12 -07001607
class RequestTests(unittest.TestCase):
    """Checks on how urllib.request.Request selects its HTTP method."""

    def test_default_values(self):
        # No data means GET; supplying data (even an empty mapping) means POST.
        make_request = urllib.request.Request
        without_data = make_request("http://www.python.org")
        self.assertEqual(without_data.get_method(), 'GET')
        with_data = make_request("http://www.python.org", {})
        self.assertEqual(with_data.get_method(), 'POST')

    def test_with_method_arg(self):
        make_request = urllib.request.Request
        # An explicit method wins whether or not data is supplied.
        for data_args in ((), ({},)):
            req = make_request("http://www.python.org", *data_args,
                               method='HEAD')
            self.assertEqual(req.method, 'HEAD')
            self.assertEqual(req.get_method(), 'HEAD')
        # Reassigning the attribute afterwards is honoured by get_method().
        req = make_request("http://www.python.org", method='GET')
        self.assertEqual(req.get_method(), 'GET')
        req.method = 'HEAD'
        self.assertEqual(req.get_method(), 'HEAD')
Skip Montanaro080c9972001-01-28 21:12:22 +00001630
1631
class URL2PathNameTests(unittest.TestCase):
    """Exercise url2pathname() as imported from nturl2path."""

    def test_converting_drive_letter(self):
        cases = (
            ("///C|", 'C:'),
            ("///C:", 'C:'),
            ("///C|/", 'C:\\'),
        )
        for url, wanted in cases:
            self.assertEqual(url2pathname(url), wanted)

    def test_converting_when_no_drive_letter(self):
        # cannot end a raw string in \
        cases = (
            ("///C/test/", r'\\\C\test' '\\'),
            ("////C/test/", r'\\C\test' '\\'),
        )
        for url, wanted in cases:
            self.assertEqual(url2pathname(url), wanted)

    def test_simple_compare(self):
        converted = url2pathname("///C|/foo/bar/spam.foo")
        self.assertEqual(converted, r'C:\foo\bar\spam.foo')

    def test_non_ascii_drive_letter(self):
        self.assertRaises(IOError, url2pathname, "///\u00e8|/")

    def test_roundtrip_url2pathname(self):
        # path -> URL -> path must give back the starting path.
        for original in ('C:', r'\\\C\test\\', r'C:\foo\bar\spam.foo'):
            as_url = pathname2url(original)
            self.assertEqual(url2pathname(as_url), original)
Senthil Kumaran277e9092013-04-10 20:51:19 -07001658
class PathName2URLTests(unittest.TestCase):
    """Exercise pathname2url() as imported from nturl2path."""

    def test_converting_drive_letter(self):
        for path, wanted in (("C:", '///C:'), ("C:\\", '///C:')):
            self.assertEqual(pathname2url(path), wanted)

    def test_converting_when_no_drive_letter(self):
        # The trailing backslash is a separate literal because a raw string
        # cannot end in one.
        cases = (
            (r"\\\folder\test" "\\", '/////folder/test/'),
            (r"\\folder\test" "\\", '////folder/test/'),
            (r"\folder\test" "\\", '/folder/test/'),
        )
        for path, wanted in cases:
            self.assertEqual(pathname2url(path), wanted)

    def test_simple_compare(self):
        converted = pathname2url(r'C:\foo\bar\spam.foo')
        self.assertEqual(converted, "///C:/foo/bar/spam.foo")

    def test_long_drive_letter(self):
        self.assertRaises(IOError, pathname2url, "XX:\\")

    def test_roundtrip_pathname2url(self):
        # URL -> path -> URL must give back the starting URL.
        for original in ('///C:', '/////folder/test/', '///C:/foo/bar/spam.foo'):
            as_path = url2pathname(original)
            self.assertEqual(pathname2url(as_path), original)
Brett Cannon74bfd702003-04-25 09:39:47 +00001686
# Run every test case in this module when the file is executed as a script.
if __name__ == '__main__':
    unittest.main()