"""Regression tests for what was in Python 2's "urllib" module"""

import urllib.parse
import urllib.request
import urllib.error
import http.client
import email.message
import io
import unittest
from unittest.mock import patch
from test import support
import os
try:
    import ssl
except ImportError:
    ssl = None
import sys
import tempfile
from nturl2path import url2pathname, pathname2url

from base64 import b64encode
import collections


def hexescape(char):
    """Escape char as RFC 2396 specifies"""
    hex_repr = hex(ord(char))[2:].upper()
    if len(hex_repr) == 1:
        hex_repr = "0%s" % hex_repr
    return "%" + hex_repr
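# For example (illustrative values, easy to verify by hand): hexescape('/')
# returns '%2F' and hexescape('\n') returns '%0A'; the quoting tests below
# compare quote() and quote_plus() output against these escapes.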

# Shortcut for testing FancyURLopener
_urlopener = None


def urlopen(url, data=None, proxies=None):
    """urlopen(url [, data]) -> open file-like object"""
    global _urlopener
    if proxies is not None:
        opener = urllib.request.FancyURLopener(proxies=proxies)
    elif not _urlopener:
        opener = FancyURLopener()
        _urlopener = opener
    else:
        opener = _urlopener
    if data is None:
        return opener.open(url)
    else:
        return opener.open(url, data)


def FancyURLopener():
    with support.check_warnings(
            ('FancyURLopener style of invoking requests is deprecated.',
             DeprecationWarning)):
        return urllib.request.FancyURLopener()


def fakehttp(fakedata, mock_close=False):
    class FakeSocket(io.BytesIO):
        io_refs = 1

        def sendall(self, data):
            FakeHTTPConnection.buf = data

        def makefile(self, *args, **kwds):
            self.io_refs += 1
            return self

        def read(self, amt=None):
            if self.closed:
                return b""
            return io.BytesIO.read(self, amt)

        def readline(self, length=None):
            if self.closed:
                return b""
            return io.BytesIO.readline(self, length)

        def close(self):
            self.io_refs -= 1
            if self.io_refs == 0:
                io.BytesIO.close(self)

    class FakeHTTPConnection(http.client.HTTPConnection):

        # buffer to store data for verification in urlopen tests.
        buf = None

        def connect(self):
            self.sock = FakeSocket(self.fakedata)
            type(self).fakesock = self.sock

        if mock_close:
            # bpo-36918: HTTPConnection destructor calls close() which calls
            # flush(). Problem: flush() calls self.fp.flush() which raises
            # "ValueError: I/O operation on closed file" which is logged as an
            # "Exception ignored in". Override close() to silence this error.
            def close(self):
                pass
    FakeHTTPConnection.fakedata = fakedata

    return FakeHTTPConnection
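
# Rough usage sketch for the factory above (illustrative only; FakeHTTPMixin
# below is what the tests actually use):
#
#     conn_class = fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello!")
#     saved = http.client.HTTPConnection
#     http.client.HTTPConnection = conn_class  # as FakeHTTPMixin.fakehttp() does
#     ...exercise urllib.request against the canned response...
#     http.client.HTTPConnection = saved       # as unfakehttp() restores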


class FakeHTTPMixin(object):
    def fakehttp(self, fakedata, mock_close=False):
        fake_http_class = fakehttp(fakedata, mock_close=mock_close)
        self._connection_class = http.client.HTTPConnection
        http.client.HTTPConnection = fake_http_class

    def unfakehttp(self):
        http.client.HTTPConnection = self._connection_class


class FakeFTPMixin(object):
    def fakeftp(self):
        class FakeFtpWrapper(object):
            def __init__(self, user, passwd, host, port, dirs, timeout=None,
                         persistent=True):
                pass

            def retrfile(self, file, type):
                return io.BytesIO(), 0

            def close(self):
                pass

        self._ftpwrapper_class = urllib.request.ftpwrapper
        urllib.request.ftpwrapper = FakeFtpWrapper

    def unfakeftp(self):
        urllib.request.ftpwrapper = self._ftpwrapper_class


class urlopen_FileTests(unittest.TestCase):
    """Test urlopen() opening a temporary file.

    Try to test as much functionality as possible so as to cut down on reliance
    on connecting to the Net for testing.

    """

    def setUp(self):
        # Create a temp file to use for testing
        self.text = bytes("test_urllib: %s\n" % self.__class__.__name__,
                          "ascii")
        f = open(support.TESTFN, 'wb')
        try:
            f.write(self.text)
        finally:
            f.close()
        self.pathname = support.TESTFN
        self.returned_obj = urlopen("file:%s" % self.pathname)

    def tearDown(self):
        """Shut down the open object"""
        self.returned_obj.close()
        os.remove(support.TESTFN)

    def test_interface(self):
        # Make sure object returned by urlopen() has the specified methods
        for attr in ("read", "readline", "readlines", "fileno",
                     "close", "info", "geturl", "getcode", "__iter__"):
            self.assertTrue(hasattr(self.returned_obj, attr),
                            "object returned by urlopen() lacks %s attribute" %
                            attr)

    def test_read(self):
        self.assertEqual(self.text, self.returned_obj.read())

    def test_readline(self):
        self.assertEqual(self.text, self.returned_obj.readline())
        self.assertEqual(b'', self.returned_obj.readline(),
                         "calling readline() after exhausting the file did not"
                         " return an empty string")

    def test_readlines(self):
        lines_list = self.returned_obj.readlines()
        self.assertEqual(len(lines_list), 1,
                         "readlines() returned the wrong number of lines")
        self.assertEqual(lines_list[0], self.text,
                         "readlines() returned improper text")

    def test_fileno(self):
        file_num = self.returned_obj.fileno()
        self.assertIsInstance(file_num, int, "fileno() did not return an int")
        self.assertEqual(os.read(file_num, len(self.text)), self.text,
                         "Reading on the file descriptor returned by fileno() "
                         "did not return the expected text")

    def test_close(self):
        # Test close() by calling it here and then having it be called again
        # by the tearDown() method for the test
        self.returned_obj.close()

    def test_info(self):
        self.assertIsInstance(self.returned_obj.info(), email.message.Message)

    def test_geturl(self):
        self.assertEqual(self.returned_obj.geturl(), self.pathname)

    def test_getcode(self):
        self.assertIsNone(self.returned_obj.getcode())

    def test_iter(self):
        # Test iterator
        # Don't need to count number of iterations since test would fail the
        # instant it returned anything beyond the first line from the
        # comparison.
        # Use the iterator in the usual implicit way to test for ticket #4608.
        for line in self.returned_obj:
            self.assertEqual(line, self.text)

    def test_relativelocalfile(self):
        self.assertRaises(ValueError, urllib.request.urlopen, './' + self.pathname)


class ProxyTests(unittest.TestCase):

    def setUp(self):
        # Records changes to env vars
        self.env = support.EnvironmentVarGuard()
        # Delete all proxy related env vars
        for k in list(os.environ):
            if 'proxy' in k.lower():
                self.env.unset(k)

    def tearDown(self):
        # Restore all proxy related env vars
        self.env.__exit__()
        del self.env

    def test_getproxies_environment_keep_no_proxies(self):
        self.env.set('NO_PROXY', 'localhost')
        proxies = urllib.request.getproxies_environment()
        # getproxies_environment uses lowercased, truncated (no '_proxy') keys
        self.assertEqual('localhost', proxies['no'])
        # List of no_proxies with space.
        self.env.set('NO_PROXY', 'localhost, anotherdomain.com, newdomain.com:1234')
        self.assertTrue(urllib.request.proxy_bypass_environment('anotherdomain.com'))
        self.assertTrue(urllib.request.proxy_bypass_environment('anotherdomain.com:8888'))
        self.assertTrue(urllib.request.proxy_bypass_environment('newdomain.com:1234'))

    def test_proxy_cgi_ignore(self):
        try:
            self.env.set('HTTP_PROXY', 'http://somewhere:3128')
            proxies = urllib.request.getproxies_environment()
            self.assertEqual('http://somewhere:3128', proxies['http'])
            self.env.set('REQUEST_METHOD', 'GET')
            proxies = urllib.request.getproxies_environment()
            self.assertNotIn('http', proxies)
        finally:
            self.env.unset('REQUEST_METHOD')
            self.env.unset('HTTP_PROXY')

    def test_proxy_bypass_environment_host_match(self):
        bypass = urllib.request.proxy_bypass_environment
        self.env.set('NO_PROXY',
                     'localhost, anotherdomain.com, newdomain.com:1234, .d.o.t')
        self.assertTrue(bypass('localhost'))
        self.assertTrue(bypass('LocalHost'))            # MixedCase
        self.assertTrue(bypass('LOCALHOST'))            # UPPERCASE
        self.assertTrue(bypass('.localhost'))
        self.assertTrue(bypass('newdomain.com:1234'))
        self.assertTrue(bypass('.newdomain.com:1234'))
        self.assertTrue(bypass('foo.d.o.t'))            # issue 29142
        self.assertTrue(bypass('d.o.t'))
        self.assertTrue(bypass('anotherdomain.com:8888'))
        self.assertTrue(bypass('.anotherdomain.com:8888'))
        self.assertTrue(bypass('www.newdomain.com:1234'))
        self.assertFalse(bypass('prelocalhost'))
        self.assertFalse(bypass('newdomain.com'))       # no port
        self.assertFalse(bypass('newdomain.com:1235'))  # wrong port

    def test_proxy_bypass_environment_always_match(self):
        bypass = urllib.request.proxy_bypass_environment
        self.env.set('NO_PROXY', '*')
        self.assertTrue(bypass('newdomain.com'))
        self.assertTrue(bypass('newdomain.com:1234'))
        self.env.set('NO_PROXY', '*, anotherdomain.com')
        self.assertTrue(bypass('anotherdomain.com'))
        self.assertFalse(bypass('newdomain.com'))
        self.assertFalse(bypass('newdomain.com:1234'))

    def test_proxy_bypass_environment_newline(self):
        bypass = urllib.request.proxy_bypass_environment
        self.env.set('NO_PROXY',
                     'localhost, anotherdomain.com, newdomain.com:1234')
        self.assertFalse(bypass('localhost\n'))
        self.assertFalse(bypass('anotherdomain.com:8888\n'))
        self.assertFalse(bypass('newdomain.com:1234\n'))


class ProxyTests_withOrderedEnv(unittest.TestCase):

    def setUp(self):
        # We need to test conditions, where variable order _is_ significant
        self._saved_env = os.environ
        # Monkey patch os.environ, start with empty fake environment
        os.environ = collections.OrderedDict()

    def tearDown(self):
        os.environ = self._saved_env

    def test_getproxies_environment_prefer_lowercase(self):
        # Test lowercase preference with removal
        os.environ['no_proxy'] = ''
        os.environ['No_Proxy'] = 'localhost'
        self.assertFalse(urllib.request.proxy_bypass_environment('localhost'))
        self.assertFalse(urllib.request.proxy_bypass_environment('arbitrary'))
        os.environ['http_proxy'] = ''
        os.environ['HTTP_PROXY'] = 'http://somewhere:3128'
        proxies = urllib.request.getproxies_environment()
        self.assertEqual({}, proxies)
        # Test lowercase preference of proxy bypass and correct matching including ports
        os.environ['no_proxy'] = 'localhost, noproxy.com, my.proxy:1234'
        os.environ['No_Proxy'] = 'xyz.com'
        self.assertTrue(urllib.request.proxy_bypass_environment('localhost'))
        self.assertTrue(urllib.request.proxy_bypass_environment('noproxy.com:5678'))
        self.assertTrue(urllib.request.proxy_bypass_environment('my.proxy:1234'))
        self.assertFalse(urllib.request.proxy_bypass_environment('my.proxy'))
        self.assertFalse(urllib.request.proxy_bypass_environment('arbitrary'))
        # Test lowercase preference with replacement
        os.environ['http_proxy'] = 'http://somewhere:3128'
        os.environ['Http_Proxy'] = 'http://somewhereelse:3128'
        proxies = urllib.request.getproxies_environment()
        self.assertEqual('http://somewhere:3128', proxies['http'])


class urlopen_HttpTests(unittest.TestCase, FakeHTTPMixin, FakeFTPMixin):
    """Test urlopen() opening a fake http connection."""

    def check_read(self, ver):
        self.fakehttp(b"HTTP/" + ver + b" 200 OK\r\n\r\nHello!")
        try:
            fp = urlopen("http://python.org/")
            self.assertEqual(fp.readline(), b"Hello!")
            self.assertEqual(fp.readline(), b"")
            self.assertEqual(fp.geturl(), 'http://python.org/')
            self.assertEqual(fp.getcode(), 200)
        finally:
            self.unfakehttp()

    def test_url_fragment(self):
        # Issue #11703: geturl() omits fragments in the original URL.
        url = 'http://docs.python.org/library/urllib.html#OK'
        self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello!")
        try:
            fp = urllib.request.urlopen(url)
            self.assertEqual(fp.geturl(), url)
        finally:
            self.unfakehttp()

    def test_willclose(self):
        self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello!")
        try:
            resp = urlopen("http://www.python.org")
            self.assertTrue(resp.fp.will_close)
        finally:
            self.unfakehttp()

    @unittest.skipUnless(ssl, "ssl module required")
    def test_url_path_with_control_char_rejected(self):
        for char_no in list(range(0, 0x21)) + [0x7f]:
            char = chr(char_no)
            schemeless_url = f"//localhost:7777/test{char}/"
            self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello.")
            try:
                # We explicitly test urllib.request.urlopen() instead of the top
                # level 'def urlopen()' function defined in this... (quite ugly)
                # test suite. They use different url opening codepaths. Plain
                # urlopen uses FancyURLOpener which goes via a codepath that
                # calls urllib.parse.quote() on the URL which makes all of the
                # above attempts at injection within the url _path_ safe.
                escaped_char_repr = repr(char).replace('\\', r'\\')
                InvalidURL = http.client.InvalidURL
                with self.assertRaisesRegex(
                        InvalidURL, f"contain control.*{escaped_char_repr}"):
                    urllib.request.urlopen(f"http:{schemeless_url}")
                with self.assertRaisesRegex(
                        InvalidURL, f"contain control.*{escaped_char_repr}"):
                    urllib.request.urlopen(f"https:{schemeless_url}")
                # This code path quotes the URL so there is no injection.
                resp = urlopen(f"http:{schemeless_url}")
                self.assertNotIn(char, resp.geturl())
            finally:
                self.unfakehttp()

    @unittest.skipUnless(ssl, "ssl module required")
    def test_url_path_with_newline_header_injection_rejected(self):
        self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello.")
        host = "localhost:7777?a=1 HTTP/1.1\r\nX-injected: header\r\nTEST: 123"
        schemeless_url = "//" + host + ":8080/test/?test=a"
        try:
            # We explicitly test urllib.request.urlopen() instead of the top
            # level 'def urlopen()' function defined in this... (quite ugly)
            # test suite. They use different url opening codepaths. Plain
            # urlopen uses FancyURLOpener which goes via a codepath that
            # calls urllib.parse.quote() on the URL which makes all of the
            # above attempts at injection within the url _path_ safe.
            InvalidURL = http.client.InvalidURL
            with self.assertRaisesRegex(
                    InvalidURL, r"contain control.*\\r.*(found at least . .)"):
                urllib.request.urlopen(f"http:{schemeless_url}")
            with self.assertRaisesRegex(InvalidURL, r"contain control.*\\n"):
                urllib.request.urlopen(f"https:{schemeless_url}")
            # This code path quotes the URL so there is no injection.
            resp = urlopen(f"http:{schemeless_url}")
            self.assertNotIn(' ', resp.geturl())
            self.assertNotIn('\r', resp.geturl())
            self.assertNotIn('\n', resp.geturl())
        finally:
            self.unfakehttp()

    @unittest.skipUnless(ssl, "ssl module required")
    def test_url_host_with_control_char_rejected(self):
        for char_no in list(range(0, 0x21)) + [0x7f]:
            char = chr(char_no)
            schemeless_url = f"//localhost{char}/test/"
            self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello.")
            try:
                escaped_char_repr = repr(char).replace('\\', r'\\')
                InvalidURL = http.client.InvalidURL
                with self.assertRaisesRegex(
                        InvalidURL, f"contain control.*{escaped_char_repr}"):
                    urlopen(f"http:{schemeless_url}")
                with self.assertRaisesRegex(InvalidURL, f"contain control.*{escaped_char_repr}"):
                    urlopen(f"https:{schemeless_url}")
            finally:
                self.unfakehttp()

    @unittest.skipUnless(ssl, "ssl module required")
    def test_url_host_with_newline_header_injection_rejected(self):
        self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello.")
        host = "localhost\r\nX-injected: header\r\n"
        schemeless_url = "//" + host + ":8080/test/?test=a"
        try:
            InvalidURL = http.client.InvalidURL
            with self.assertRaisesRegex(
                    InvalidURL, r"contain control.*\\r"):
                urlopen(f"http:{schemeless_url}")
            with self.assertRaisesRegex(InvalidURL, r"contain control.*\\n"):
                urlopen(f"https:{schemeless_url}")
        finally:
            self.unfakehttp()

    def test_read_0_9(self):
        # "0.9" response accepted (but not "simple responses" without
        # a status line)
        self.check_read(b"0.9")

    def test_read_1_0(self):
        self.check_read(b"1.0")

    def test_read_1_1(self):
        self.check_read(b"1.1")

    def test_read_bogus(self):
        # urlopen() should raise OSError for many error codes.
        self.fakehttp(b'''HTTP/1.1 401 Authentication Required
Date: Wed, 02 Jan 2008 03:03:54 GMT
Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
Connection: close
Content-Type: text/html; charset=iso-8859-1
''', mock_close=True)
        try:
            self.assertRaises(OSError, urlopen, "http://python.org/")
        finally:
            self.unfakehttp()

    def test_invalid_redirect(self):
        # urlopen() should raise OSError for many error codes.
        self.fakehttp(b'''HTTP/1.1 302 Found
Date: Wed, 02 Jan 2008 03:03:54 GMT
Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
Location: file://guidocomputer.athome.com:/python/license
Connection: close
Content-Type: text/html; charset=iso-8859-1
''', mock_close=True)
        try:
            msg = "Redirection to url 'file:"
            with self.assertRaisesRegex(urllib.error.HTTPError, msg):
                urlopen("http://python.org/")
        finally:
            self.unfakehttp()

    def test_redirect_limit_independent(self):
        # Ticket #12923: make sure independent requests each use their
        # own retry limit.
        for i in range(FancyURLopener().maxtries):
            self.fakehttp(b'''HTTP/1.1 302 Found
Location: file://guidocomputer.athome.com:/python/license
Connection: close
''', mock_close=True)
            try:
                self.assertRaises(urllib.error.HTTPError, urlopen,
                                  "http://something")
            finally:
                self.unfakehttp()

    def test_empty_socket(self):
        # urlopen() raises OSError if the underlying socket does not send any
        # data. (#1680230)
        self.fakehttp(b'')
        try:
            self.assertRaises(OSError, urlopen, "http://something")
        finally:
            self.unfakehttp()

    def test_missing_localfile(self):
        # Test for #10836
        with self.assertRaises(urllib.error.URLError) as e:
            urlopen('file://localhost/a/file/which/doesnot/exists.py')
        self.assertTrue(e.exception.filename)
        self.assertTrue(e.exception.reason)

    def test_file_notexists(self):
        fd, tmp_file = tempfile.mkstemp()
        tmp_fileurl = 'file://localhost/' + tmp_file.replace(os.path.sep, '/')
        try:
            self.assertTrue(os.path.exists(tmp_file))
            with urlopen(tmp_fileurl) as fobj:
                self.assertTrue(fobj)
        finally:
            os.close(fd)
            os.unlink(tmp_file)
        self.assertFalse(os.path.exists(tmp_file))
        with self.assertRaises(urllib.error.URLError):
            urlopen(tmp_fileurl)

    def test_ftp_nohost(self):
        test_ftp_url = 'ftp:///path'
        with self.assertRaises(urllib.error.URLError) as e:
            urlopen(test_ftp_url)
        self.assertFalse(e.exception.filename)
        self.assertTrue(e.exception.reason)

    def test_ftp_nonexisting(self):
        with self.assertRaises(urllib.error.URLError) as e:
            urlopen('ftp://localhost/a/file/which/doesnot/exists.py')
        self.assertFalse(e.exception.filename)
        self.assertTrue(e.exception.reason)

    @patch.object(urllib.request, 'MAXFTPCACHE', 0)
    def test_ftp_cache_pruning(self):
        self.fakeftp()
        try:
            urllib.request.ftpcache['test'] = urllib.request.ftpwrapper('user', 'pass', 'localhost', 21, [])
            urlopen('ftp://localhost')
        finally:
            self.unfakeftp()

    def test_userpass_inurl(self):
        self.fakehttp(b"HTTP/1.0 200 OK\r\n\r\nHello!")
        try:
            fp = urlopen("http://user:pass@python.org/")
            self.assertEqual(fp.readline(), b"Hello!")
            self.assertEqual(fp.readline(), b"")
            self.assertEqual(fp.geturl(), 'http://user:pass@python.org/')
            self.assertEqual(fp.getcode(), 200)
        finally:
            self.unfakehttp()

    def test_userpass_inurl_w_spaces(self):
        self.fakehttp(b"HTTP/1.0 200 OK\r\n\r\nHello!")
        try:
            userpass = "a b:c d"
            url = "http://{}@python.org/".format(userpass)
            fakehttp_wrapper = http.client.HTTPConnection
            authorization = ("Authorization: Basic %s\r\n" %
                             b64encode(userpass.encode("ASCII")).decode("ASCII"))
            fp = urlopen(url)
            # The authorization header must be in place
            self.assertIn(authorization, fakehttp_wrapper.buf.decode("UTF-8"))
            self.assertEqual(fp.readline(), b"Hello!")
            self.assertEqual(fp.readline(), b"")
            # the spaces are quoted in URL so no match
            self.assertNotEqual(fp.geturl(), url)
            self.assertEqual(fp.getcode(), 200)
        finally:
            self.unfakehttp()

    def test_URLopener_deprecation(self):
        with support.check_warnings(('', DeprecationWarning)):
            urllib.request.URLopener()

    @unittest.skipUnless(ssl, "ssl module required")
    def test_cafile_and_context(self):
        context = ssl.create_default_context()
        with support.check_warnings(('', DeprecationWarning)):
            with self.assertRaises(ValueError):
                urllib.request.urlopen(
                    "https://localhost", cafile="/nonexistent/path", context=context
                )


class urlopen_DataTests(unittest.TestCase):
    """Test urlopen() opening a data URL."""

    def setUp(self):
        # text containing URL special- and unicode-characters
        self.text = "test data URLs :;,%=& \u00f6 \u00c4 "
        # 2x1 pixel RGB PNG image with one black and one white pixel
        self.image = (
            b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\x02\x00\x00\x00'
            b'\x01\x08\x02\x00\x00\x00{@\xe8\xdd\x00\x00\x00\x01sRGB\x00\xae'
            b'\xce\x1c\xe9\x00\x00\x00\x0fIDAT\x08\xd7c```\xf8\xff\xff?\x00'
            b'\x06\x01\x02\xfe\no/\x1e\x00\x00\x00\x00IEND\xaeB`\x82')

        self.text_url = (
            "data:text/plain;charset=UTF-8,test%20data%20URLs%20%3A%3B%2C%25%3"
            "D%26%20%C3%B6%20%C3%84%20")
        self.text_url_base64 = (
            "data:text/plain;charset=ISO-8859-1;base64,dGVzdCBkYXRhIFVSTHMgOjs"
            "sJT0mIPYgxCA%3D")
        # base64 encoded data URL that contains ignorable spaces,
        # such as "\n", " ", "%0A", and "%20".
        self.image_url = (
            "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAIAAAABCAIAAAB7\n"
            "QOjdAAAAAXNSR0IArs4c6QAAAA9JREFUCNdj%0AYGBg%2BP//PwAGAQL%2BCm8 "
            "vHgAAAABJRU5ErkJggg%3D%3D%0A%20")

        self.text_url_resp = urllib.request.urlopen(self.text_url)
        self.text_url_base64_resp = urllib.request.urlopen(
            self.text_url_base64)
        self.image_url_resp = urllib.request.urlopen(self.image_url)
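
    # Background note (illustrative summary, see RFC 2397 for the full rules):
    # a data URL has the general form "data:[<mediatype>][;base64],<data>", so
    # text_url above decodes back to self.text once its percent-escapes are
    # resolved, and image_url carries the PNG bytes as base64 with ignorable
    # whitespace mixed in.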

    def test_interface(self):
        # Make sure object returned by urlopen() has the specified methods
        for attr in ("read", "readline", "readlines",
                     "close", "info", "geturl", "getcode", "__iter__"):
            self.assertTrue(hasattr(self.text_url_resp, attr),
                            "object returned by urlopen() lacks %s attribute" %
                            attr)

    def test_info(self):
        self.assertIsInstance(self.text_url_resp.info(), email.message.Message)
        self.assertEqual(self.text_url_base64_resp.info().get_params(),
            [('text/plain', ''), ('charset', 'ISO-8859-1')])
        self.assertEqual(self.image_url_resp.info()['content-length'],
            str(len(self.image)))
        self.assertEqual(urllib.request.urlopen("data:,").info().get_params(),
            [('text/plain', ''), ('charset', 'US-ASCII')])

    def test_geturl(self):
        self.assertEqual(self.text_url_resp.geturl(), self.text_url)
        self.assertEqual(self.text_url_base64_resp.geturl(),
            self.text_url_base64)
        self.assertEqual(self.image_url_resp.geturl(), self.image_url)

    def test_read_text(self):
        self.assertEqual(self.text_url_resp.read().decode(
            dict(self.text_url_resp.info().get_params())['charset']), self.text)

    def test_read_text_base64(self):
        self.assertEqual(self.text_url_base64_resp.read().decode(
            dict(self.text_url_base64_resp.info().get_params())['charset']),
            self.text)

    def test_read_image(self):
        self.assertEqual(self.image_url_resp.read(), self.image)

    def test_missing_comma(self):
        self.assertRaises(ValueError, urllib.request.urlopen, 'data:text/plain')

    def test_invalid_base64_data(self):
        # missing padding character
        self.assertRaises(ValueError, urllib.request.urlopen, 'data:;base64,Cg=')


class urlretrieve_FileTests(unittest.TestCase):
    """Test urllib.urlretrieve() on local files"""

    def setUp(self):
        # Create a list of temporary files. Each item in the list is a file
        # name (absolute path or relative to the current working directory).
        # All files in this list will be deleted in the tearDown method. Note,
        # this only helps to make sure temporary files get deleted, but it
        # does nothing about trying to close files that may still be open. It
        # is the responsibility of the developer to properly close files even
        # when exceptional conditions occur.
        self.tempFiles = []

        # Create a temporary file.
        self.registerFileForCleanUp(support.TESTFN)
        self.text = b'testing urllib.urlretrieve'
        try:
            FILE = open(support.TESTFN, 'wb')
            FILE.write(self.text)
            FILE.close()
        finally:
            try: FILE.close()
            except: pass

    def tearDown(self):
        # Delete the temporary files.
        for each in self.tempFiles:
            try: os.remove(each)
            except: pass

    def constructLocalFileUrl(self, filePath):
        filePath = os.path.abspath(filePath)
        try:
            filePath.encode("utf-8")
        except UnicodeEncodeError:
            raise unittest.SkipTest("filePath is not encodable to utf8")
        return "file://%s" % urllib.request.pathname2url(filePath)

    def createNewTempFile(self, data=b""):
        """Creates a new temporary file containing the specified data,
        registers the file for deletion during the test fixture tear down, and
        returns the absolute path of the file."""

        newFd, newFilePath = tempfile.mkstemp()
        try:
            self.registerFileForCleanUp(newFilePath)
            newFile = os.fdopen(newFd, "wb")
            newFile.write(data)
            newFile.close()
        finally:
            try: newFile.close()
            except: pass
        return newFilePath

    def registerFileForCleanUp(self, fileName):
        self.tempFiles.append(fileName)

    def test_basic(self):
        # Make sure that a local file just gets its own location returned and
        # a headers value is returned.
        result = urllib.request.urlretrieve("file:%s" % support.TESTFN)
        self.assertEqual(result[0], support.TESTFN)
        self.assertIsInstance(result[1], email.message.Message,
                              "did not get an email.message.Message instance "
                              "as second returned value")

    def test_copy(self):
        # Test that setting the filename argument works.
        second_temp = "%s.2" % support.TESTFN
        self.registerFileForCleanUp(second_temp)
        result = urllib.request.urlretrieve(self.constructLocalFileUrl(
            support.TESTFN), second_temp)
        self.assertEqual(second_temp, result[0])
        self.assertTrue(os.path.exists(second_temp), "copy of the file was not "
                                                     "made")
        FILE = open(second_temp, 'rb')
        try:
            text = FILE.read()
            FILE.close()
        finally:
            try: FILE.close()
            except: pass
        self.assertEqual(self.text, text)

    def test_reporthook(self):
        # Make sure that the reporthook works.
        def hooktester(block_count, block_read_size, file_size, count_holder=[0]):
            self.assertIsInstance(block_count, int)
            self.assertIsInstance(block_read_size, int)
            self.assertIsInstance(file_size, int)
            self.assertEqual(block_count, count_holder[0])
            count_holder[0] = count_holder[0] + 1
        second_temp = "%s.2" % support.TESTFN
        self.registerFileForCleanUp(second_temp)
        urllib.request.urlretrieve(
            self.constructLocalFileUrl(support.TESTFN),
            second_temp, hooktester)

    def test_reporthook_0_bytes(self):
        # Test on zero length file. Should call reporthook only 1 time.
        report = []
        def hooktester(block_count, block_read_size, file_size, _report=report):
            _report.append((block_count, block_read_size, file_size))
        srcFileName = self.createNewTempFile()
        urllib.request.urlretrieve(self.constructLocalFileUrl(srcFileName),
            support.TESTFN, hooktester)
        self.assertEqual(len(report), 1)
        self.assertEqual(report[0][2], 0)

    def test_reporthook_5_bytes(self):
        # Test on 5 byte file. Should call reporthook only 2 times (once when
        # the "network connection" is established and once when the block is
        # read).
        report = []
        def hooktester(block_count, block_read_size, file_size, _report=report):
            _report.append((block_count, block_read_size, file_size))
        srcFileName = self.createNewTempFile(b"x" * 5)
        urllib.request.urlretrieve(self.constructLocalFileUrl(srcFileName),
            support.TESTFN, hooktester)
        self.assertEqual(len(report), 2)
        self.assertEqual(report[0][2], 5)
        self.assertEqual(report[1][2], 5)

    def test_reporthook_8193_bytes(self):
        # Test on 8193 byte file. Should call reporthook only 3 times (once
        # when the "network connection" is established, once for the next 8192
        # bytes, and once for the last byte).
        report = []
        def hooktester(block_count, block_read_size, file_size, _report=report):
            _report.append((block_count, block_read_size, file_size))
        srcFileName = self.createNewTempFile(b"x" * 8193)
        urllib.request.urlretrieve(self.constructLocalFileUrl(srcFileName),
            support.TESTFN, hooktester)
        self.assertEqual(len(report), 3)
        self.assertEqual(report[0][2], 8193)
        self.assertEqual(report[0][1], 8192)
        self.assertEqual(report[1][1], 8192)
        self.assertEqual(report[2][1], 8192)
Senthil Kumarance260142011-11-01 01:35:17 +0800811
812class urlretrieve_HttpTests(unittest.TestCase, FakeHTTPMixin):
813 """Test urllib.urlretrieve() using fake http connections"""
814
815 def test_short_content_raises_ContentTooShortError(self):
816 self.fakehttp(b'''HTTP/1.1 200 OK
817Date: Wed, 02 Jan 2008 03:03:54 GMT
818Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
819Connection: close
820Content-Length: 100
821Content-Type: text/html; charset=iso-8859-1
822
823FF
824''')
825
826 def _reporthook(par1, par2, par3):
827 pass
828
829 with self.assertRaises(urllib.error.ContentTooShortError):
830 try:
Stéphane Wirtela40681d2019-02-22 14:45:36 +0100831 urllib.request.urlretrieve(support.TEST_HTTP_URL,
Senthil Kumarance260142011-11-01 01:35:17 +0800832 reporthook=_reporthook)
833 finally:
834 self.unfakehttp()
835
836 def test_short_content_raises_ContentTooShortError_without_reporthook(self):
837 self.fakehttp(b'''HTTP/1.1 200 OK
838Date: Wed, 02 Jan 2008 03:03:54 GMT
839Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
840Connection: close
841Content-Length: 100
842Content-Type: text/html; charset=iso-8859-1
843
844FF
845''')
846 with self.assertRaises(urllib.error.ContentTooShortError):
847 try:
Stéphane Wirtela40681d2019-02-22 14:45:36 +0100848 urllib.request.urlretrieve(support.TEST_HTTP_URL)
Senthil Kumarance260142011-11-01 01:35:17 +0800849 finally:
850 self.unfakehttp()
851
852
class QuotingTests(unittest.TestCase):
    r"""Tests for urllib.quote() and urllib.quote_plus()

    According to RFC 3986 (Uniform Resource Identifiers), to escape a
    character you write it as '%' + <2 character US-ASCII hex value>.
    The Python code of ``'%' + hex(ord(<character>))[2:]`` escapes a
    character properly. Case does not matter on the hex letters.

    The various character sets specified are:

    Reserved characters : ";/?:@&=+$,"
        Have special meaning in URIs and must be escaped if not being used for
        their special meaning
    Data characters : letters, digits, and "-_.!~*'()"
        Unreserved and do not need to be escaped; can be, though, if desired
    Control characters : 0x00 - 0x1F, 0x7F
        Have no use in URIs so must be escaped
    space : 0x20
        Must be escaped
    Delimiters : '<>#%"'
        Must be escaped
    Unwise : "{}|\^[]`"
        Must be escaped

    """

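    # A minimal illustration of the behaviour exercised below, assuming the
    # documented defaults (safe='/' for quote(), safe='' for quote_plus()):
    #   urllib.parse.quote('a b/c')       -> 'a%20b/c'   ('/' left alone)
    #   urllib.parse.quote_plus('a b/c')  -> 'a+b%2Fc'   (space becomes '+')
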
    def test_never_quote(self):
        # Make sure quote() does not quote letters, digits, and "_,.-"
        do_not_quote = ''.join(["ABCDEFGHIJKLMNOPQRSTUVWXYZ",
                                "abcdefghijklmnopqrstuvwxyz",
                                "0123456789",
                                "_.-~"])
        result = urllib.parse.quote(do_not_quote)
        self.assertEqual(do_not_quote, result,
                         "using quote(): %r != %r" % (do_not_quote, result))
        result = urllib.parse.quote_plus(do_not_quote)
        self.assertEqual(do_not_quote, result,
                         "using quote_plus(): %r != %r" % (do_not_quote, result))

    def test_default_safe(self):
        # Test '/' is default value for 'safe' parameter
        self.assertEqual(urllib.parse.quote.__defaults__[0], '/')

    def test_safe(self):
        # Test setting 'safe' parameter does what it should do
        quote_by_default = "<>"
        result = urllib.parse.quote(quote_by_default, safe=quote_by_default)
        self.assertEqual(quote_by_default, result,
                         "using quote(): %r != %r" % (quote_by_default, result))
        result = urllib.parse.quote_plus(quote_by_default,
                                         safe=quote_by_default)
        self.assertEqual(quote_by_default, result,
                         "using quote_plus(): %r != %r" %
                         (quote_by_default, result))
        # Safe expressed as bytes rather than str
        result = urllib.parse.quote(quote_by_default, safe=b"<>")
        self.assertEqual(quote_by_default, result,
                         "using quote(): %r != %r" % (quote_by_default, result))
        # "Safe" non-ASCII characters should have no effect
        # (Since URIs are not allowed to have non-ASCII characters)
        result = urllib.parse.quote("a\xfcb", encoding="latin-1", safe="\xfc")
        expect = urllib.parse.quote("a\xfcb", encoding="latin-1", safe="")
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" %
                         (expect, result))
        # Same as above, but using a bytes rather than str
        result = urllib.parse.quote("a\xfcb", encoding="latin-1", safe=b"\xfc")
        expect = urllib.parse.quote("a\xfcb", encoding="latin-1", safe="")
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" %
                         (expect, result))

926 # Make sure all characters that should be quoted are by default sans
927 # space (separate test for that).
928 should_quote = [chr(num) for num in range(32)] # For 0x00 - 0x1F
R David Murray44b548d2016-09-08 13:59:53 -0400929 should_quote.append(r'<>#%"{}|\^[]`')
Brett Cannon74bfd702003-04-25 09:39:47 +0000930 should_quote.append(chr(127)) # For 0x7F
931 should_quote = ''.join(should_quote)
932 for char in should_quote:
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000933 result = urllib.parse.quote(char)
Brett Cannon74bfd702003-04-25 09:39:47 +0000934 self.assertEqual(hexescape(char), result,
Jeremy Hylton1ef7c6b2009-03-26 16:57:30 +0000935 "using quote(): "
936 "%s should be escaped to %s, not %s" %
Brett Cannon74bfd702003-04-25 09:39:47 +0000937 (char, hexescape(char), result))
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000938 result = urllib.parse.quote_plus(char)
Brett Cannon74bfd702003-04-25 09:39:47 +0000939 self.assertEqual(hexescape(char), result,
940 "using quote_plus(): "
Tim Petersc2659cf2003-05-12 20:19:37 +0000941 "%s should be escapes to %s, not %s" %
Brett Cannon74bfd702003-04-25 09:39:47 +0000942 (char, hexescape(char), result))
943 del should_quote
944 partial_quote = "ab[]cd"
945 expected = "ab%5B%5Dcd"
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000946 result = urllib.parse.quote(partial_quote)
Brett Cannon74bfd702003-04-25 09:39:47 +0000947 self.assertEqual(expected, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000948 "using quote(): %r != %r" % (expected, result))
Senthil Kumaran305a68e2011-09-13 06:40:27 +0800949 result = urllib.parse.quote_plus(partial_quote)
Brett Cannon74bfd702003-04-25 09:39:47 +0000950 self.assertEqual(expected, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000951 "using quote_plus(): %r != %r" % (expected, result))
Brett Cannon74bfd702003-04-25 09:39:47 +0000952
    def test_quoting_space(self):
        # Make sure quote() and quote_plus() handle spaces as specified in
        # their unique way
        result = urllib.parse.quote(' ')
        self.assertEqual(result, hexescape(' '),
                         "using quote(): %r != %r" % (result, hexescape(' ')))
        result = urllib.parse.quote_plus(' ')
        self.assertEqual(result, '+',
                         "using quote_plus(): %r != +" % result)
        given = "a b cd e f"
        expect = given.replace(' ', hexescape(' '))
        result = urllib.parse.quote(given)
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        expect = given.replace(' ', '+')
        result = urllib.parse.quote_plus(given)
        self.assertEqual(expect, result,
                         "using quote_plus(): %r != %r" % (expect, result))

    def test_quoting_plus(self):
        self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma'),
                         'alpha%2Bbeta+gamma')
        self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma', '+'),
                         'alpha+beta+gamma')
        # Test with bytes
        self.assertEqual(urllib.parse.quote_plus(b'alpha+beta gamma'),
                         'alpha%2Bbeta+gamma')
        # Test with safe bytes
        self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma', b'+'),
                         'alpha+beta+gamma')

    def test_quote_bytes(self):
        # Bytes should quote directly to percent-encoded values
        given = b"\xa2\xd8ab\xff"
        expect = "%A2%D8ab%FF"
        result = urllib.parse.quote(given)
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        # Encoding argument should raise type error on bytes input
        self.assertRaises(TypeError, urllib.parse.quote, given,
                          encoding="latin-1")
        # quote_from_bytes should work the same
        result = urllib.parse.quote_from_bytes(given)
        self.assertEqual(expect, result,
                         "using quote_from_bytes(): %r != %r"
                         % (expect, result))

    def test_quote_with_unicode(self):
        # Characters in Latin-1 range, encoded by default in UTF-8
        given = "\xa2\xd8ab\xff"
        expect = "%C2%A2%C3%98ab%C3%BF"
        result = urllib.parse.quote(given)
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        # Characters in Latin-1 range, encoded with None (the default)
        result = urllib.parse.quote(given, encoding=None, errors=None)
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        # Characters in Latin-1 range, encoded with Latin-1
        given = "\xa2\xd8ab\xff"
        expect = "%A2%D8ab%FF"
        result = urllib.parse.quote(given, encoding="latin-1")
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        # Characters in BMP, encoded by default in UTF-8
        given = "\u6f22\u5b57"              # "Kanji"
        expect = "%E6%BC%A2%E5%AD%97"
        result = urllib.parse.quote(given)
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        # Characters in BMP, encoded with Latin-1
        given = "\u6f22\u5b57"
        self.assertRaises(UnicodeEncodeError, urllib.parse.quote, given,
                          encoding="latin-1")
        # Characters in BMP, encoded with Latin-1, with replace error handling
        given = "\u6f22\u5b57"
        expect = "%3F%3F"                   # "??"
        result = urllib.parse.quote(given, encoding="latin-1",
                                    errors="replace")
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        # Characters in BMP, Latin-1, with xmlcharref error handling
        given = "\u6f22\u5b57"
        expect = "%26%2328450%3B%26%2323383%3B"     # "&#28450;&#23383;"
        result = urllib.parse.quote(given, encoding="latin-1",
                                    errors="xmlcharrefreplace")
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))

    def test_quote_plus_with_unicode(self):
        # Encoding (latin-1) test for quote_plus
        given = "\xa2\xd8 \xff"
        expect = "%A2%D8+%FF"
        result = urllib.parse.quote_plus(given, encoding="latin-1")
        self.assertEqual(expect, result,
                         "using quote_plus(): %r != %r" % (expect, result))
        # Errors test for quote_plus
        given = "ab\u6f22\u5b57 cd"
        expect = "ab%3F%3F+cd"
        result = urllib.parse.quote_plus(given, encoding="latin-1",
                                         errors="replace")
        self.assertEqual(expect, result,
                         "using quote_plus(): %r != %r" % (expect, result))


class UnquotingTests(unittest.TestCase):
    """Tests for unquote() and unquote_plus()

    See the doc string for QuotingTests for details on quoting and such.

    """

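    # A minimal illustration of the difference exercised below:
    #   urllib.parse.unquote('a%20b+c')      -> 'a b+c'   ('+' is preserved)
    #   urllib.parse.unquote_plus('a%20b+c') -> 'a b c'   ('+' becomes a space)
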
1065 def test_unquoting(self):
1066 # Make sure unquoting of all ASCII values works
1067 escape_list = []
1068 for num in range(128):
1069 given = hexescape(chr(num))
1070 expect = chr(num)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001071 result = urllib.parse.unquote(given)
Brett Cannon74bfd702003-04-25 09:39:47 +00001072 self.assertEqual(expect, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +00001073 "using unquote(): %r != %r" % (expect, result))
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001074 result = urllib.parse.unquote_plus(given)
Brett Cannon74bfd702003-04-25 09:39:47 +00001075 self.assertEqual(expect, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +00001076 "using unquote_plus(): %r != %r" %
Brett Cannon74bfd702003-04-25 09:39:47 +00001077 (expect, result))
1078 escape_list.append(given)
1079 escape_string = ''.join(escape_list)
1080 del escape_list
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001081 result = urllib.parse.unquote(escape_string)
Brett Cannon74bfd702003-04-25 09:39:47 +00001082 self.assertEqual(result.count('%'), 1,
Brett Cannon74bfd702003-04-25 09:39:47 +00001083                          "using unquote(): not all characters unescaped: "
1084 "%s" % result)
Georg Brandl604ef372010-07-31 08:20:02 +00001085 self.assertRaises((TypeError, AttributeError), urllib.parse.unquote, None)
1086 self.assertRaises((TypeError, AttributeError), urllib.parse.unquote, ())
Florent Xicluna62829dc2010-08-14 20:51:58 +00001087 with support.check_warnings(('', BytesWarning), quiet=True):
1088 self.assertRaises((TypeError, AttributeError), urllib.parse.unquote, b'')
Brett Cannon74bfd702003-04-25 09:39:47 +00001089
Guido van Rossum52dbbb92008-08-18 21:44:30 +00001090 def test_unquoting_badpercent(self):
1091 # Test unquoting on bad percent-escapes
1092 given = '%xab'
1093 expect = given
1094 result = urllib.parse.unquote(given)
1095 self.assertEqual(expect, result, "using unquote(): %r != %r"
1096 % (expect, result))
1097 given = '%x'
1098 expect = given
1099 result = urllib.parse.unquote(given)
1100 self.assertEqual(expect, result, "using unquote(): %r != %r"
1101 % (expect, result))
1102 given = '%'
1103 expect = given
1104 result = urllib.parse.unquote(given)
1105 self.assertEqual(expect, result, "using unquote(): %r != %r"
1106 % (expect, result))
1107 # unquote_to_bytes
1108 given = '%xab'
1109 expect = bytes(given, 'ascii')
1110 result = urllib.parse.unquote_to_bytes(given)
1111 self.assertEqual(expect, result, "using unquote_to_bytes(): %r != %r"
1112 % (expect, result))
1113 given = '%x'
1114 expect = bytes(given, 'ascii')
1115 result = urllib.parse.unquote_to_bytes(given)
1116 self.assertEqual(expect, result, "using unquote_to_bytes(): %r != %r"
1117 % (expect, result))
1118 given = '%'
1119 expect = bytes(given, 'ascii')
1120 result = urllib.parse.unquote_to_bytes(given)
1121 self.assertEqual(expect, result, "using unquote_to_bytes(): %r != %r"
1122 % (expect, result))
Georg Brandl604ef372010-07-31 08:20:02 +00001123 self.assertRaises((TypeError, AttributeError), urllib.parse.unquote_to_bytes, None)
1124 self.assertRaises((TypeError, AttributeError), urllib.parse.unquote_to_bytes, ())
Senthil Kumaran79e17f62010-07-19 18:17:19 +00001125
Guido van Rossum52dbbb92008-08-18 21:44:30 +00001126 def test_unquoting_mixed_case(self):
1127 # Test unquoting on mixed-case hex digits in the percent-escapes
1128 given = '%Ab%eA'
1129 expect = b'\xab\xea'
1130 result = urllib.parse.unquote_to_bytes(given)
1131 self.assertEqual(expect, result,
1132 "using unquote_to_bytes(): %r != %r"
1133 % (expect, result))
1134
Brett Cannon74bfd702003-04-25 09:39:47 +00001135 def test_unquoting_parts(self):
1136         # Make sure unquoting works when non-quoted characters are
1137         # interspersed
1138 given = 'ab%sd' % hexescape('c')
1139 expect = "abcd"
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001140 result = urllib.parse.unquote(given)
Brett Cannon74bfd702003-04-25 09:39:47 +00001141 self.assertEqual(expect, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +00001142                          "using unquote(): %r != %r" % (expect, result))
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001143 result = urllib.parse.unquote_plus(given)
Brett Cannon74bfd702003-04-25 09:39:47 +00001144 self.assertEqual(expect, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +00001145 "using unquote_plus(): %r != %r" % (expect, result))
Tim Petersc2659cf2003-05-12 20:19:37 +00001146
Brett Cannon74bfd702003-04-25 09:39:47 +00001147 def test_unquoting_plus(self):
1148 # Test difference between unquote() and unquote_plus()
1149 given = "are+there+spaces..."
1150 expect = given
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001151 result = urllib.parse.unquote(given)
Brett Cannon74bfd702003-04-25 09:39:47 +00001152 self.assertEqual(expect, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +00001153 "using unquote(): %r != %r" % (expect, result))
Brett Cannon74bfd702003-04-25 09:39:47 +00001154 expect = given.replace('+', ' ')
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001155 result = urllib.parse.unquote_plus(given)
Brett Cannon74bfd702003-04-25 09:39:47 +00001156 self.assertEqual(expect, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +00001157 "using unquote_plus(): %r != %r" % (expect, result))
1158
1159 def test_unquote_to_bytes(self):
1160 given = 'br%C3%BCckner_sapporo_20050930.doc'
1161 expect = b'br\xc3\xbcckner_sapporo_20050930.doc'
1162 result = urllib.parse.unquote_to_bytes(given)
1163 self.assertEqual(expect, result,
1164 "using unquote_to_bytes(): %r != %r"
1165 % (expect, result))
1166 # Test on a string with unescaped non-ASCII characters
1167 # (Technically an invalid URI; expect those characters to be UTF-8
1168 # encoded).
1169 result = urllib.parse.unquote_to_bytes("\u6f22%C3%BC")
1170 expect = b'\xe6\xbc\xa2\xc3\xbc' # UTF-8 for "\u6f22\u00fc"
1171 self.assertEqual(expect, result,
1172 "using unquote_to_bytes(): %r != %r"
1173 % (expect, result))
1174 # Test with a bytes as input
1175 given = b'%A2%D8ab%FF'
1176 expect = b'\xa2\xd8ab\xff'
1177 result = urllib.parse.unquote_to_bytes(given)
1178 self.assertEqual(expect, result,
1179 "using unquote_to_bytes(): %r != %r"
1180 % (expect, result))
1181 # Test with a bytes as input, with unescaped non-ASCII bytes
1182 # (Technically an invalid URI; expect those bytes to be preserved)
1183 given = b'%A2\xd8ab%FF'
1184 expect = b'\xa2\xd8ab\xff'
1185 result = urllib.parse.unquote_to_bytes(given)
1186 self.assertEqual(expect, result,
1187 "using unquote_to_bytes(): %r != %r"
1188 % (expect, result))
Brett Cannon74bfd702003-04-25 09:39:47 +00001189
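    # Summarising the two input types exercised above (illustrative sketch):
    # a str argument is UTF-8 encoded before the escapes are decoded, while
    # a bytes argument is passed through with only the escapes decoded:
    #   >>> urllib.parse.unquote_to_bytes("\u00fc%20")
    #   b'\xc3\xbc '
    #   >>> urllib.parse.unquote_to_bytes(b"\xfc%20")
    #   b'\xfc '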
Raymond Hettinger4b0f20d2005-10-15 16:41:53 +00001190 def test_unquote_with_unicode(self):
Guido van Rossum52dbbb92008-08-18 21:44:30 +00001191 # Characters in the Latin-1 range, encoded with UTF-8
1192 given = 'br%C3%BCckner_sapporo_20050930.doc'
1193 expect = 'br\u00fcckner_sapporo_20050930.doc'
1194 result = urllib.parse.unquote(given)
1195 self.assertEqual(expect, result,
1196 "using unquote(): %r != %r" % (expect, result))
1197 # Characters in the Latin-1 range, encoded with None (default)
1198 result = urllib.parse.unquote(given, encoding=None, errors=None)
1199 self.assertEqual(expect, result,
1200 "using unquote(): %r != %r" % (expect, result))
1201
1202 # Characters in the Latin-1 range, encoded with Latin-1
1203 result = urllib.parse.unquote('br%FCckner_sapporo_20050930.doc',
1204 encoding="latin-1")
1205 expect = 'br\u00fcckner_sapporo_20050930.doc'
1206 self.assertEqual(expect, result,
1207 "using unquote(): %r != %r" % (expect, result))
1208
1209 # Characters in BMP, encoded with UTF-8
1210 given = "%E6%BC%A2%E5%AD%97"
1211 expect = "\u6f22\u5b57" # "Kanji"
1212 result = urllib.parse.unquote(given)
1213 self.assertEqual(expect, result,
1214 "using unquote(): %r != %r" % (expect, result))
1215
1216 # Decode with UTF-8, invalid sequence
1217 given = "%F3%B1"
1218 expect = "\ufffd" # Replacement character
1219 result = urllib.parse.unquote(given)
1220 self.assertEqual(expect, result,
1221 "using unquote(): %r != %r" % (expect, result))
1222
1223 # Decode with UTF-8, invalid sequence, replace errors
1224 result = urllib.parse.unquote(given, errors="replace")
1225 self.assertEqual(expect, result,
1226 "using unquote(): %r != %r" % (expect, result))
1227
1228 # Decode with UTF-8, invalid sequence, ignoring errors
1229 given = "%F3%B1"
1230 expect = ""
1231 result = urllib.parse.unquote(given, errors="ignore")
1232 self.assertEqual(expect, result,
1233 "using unquote(): %r != %r" % (expect, result))
1234
1235 # A mix of non-ASCII and percent-encoded characters, UTF-8
1236 result = urllib.parse.unquote("\u6f22%C3%BC")
1237 expect = '\u6f22\u00fc'
1238 self.assertEqual(expect, result,
1239 "using unquote(): %r != %r" % (expect, result))
1240
1241 # A mix of non-ASCII and percent-encoded characters, Latin-1
1242 # (Note, the string contains non-Latin-1-representable characters)
1243 result = urllib.parse.unquote("\u6f22%FC", encoding="latin-1")
1244 expect = '\u6f22\u00fc'
1245 self.assertEqual(expect, result,
1246 "using unquote(): %r != %r" % (expect, result))
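    # Note: the encoding/errors arguments only affect how the percent-escaped
    # byte sequences are decoded; unescaped characters are kept as-is, e.g.
    # unquote('%FC', encoding='latin-1') gives '\xfc'.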
Raymond Hettinger4b0f20d2005-10-15 16:41:53 +00001247
Brett Cannon74bfd702003-04-25 09:39:47 +00001248class urlencode_Tests(unittest.TestCase):
1249 """Tests for urlencode()"""
1250
1251 def help_inputtype(self, given, test_type):
1252 """Helper method for testing different input types.
Tim Petersc2659cf2003-05-12 20:19:37 +00001253
Brett Cannon74bfd702003-04-25 09:39:47 +00001254 'given' must lead to only the pairs:
1255 * 1st, 1
1256 * 2nd, 2
1257 * 3rd, 3
Tim Petersc2659cf2003-05-12 20:19:37 +00001258
Brett Cannon74bfd702003-04-25 09:39:47 +00001259 The test cannot assume anything about order: the docs make no guarantee,
1260 and the input may be a dictionary.
Tim Petersc2659cf2003-05-12 20:19:37 +00001261
Brett Cannon74bfd702003-04-25 09:39:47 +00001262 """
1263 expect_somewhere = ["1st=1", "2nd=2", "3rd=3"]
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001264 result = urllib.parse.urlencode(given)
Brett Cannon74bfd702003-04-25 09:39:47 +00001265 for expected in expect_somewhere:
Ezio Melottib58e0bd2010-01-23 15:40:09 +00001266 self.assertIn(expected, result,
Brett Cannon74bfd702003-04-25 09:39:47 +00001267 "testing %s: %s not found in %s" %
1268 (test_type, expected, result))
1269 self.assertEqual(result.count('&'), 2,
1270 "testing %s: expected 2 '&'s; got %s" %
1271 (test_type, result.count('&')))
1272 amp_location = result.index('&')
1273 on_amp_left = result[amp_location - 1]
1274 on_amp_right = result[amp_location + 1]
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00001275 self.assertTrue(on_amp_left.isdigit() and on_amp_right.isdigit(),
Brett Cannon74bfd702003-04-25 09:39:47 +00001276 "testing %s: '&' not located in proper place in %s" %
1277 (test_type, result))
1278         self.assertEqual(len(result), (5 * 3) + 2,  # 5 chars per pair plus 2 '&'s
1279 "testing %s: "
1280 "unexpected number of characters: %s != %s" %
1281 (test_type, len(result), (5 * 3) + 2))
1282
1283 def test_using_mapping(self):
1284 # Test passing in a mapping object as an argument.
1285 self.help_inputtype({"1st":'1', "2nd":'2', "3rd":'3'},
1286 "using dict as input type")
1287
1288 def test_using_sequence(self):
1289 # Test passing in a sequence of two-item sequences as an argument.
1290 self.help_inputtype([('1st', '1'), ('2nd', '2'), ('3rd', '3')],
1291 "using sequence of two-item tuples as input")
1292
1293 def test_quoting(self):
1294 # Make sure keys and values are quoted using quote_plus()
1295 given = {"&":"="}
1296 expect = "%s=%s" % (hexescape('&'), hexescape('='))
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001297 result = urllib.parse.urlencode(given)
Brett Cannon74bfd702003-04-25 09:39:47 +00001298 self.assertEqual(expect, result)
1299 given = {"key name":"A bunch of pluses"}
1300 expect = "key+name=A+bunch+of+pluses"
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001301 result = urllib.parse.urlencode(given)
Brett Cannon74bfd702003-04-25 09:39:47 +00001302 self.assertEqual(expect, result)
1303
1304 def test_doseq(self):
1305         # Test that passing True for the 'doseq' parameter works correctly
1306 given = {'sequence':['1', '2', '3']}
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001307 expect = "sequence=%s" % urllib.parse.quote_plus(str(['1', '2', '3']))
1308 result = urllib.parse.urlencode(given)
Brett Cannon74bfd702003-04-25 09:39:47 +00001309 self.assertEqual(expect, result)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001310 result = urllib.parse.urlencode(given, True)
Brett Cannon74bfd702003-04-25 09:39:47 +00001311 for value in given["sequence"]:
1312 expect = "sequence=%s" % value
Ezio Melottib58e0bd2010-01-23 15:40:09 +00001313 self.assertIn(expect, result)
Brett Cannon74bfd702003-04-25 09:39:47 +00001314 self.assertEqual(result.count('&'), 2,
1315 "Expected 2 '&'s, got %s" % result.count('&'))
1316
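    # An illustrative sketch of the doseq distinction tested above: with the
    # default doseq=False a list value is encoded as the quoted str() of the
    # whole list, while doseq=True emits one key=value pair per item:
    #   >>> urllib.parse.urlencode({'k': [1, 2]})
    #   'k=%5B1%2C+2%5D'
    #   >>> urllib.parse.urlencode({'k': [1, 2]}, doseq=True)
    #   'k=1&k=2'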
Jeremy Hylton1ef7c6b2009-03-26 16:57:30 +00001317 def test_empty_sequence(self):
1318 self.assertEqual("", urllib.parse.urlencode({}))
1319 self.assertEqual("", urllib.parse.urlencode([]))
1320
1321 def test_nonstring_values(self):
1322 self.assertEqual("a=1", urllib.parse.urlencode({"a": 1}))
1323 self.assertEqual("a=None", urllib.parse.urlencode({"a": None}))
1324
1325 def test_nonstring_seq_values(self):
1326 self.assertEqual("a=1&a=2", urllib.parse.urlencode({"a": [1, 2]}, True))
1327 self.assertEqual("a=None&a=a",
1328 urllib.parse.urlencode({"a": [None, "a"]}, True))
Georg Brandl2daf6ae2012-02-20 19:54:16 +01001329 data = collections.OrderedDict([("a", 1), ("b", 1)])
Jeremy Hylton1ef7c6b2009-03-26 16:57:30 +00001330 self.assertEqual("a=a&a=b",
Georg Brandl2daf6ae2012-02-20 19:54:16 +01001331 urllib.parse.urlencode({"a": data}, True))
Jeremy Hylton1ef7c6b2009-03-26 16:57:30 +00001332
Senthil Kumarandf022da2010-07-03 17:48:22 +00001333 def test_urlencode_encoding(self):
1334         # ASCII encoding. Expect %3F with errors="replace"
1335 given = (('\u00a0', '\u00c1'),)
1336 expect = '%3F=%3F'
1337 result = urllib.parse.urlencode(given, encoding="ASCII", errors="replace")
1338 self.assertEqual(expect, result)
1339
1340 # Default is UTF-8 encoding.
1341 given = (('\u00a0', '\u00c1'),)
1342 expect = '%C2%A0=%C3%81'
1343 result = urllib.parse.urlencode(given)
1344 self.assertEqual(expect, result)
1345
1346 # Latin-1 encoding.
1347 given = (('\u00a0', '\u00c1'),)
1348 expect = '%A0=%C1'
1349 result = urllib.parse.urlencode(given, encoding="latin-1")
1350 self.assertEqual(expect, result)
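        # The encoding/errors arguments are applied when the str keys and
        # values are encoded to bytes before percent-escaping, which is why
        # latin-1 yields single-byte escapes (%A0) where UTF-8 yields two
        # bytes (%C2%A0) above.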
1351
1352 def test_urlencode_encoding_doseq(self):
1353         # ASCII encoding. Expect %3F with errors="replace"
1354 given = (('\u00a0', '\u00c1'),)
1355 expect = '%3F=%3F'
1356 result = urllib.parse.urlencode(given, doseq=True,
1357 encoding="ASCII", errors="replace")
1358 self.assertEqual(expect, result)
1359
1360 # ASCII Encoding. On a sequence of values.
1361 given = (("\u00a0", (1, "\u00c1")),)
1362 expect = '%3F=1&%3F=%3F'
1363 result = urllib.parse.urlencode(given, True,
1364 encoding="ASCII", errors="replace")
1365 self.assertEqual(expect, result)
1366
1367         # UTF-8
1368 given = (("\u00a0", "\u00c1"),)
1369 expect = '%C2%A0=%C3%81'
1370 result = urllib.parse.urlencode(given, True)
1371 self.assertEqual(expect, result)
1372
1373 given = (("\u00a0", (42, "\u00c1")),)
1374 expect = '%C2%A0=42&%C2%A0=%C3%81'
1375 result = urllib.parse.urlencode(given, True)
1376 self.assertEqual(expect, result)
1377
1378 # latin-1
1379 given = (("\u00a0", "\u00c1"),)
1380 expect = '%A0=%C1'
1381 result = urllib.parse.urlencode(given, True, encoding="latin-1")
1382 self.assertEqual(expect, result)
1383
1384 given = (("\u00a0", (42, "\u00c1")),)
1385 expect = '%A0=42&%A0=%C1'
1386 result = urllib.parse.urlencode(given, True, encoding="latin-1")
1387 self.assertEqual(expect, result)
1388
1389 def test_urlencode_bytes(self):
1390 given = ((b'\xa0\x24', b'\xc1\x24'),)
1391 expect = '%A0%24=%C1%24'
1392 result = urllib.parse.urlencode(given)
1393 self.assertEqual(expect, result)
1394 result = urllib.parse.urlencode(given, True)
1395 self.assertEqual(expect, result)
1396
1397 # Sequence of values
1398 given = ((b'\xa0\x24', (42, b'\xc1\x24')),)
1399 expect = '%A0%24=42&%A0%24=%C1%24'
1400 result = urllib.parse.urlencode(given, True)
1401 self.assertEqual(expect, result)
1402
1403 def test_urlencode_encoding_safe_parameter(self):
1404
1405         # Send '$' (\x24) as a safe character
1406         # Default UTF-8 encoding
1407
1408 given = ((b'\xa0\x24', b'\xc1\x24'),)
1409 result = urllib.parse.urlencode(given, safe=":$")
1410 expect = '%A0$=%C1$'
1411 self.assertEqual(expect, result)
1412
1413 given = ((b'\xa0\x24', b'\xc1\x24'),)
1414 result = urllib.parse.urlencode(given, doseq=True, safe=":$")
1415 expect = '%A0$=%C1$'
1416 self.assertEqual(expect, result)
1417
1418 # Safe parameter in sequence
1419 given = ((b'\xa0\x24', (b'\xc1\x24', 0xd, 42)),)
1420 expect = '%A0$=%C1$&%A0$=13&%A0$=42'
1421 result = urllib.parse.urlencode(given, True, safe=":$")
1422 self.assertEqual(expect, result)
1423
1424         # Test all of the above with latin-1 encoding
1425
1426 given = ((b'\xa0\x24', b'\xc1\x24'),)
1427 result = urllib.parse.urlencode(given, safe=":$",
1428 encoding="latin-1")
1429 expect = '%A0$=%C1$'
1430 self.assertEqual(expect, result)
1431
1432 given = ((b'\xa0\x24', b'\xc1\x24'),)
1433 expect = '%A0$=%C1$'
1434 result = urllib.parse.urlencode(given, doseq=True, safe=":$",
1435                                         encoding="latin-1")
        self.assertEqual(expect, result)
1436
1437 given = ((b'\xa0\x24', (b'\xc1\x24', 0xd, 42)),)
1438 expect = '%A0$=%C1$&%A0$=13&%A0$=42'
1439 result = urllib.parse.urlencode(given, True, safe=":$",
1440 encoding="latin-1")
1441 self.assertEqual(expect, result)
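        # Characters listed in 'safe' are left unescaped in both keys and
        # values; everything else outside the always-safe set is still
        # percent-encoded, so '$' survives above while '\xa0' becomes %A0.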
1442
Brett Cannon74bfd702003-04-25 09:39:47 +00001443class Pathname_Tests(unittest.TestCase):
1444 """Test pathname2url() and url2pathname()"""
1445
1446 def test_basic(self):
1447 # Make sure simple tests pass
1448 expected_path = os.path.join("parts", "of", "a", "path")
1449 expected_url = "parts/of/a/path"
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001450 result = urllib.request.pathname2url(expected_path)
Brett Cannon74bfd702003-04-25 09:39:47 +00001451 self.assertEqual(expected_url, result,
1452 "pathname2url() failed; %s != %s" %
1453 (result, expected_url))
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001454 result = urllib.request.url2pathname(expected_url)
Brett Cannon74bfd702003-04-25 09:39:47 +00001455 self.assertEqual(expected_path, result,
1456                          "url2pathname() failed; %s != %s" %
1457 (result, expected_path))
1458
1459 def test_quoting(self):
1460 # Test automatic quoting and unquoting works for pathnam2url() and
1461         # Test that automatic quoting and unquoting work for pathname2url() and
1462 given = os.path.join("needs", "quot=ing", "here")
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001463 expect = "needs/%s/here" % urllib.parse.quote("quot=ing")
1464 result = urllib.request.pathname2url(given)
Brett Cannon74bfd702003-04-25 09:39:47 +00001465 self.assertEqual(expect, result,
1466 "pathname2url() failed; %s != %s" %
1467 (expect, result))
1468 expect = given
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001469 result = urllib.request.url2pathname(result)
Brett Cannon74bfd702003-04-25 09:39:47 +00001470 self.assertEqual(expect, result,
1471 "url2pathname() failed; %s != %s" %
1472 (expect, result))
1473 given = os.path.join("make sure", "using_quote")
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001474 expect = "%s/using_quote" % urllib.parse.quote("make sure")
1475 result = urllib.request.pathname2url(given)
Brett Cannon74bfd702003-04-25 09:39:47 +00001476 self.assertEqual(expect, result,
1477 "pathname2url() failed; %s != %s" %
1478 (expect, result))
1479 given = "make+sure/using_unquote"
1480 expect = os.path.join("make+sure", "using_unquote")
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001481 result = urllib.request.url2pathname(given)
Brett Cannon74bfd702003-04-25 09:39:47 +00001482 self.assertEqual(expect, result,
1483 "url2pathname() failed; %s != %s" %
1484 (expect, result))
Tim Petersc2659cf2003-05-12 20:19:37 +00001485
Senthil Kumaran2d2ea1b2011-04-14 13:16:30 +08001486 @unittest.skipUnless(sys.platform == 'win32',
1487                          'test specific to the urllib.request.url2pathname function.')
1488 def test_ntpath(self):
1489 given = ('/C:/', '///C:/', '/C|//')
1490 expect = 'C:\\'
1491 for url in given:
1492 result = urllib.request.url2pathname(url)
1493 self.assertEqual(expect, result,
1494                          'urllib.request.url2pathname() failed; %s != %s' %
1495 (expect, result))
1496 given = '///C|/path'
1497 expect = 'C:\\path'
1498 result = urllib.request.url2pathname(given)
1499 self.assertEqual(expect, result,
1500 'urllib.request.url2pathname() failed; %s != %s' %
1501 (expect, result))
1502
Senthil Kumaraneaaec272009-03-30 21:54:41 +00001503class Utility_Tests(unittest.TestCase):
1504     """Test case for the various utility functions in urllib."""
1505
Senthil Kumaran1b7da512011-10-06 00:32:02 +08001506 def test_thishost(self):
1507         """Test that the urllib.request.thishost() utility function returns a tuple."""
1508 self.assertIsInstance(urllib.request.thishost(), tuple)
1509
Senthil Kumaran690ce9b2009-05-05 18:41:13 +00001510
Xtreakc661b302019-05-19 19:10:06 +05301511class URLopener_Tests(FakeHTTPMixin, unittest.TestCase):
Senthil Kumaran690ce9b2009-05-05 18:41:13 +00001512     """Test case for the open() method of the URLopener class."""
1513
1514 def test_quoted_open(self):
1515 class DummyURLopener(urllib.request.URLopener):
1516 def open_spam(self, url):
1517 return url
Ezio Melotti79b99db2013-02-21 02:41:42 +02001518 with support.check_warnings(
1519 ('DummyURLopener style of invoking requests is deprecated.',
1520 DeprecationWarning)):
1521 self.assertEqual(DummyURLopener().open(
1522 'spam://example/ /'),'//example/%20/')
Senthil Kumaran690ce9b2009-05-05 18:41:13 +00001523
Ezio Melotti79b99db2013-02-21 02:41:42 +02001524         # Test that the safe characters are not quoted by URLopener.open()
1525 self.assertEqual(DummyURLopener().open(
1526 "spam://c:|windows%/:=&?~#+!$,;'@()*[]|/path/"),
1527 "//c:|windows%/:=&?~#+!$,;'@()*[]|/path/")
Senthil Kumaran734f0592010-02-20 22:19:04 +00001528
Xtreakc661b302019-05-19 19:10:06 +05301529 @support.ignore_warnings(category=DeprecationWarning)
1530 def test_urlopener_retrieve_file(self):
1531 with support.temp_dir() as tmpdir:
1532 fd, tmpfile = tempfile.mkstemp(dir=tmpdir)
1533 os.close(fd)
1534 fileurl = "file:" + urllib.request.pathname2url(tmpfile)
1535 filename, _ = urllib.request.URLopener().retrieve(fileurl)
Berker Peksag2725cb02019-05-22 02:00:35 +03001536         # Some buildbots have a TEMP folder that uses a lowercase drive letter.
1537 self.assertEqual(os.path.normcase(filename), os.path.normcase(tmpfile))
Xtreakc661b302019-05-19 19:10:06 +05301538
1539 @support.ignore_warnings(category=DeprecationWarning)
1540 def test_urlopener_retrieve_remote(self):
1541 url = "http://www.python.org/file.txt"
1542 self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello!")
1543 self.addCleanup(self.unfakehttp)
1544 filename, _ = urllib.request.URLopener().retrieve(url)
1545 self.assertEqual(os.path.splitext(filename)[1], ".txt")
1546
Victor Stinner0c2b6a32019-05-22 22:15:01 +02001547 @support.ignore_warnings(category=DeprecationWarning)
1548 def test_local_file_open(self):
1549 # bpo-35907, CVE-2019-9948: urllib must reject local_file:// scheme
1550 class DummyURLopener(urllib.request.URLopener):
1551 def open_local_file(self, url):
1552 return url
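        # URLopener.open() maps the URL scheme to a handler method name by
        # replacing '-' with '_', which is why both spellings are checked
        # below; either way an OSError is expected (bpo-35907).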
1553 for url in ('local_file://example', 'local-file://example'):
1554 self.assertRaises(OSError, urllib.request.urlopen, url)
1555 self.assertRaises(OSError, urllib.request.URLopener().open, url)
1556 self.assertRaises(OSError, urllib.request.URLopener().retrieve, url)
1557 self.assertRaises(OSError, DummyURLopener().open, url)
1558 self.assertRaises(OSError, DummyURLopener().retrieve, url)
1559
Xtreakc661b302019-05-19 19:10:06 +05301560
Guido van Rossume7ba4952007-06-06 23:52:48 +00001561# Just commented them out.
1562# Can't really tell why they keep failing on Windows and SPARC.
Ezio Melotti13925002011-03-16 11:05:33 +02001563# Everywhere else they work ok, but on those machines they sometimes
Guido van Rossume7ba4952007-06-06 23:52:48 +00001564# fail in one of the tests, sometimes in another. I have a Linux box,
1565# and the tests run fine there.
Ezio Melotti85a86292013-08-17 16:57:41 +03001566# If anybody has one of the problematic environments, please help!
Guido van Rossume7ba4952007-06-06 23:52:48 +00001567# . Facundo
1568#
1569# def server(evt):
Georg Brandlf78e02b2008-06-10 17:40:04 +00001570# import socket, time
Guido van Rossume7ba4952007-06-06 23:52:48 +00001571# serv = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
1572# serv.settimeout(3)
1573# serv.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
1574# serv.bind(("", 9093))
Charles-François Natali6e204602014-07-23 19:28:13 +01001575# serv.listen()
Guido van Rossume7ba4952007-06-06 23:52:48 +00001576# try:
1577# conn, addr = serv.accept()
1578# conn.send("1 Hola mundo\n")
1579# cantdata = 0
1580# while cantdata < 13:
1581# data = conn.recv(13-cantdata)
1582# cantdata += len(data)
1583# time.sleep(.3)
1584# conn.send("2 No more lines\n")
1585# conn.close()
1586# except socket.timeout:
1587# pass
1588# finally:
1589# serv.close()
1590# evt.set()
1591#
1592# class FTPWrapperTests(unittest.TestCase):
1593#
1594# def setUp(self):
Georg Brandlf78e02b2008-06-10 17:40:04 +00001595# import ftplib, time, threading
Guido van Rossume7ba4952007-06-06 23:52:48 +00001596# ftplib.FTP.port = 9093
1597# self.evt = threading.Event()
1598# threading.Thread(target=server, args=(self.evt,)).start()
1599# time.sleep(.1)
1600#
1601# def tearDown(self):
1602# self.evt.wait()
1603#
1604# def testBasic(self):
1605# # connects
1606# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [])
Georg Brandlf78e02b2008-06-10 17:40:04 +00001607# ftp.close()
Guido van Rossume7ba4952007-06-06 23:52:48 +00001608#
1609# def testTimeoutNone(self):
Georg Brandlf78e02b2008-06-10 17:40:04 +00001610# # global default timeout is ignored
1611# import socket
Serhiy Storchaka25d8aea2014-02-08 14:50:08 +02001612# self.assertIsNone(socket.getdefaulttimeout())
Guido van Rossume7ba4952007-06-06 23:52:48 +00001613# socket.setdefaulttimeout(30)
1614# try:
1615# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [])
1616# finally:
Georg Brandlf78e02b2008-06-10 17:40:04 +00001617# socket.setdefaulttimeout(None)
Guido van Rossume7ba4952007-06-06 23:52:48 +00001618# self.assertEqual(ftp.ftp.sock.gettimeout(), 30)
Georg Brandlf78e02b2008-06-10 17:40:04 +00001619# ftp.close()
Guido van Rossume7ba4952007-06-06 23:52:48 +00001620#
Georg Brandlf78e02b2008-06-10 17:40:04 +00001621# def testTimeoutDefault(self):
1622# # global default timeout is used
1623# import socket
Serhiy Storchaka25d8aea2014-02-08 14:50:08 +02001624# self.assertIsNone(socket.getdefaulttimeout())
Georg Brandlf78e02b2008-06-10 17:40:04 +00001625# socket.setdefaulttimeout(30)
1626# try:
1627# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [])
1628# finally:
1629# socket.setdefaulttimeout(None)
1630# self.assertEqual(ftp.ftp.sock.gettimeout(), 30)
1631# ftp.close()
1632#
1633# def testTimeoutValue(self):
1634# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [],
1635# timeout=30)
1636# self.assertEqual(ftp.ftp.sock.gettimeout(), 30)
1637# ftp.close()
Guido van Rossume7ba4952007-06-06 23:52:48 +00001638
Senthil Kumaran8b081b72013-04-10 20:53:12 -07001639
Senthil Kumarande49d642011-10-16 23:54:44 +08001640class RequestTests(unittest.TestCase):
1641 """Unit tests for urllib.request.Request."""
1642
1643 def test_default_values(self):
1644 Request = urllib.request.Request
1645 request = Request("http://www.python.org")
1646 self.assertEqual(request.get_method(), 'GET')
1647 request = Request("http://www.python.org", {})
1648 self.assertEqual(request.get_method(), 'POST')
1649
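    # Supplying a data argument (even an empty mapping) switches the default
    # method from GET to POST; an explicit method= argument overrides both,
    # as the next test shows.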
1650 def test_with_method_arg(self):
1651 Request = urllib.request.Request
1652 request = Request("http://www.python.org", method='HEAD')
1653 self.assertEqual(request.method, 'HEAD')
1654 self.assertEqual(request.get_method(), 'HEAD')
1655 request = Request("http://www.python.org", {}, method='HEAD')
1656 self.assertEqual(request.method, 'HEAD')
1657 self.assertEqual(request.get_method(), 'HEAD')
1658 request = Request("http://www.python.org", method='GET')
1659 self.assertEqual(request.get_method(), 'GET')
1660 request.method = 'HEAD'
1661 self.assertEqual(request.get_method(), 'HEAD')
Skip Montanaro080c9972001-01-28 21:12:22 +00001662
1663
Senthil Kumaran277e9092013-04-10 20:51:19 -07001664class URL2PathNameTests(unittest.TestCase):
Brett Cannon74bfd702003-04-25 09:39:47 +00001665
Senthil Kumaran277e9092013-04-10 20:51:19 -07001666 def test_converting_drive_letter(self):
1667 self.assertEqual(url2pathname("///C|"), 'C:')
1668 self.assertEqual(url2pathname("///C:"), 'C:')
1669 self.assertEqual(url2pathname("///C|/"), 'C:\\')
Brett Cannon74bfd702003-04-25 09:39:47 +00001670
Senthil Kumaran277e9092013-04-10 20:51:19 -07001671 def test_converting_when_no_drive_letter(self):
1672 # cannot end a raw string in \
1673 self.assertEqual(url2pathname("///C/test/"), r'\\\C\test' '\\')
1674 self.assertEqual(url2pathname("////C/test/"), r'\\C\test' '\\')
1675
1676 def test_simple_compare(self):
1677 self.assertEqual(url2pathname("///C|/foo/bar/spam.foo"),
1678 r'C:\foo\bar\spam.foo')
1679
1680 def test_non_ascii_drive_letter(self):
1681 self.assertRaises(IOError, url2pathname, "///\u00e8|/")
1682
1683 def test_roundtrip_url2pathname(self):
1684 list_of_paths = ['C:',
1685 r'\\\C\test\\',
1686 r'C:\foo\bar\spam.foo'
1687 ]
1688 for path in list_of_paths:
Senthil Kumaranc7e09802013-04-10 20:54:23 -07001689 self.assertEqual(url2pathname(pathname2url(path)), path)
Senthil Kumaran277e9092013-04-10 20:51:19 -07001690
1691class PathName2URLTests(unittest.TestCase):
1692
1693 def test_converting_drive_letter(self):
1694 self.assertEqual(pathname2url("C:"), '///C:')
1695 self.assertEqual(pathname2url("C:\\"), '///C:')
1696
1697 def test_converting_when_no_drive_letter(self):
1698 self.assertEqual(pathname2url(r"\\\folder\test" "\\"),
1699 '/////folder/test/')
1700 self.assertEqual(pathname2url(r"\\folder\test" "\\"),
1701 '////folder/test/')
1702 self.assertEqual(pathname2url(r"\folder\test" "\\"),
1703 '/folder/test/')
1704
1705 def test_simple_compare(self):
1706 self.assertEqual(pathname2url(r'C:\foo\bar\spam.foo'),
1707 "///C:/foo/bar/spam.foo" )
1708
1709 def test_long_drive_letter(self):
1710 self.assertRaises(IOError, pathname2url, "XX:\\")
1711
1712 def test_roundtrip_pathname2url(self):
1713 list_of_paths = ['///C:',
1714 '/////folder/test/',
1715 '///C:/foo/bar/spam.foo']
1716 for path in list_of_paths:
Senthil Kumaranc7e09802013-04-10 20:54:23 -07001717 self.assertEqual(pathname2url(url2pathname(path)), path)
Brett Cannon74bfd702003-04-25 09:39:47 +00001718
1719if __name__ == '__main__':
Senthil Kumaran277e9092013-04-10 20:51:19 -07001720 unittest.main()