blob: 9a6b5f66b7a13258e8d7fbc6a151da1d2d7adbbd [file] [log] [blame]
Raymond Hettinger15f44ab2016-08-30 10:47:49 -07001"""Regression tests for what was in Python 2's "urllib" module"""
Brett Cannon74bfd702003-04-25 09:39:47 +00002
Jeremy Hylton1afc1692008-06-18 20:49:58 +00003import urllib.parse
4import urllib.request
guido@google.coma119df92011-03-29 11:41:02 -07005import urllib.error
Georg Brandl24420152008-05-26 16:32:26 +00006import http.client
Barry Warsaw820c1202008-06-12 04:06:45 +00007import email.message
Jeremy Hylton66dc8c52007-08-04 03:42:26 +00008import io
Brett Cannon74bfd702003-04-25 09:39:47 +00009import unittest
Benjamin Peterson3c2dca62014-06-07 15:08:04 -070010from unittest.mock import patch
Benjamin Petersonee8712c2008-05-20 21:35:26 +000011from test import support
Brett Cannon74bfd702003-04-25 09:39:47 +000012import os
Antoine Pitrou07df6552014-11-02 17:23:14 +010013try:
14 import ssl
15except ImportError:
16 ssl = None
Senthil Kumaran2d2ea1b2011-04-14 13:16:30 +080017import sys
Georg Brandl5a650a22005-08-26 08:51:34 +000018import tempfile
Senthil Kumaran277e9092013-04-10 20:51:19 -070019from nturl2path import url2pathname, pathname2url
Jeremy Hylton6102e292000-08-31 15:48:10 +000020
Senthil Kumaranc5c5a142012-01-14 19:09:04 +080021from base64 import b64encode
Georg Brandl2daf6ae2012-02-20 19:54:16 +010022import collections
Senthil Kumaranc5c5a142012-01-14 19:09:04 +080023
Senthil Kumaran8b081b72013-04-10 20:53:12 -070024
def hexescape(char):
    """Return *char* percent-escaped as RFC 2396 specifies.

    The code point of *char* is written as upper-case hexadecimal, padded
    with a leading zero to at least two digits, and prefixed with "%".
    """
    return "%{:02X}".format(ord(char))
Jeremy Hylton6102e292000-08-31 15:48:10 +000031
# Shortcut for testing FancyURLopener: the opener created by the first
# proxy-less urlopen() call is cached here and reused by later calls.
_urlopener = None


def urlopen(url, data=None, proxies=None):
    """Open *url* through a FancyURLopener and return the resulting object.

    When *proxies* is given, a fresh urllib.request.FancyURLopener is built
    for this call only.  Otherwise the module-level cached opener is used
    (and created on first use).  *data*, when not None, is forwarded to the
    opener's open() call.
    """
    global _urlopener
    if proxies is not None:
        opener = urllib.request.FancyURLopener(proxies=proxies)
    else:
        if not _urlopener:
            _urlopener = FancyURLopener()
        opener = _urlopener
    if data is None:
        return opener.open(url)
    return opener.open(url, data)
50
Senthil Kumarance260142011-11-01 01:35:17 +080051
def FancyURLopener():
    """Create a urllib.request.FancyURLopener inside support.check_warnings().

    The constructor call is wrapped in a check_warnings() block filtered on
    the FancyURLopener DeprecationWarning message.
    """
    deprecation_filter = (
        'FancyURLopener style of invoking requests is deprecated.',
        DeprecationWarning)
    with support.check_warnings(deprecation_filter):
        opener = urllib.request.FancyURLopener()
    return opener
57
58
def fakehttp(fakedata, mock_close=False):
    """Return an HTTPConnection subclass whose socket replays *fakedata*.

    The returned class never touches the network: connect() installs an
    in-memory FakeSocket pre-loaded with *fakedata*.  The bytes most recently
    passed to the socket's sendall() are kept on the class attribute ``buf``
    so tests can inspect the outgoing request.  When *mock_close* is true the
    class also gets a close() that does nothing.
    """
    class FakeSocket(io.BytesIO):
        # Reference count: starts at one and grows by one per makefile() call.
        io_refs = 1

        def sendall(self, data):
            # Keep the outgoing bytes on the connection class for later checks.
            FakeHTTPConnection.buf = data

        def makefile(self, *args, **kwds):
            # The "file" handed out is the socket itself; count the new holder.
            self.io_refs += 1
            return self

        def read(self, amt=None):
            # Reading after the real close yields EOF instead of raising.
            return b"" if self.closed else super().read(amt)

        def readline(self, length=None):
            # Same EOF-after-close behaviour as read().
            return b"" if self.closed else super().readline(length)

        def close(self):
            # Only the last holder to close actually closes the buffer.
            self.io_refs -= 1
            if not self.io_refs:
                super().close()

    class FakeHTTPConnection(http.client.HTTPConnection):

        # buffer to store data for verification in urlopen tests.
        buf = None

        def connect(self):
            sock = FakeSocket(self.fakedata)
            self.sock = sock
            # Also expose the socket on the class itself.
            type(self).fakesock = sock

        if mock_close:
            # bpo-36918: HTTPConnection destructor calls close() which calls
            # flush(). Problem: flush() calls self.fp.flush() which raises
            # "ValueError: I/O operation on closed file" which is logged as an
            # "Exception ignored in". Override close() to silence this error.
            def close(self):
                pass

    FakeHTTPConnection.fakedata = fakedata
    return FakeHTTPConnection
104
105
class FakeHTTPMixin(object):
    """Mixin that temporarily replaces http.client.HTTPConnection."""

    def fakehttp(self, fakedata, mock_close=False):
        """Install a fake connection class built by the module's fakehttp().

        The class currently bound to http.client.HTTPConnection is remembered
        on the instance so that unfakehttp() can put it back.
        """
        replacement = fakehttp(fakedata, mock_close=mock_close)
        self._connection_class = http.client.HTTPConnection
        http.client.HTTPConnection = replacement

    def unfakehttp(self):
        """Restore the connection class saved by fakehttp()."""
        http.client.HTTPConnection = self._connection_class
114
115
class FakeFTPMixin(object):
    """Mixin that temporarily replaces urllib.request.ftpwrapper."""

    def fakeftp(self):
        """Swap urllib.request.ftpwrapper for a stub that does no I/O."""

        class FakeFtpWrapper(object):
            # Accepts the arguments this file passes to ftpwrapper and
            # ignores all of them.
            def __init__(self, user, passwd, host, port, dirs, timeout=None,
                         persistent=True):
                pass

            def retrfile(self, file, type):
                # An empty in-memory file paired with 0.
                return (io.BytesIO(), 0)

            def close(self):
                pass

        # Remember the real class so unfakeftp() can restore it.
        self._ftpwrapper_class, urllib.request.ftpwrapper = (
            urllib.request.ftpwrapper, FakeFtpWrapper)

    def unfakeftp(self):
        """Restore the ftpwrapper class saved by fakeftp()."""
        urllib.request.ftpwrapper = self._ftpwrapper_class
134
135
class urlopen_FileTests(unittest.TestCase):
    """Test urlopen() opening a temporary file.

    Try to test as much functionality as possible so as to cut down on reliance
    on connecting to the Net for testing.

    """

    def setUp(self):
        # Create a temp file to use for testing
        self.text = bytes("test_urllib: %s\n" % self.__class__.__name__,
                          "ascii")
        self.pathname = support.TESTFN
        with open(self.pathname, 'wb') as f:
            # Register the removal as soon as the file exists: tearDown() is
            # skipped when setUp() fails (e.g. if urlopen() below raises),
            # whereas registered cleanups still run.
            self.addCleanup(os.remove, self.pathname)
            f.write(self.text)
        self.returned_obj = urlopen("file:%s" % self.pathname)

    def tearDown(self):
        """Shut down the open object"""
        # The temp file itself is removed afterwards by the cleanup
        # registered in setUp().
        self.returned_obj.close()

    def test_interface(self):
        # Make sure object returned by urlopen() has the specified methods
        for attr in ("read", "readline", "readlines", "fileno",
                     "close", "info", "geturl", "getcode", "__iter__"):
            self.assertTrue(hasattr(self.returned_obj, attr),
                            "object returned by urlopen() lacks %s attribute" %
                            attr)

    def test_read(self):
        self.assertEqual(self.text, self.returned_obj.read())

    def test_readline(self):
        self.assertEqual(self.text, self.returned_obj.readline())
        self.assertEqual(b'', self.returned_obj.readline(),
                         "calling readline() after exhausting the file did not"
                         " return an empty string")

    def test_readlines(self):
        lines_list = self.returned_obj.readlines()
        self.assertEqual(len(lines_list), 1,
                         "readlines() returned the wrong number of lines")
        self.assertEqual(lines_list[0], self.text,
                         "readlines() returned improper text")

    def test_fileno(self):
        file_num = self.returned_obj.fileno()
        self.assertIsInstance(file_num, int, "fileno() did not return an int")
        self.assertEqual(os.read(file_num, len(self.text)), self.text,
                         "Reading on the file descriptor returned by fileno() "
                         "did not return the expected text")

    def test_close(self):
        # Test close() by calling it here and then having it be called again
        # by the tearDown() method for the test
        self.returned_obj.close()

    def test_headers(self):
        self.assertIsInstance(self.returned_obj.headers, email.message.Message)

    def test_url(self):
        self.assertEqual(self.returned_obj.url, self.pathname)

    def test_status(self):
        self.assertIsNone(self.returned_obj.status)

    def test_info(self):
        self.assertIsInstance(self.returned_obj.info(), email.message.Message)

    def test_geturl(self):
        self.assertEqual(self.returned_obj.geturl(), self.pathname)

    def test_getcode(self):
        self.assertIsNone(self.returned_obj.getcode())

    def test_iter(self):
        # Test iterator
        # Don't need to count number of iterations since test would fail the
        # instant it returned anything beyond the first line from the
        # comparison.
        # Use the iterator in the usual implicit way to test for ticket #4608.
        for line in self.returned_obj:
            self.assertEqual(line, self.text)

    def test_relativelocalfile(self):
        # A relative path without a scheme is not a valid URL for urlopen().
        self.assertRaises(ValueError, urllib.request.urlopen,
                          './' + self.pathname)
226
Senthil Kumaranefbd4ea2017-04-01 23:47:35 -0700227
class ProxyTests(unittest.TestCase):
    """Tests for proxy settings read from environment variables."""

    def setUp(self):
        # Records changes to env vars
        self.env = support.EnvironmentVarGuard()
        # Delete all proxy related env vars (names are collected first so the
        # environment is not modified while it is being iterated)
        proxy_names = [name for name in os.environ if 'proxy' in name.lower()]
        for name in proxy_names:
            self.env.unset(name)

    def tearDown(self):
        # Restore all proxy related env vars
        self.env.__exit__()
        del self.env

    def test_getproxies_environment_keep_no_proxies(self):
        bypass = urllib.request.proxy_bypass_environment
        self.env.set('NO_PROXY', 'localhost')
        # getproxies_environment use lowered case truncated (no '_proxy') keys
        found = urllib.request.getproxies_environment()
        self.assertEqual('localhost', found['no'])
        # List of no_proxies with space.
        self.env.set('NO_PROXY',
                     'localhost, anotherdomain.com, newdomain.com:1234')
        for host in ('anotherdomain.com',
                     'anotherdomain.com:8888',
                     'newdomain.com:1234'):
            self.assertTrue(bypass(host), host)

    def test_proxy_cgi_ignore(self):
        getproxies = urllib.request.getproxies_environment
        try:
            self.env.set('HTTP_PROXY', 'http://somewhere:3128')
            self.assertEqual('http://somewhere:3128', getproxies()['http'])
            # Once REQUEST_METHOD is set, HTTP_PROXY must no longer be used.
            self.env.set('REQUEST_METHOD', 'GET')
            self.assertNotIn('http', getproxies())
        finally:
            self.env.unset('REQUEST_METHOD')
            self.env.unset('HTTP_PROXY')

    def test_proxy_bypass_environment_host_match(self):
        bypass = urllib.request.proxy_bypass_environment
        self.env.set('NO_PROXY',
                     'localhost, anotherdomain.com, newdomain.com:1234, .d.o.t')
        bypassed_hosts = (
            'localhost',
            'LocalHost',                # MixedCase
            'LOCALHOST',                # UPPERCASE
            'newdomain.com:1234',
            'foo.d.o.t',                # issue 29142
            'anotherdomain.com:8888',
            'www.newdomain.com:1234',
        )
        proxied_hosts = (
            'prelocalhost',
            'newdomain.com',            # no port
            'newdomain.com:1235',       # wrong port
        )
        for host in bypassed_hosts:
            self.assertTrue(bypass(host), host)
        for host in proxied_hosts:
            self.assertFalse(bypass(host), host)
Senthil Kumarana7c0ff22016-04-25 08:16:23 -0700280
Senthil Kumaranefbd4ea2017-04-01 23:47:35 -0700281
class ProxyTests_withOrderedEnv(unittest.TestCase):
    """Proxy environment tests that depend on the order of the variables."""

    def setUp(self):
        # We need to test conditions, where variable order _is_ significant.
        # Monkey patch os.environ, start with empty fake environment; the real
        # mapping is kept so tearDown() can put it back.
        self._saved_env, os.environ = os.environ, collections.OrderedDict()

    def tearDown(self):
        os.environ = self._saved_env

    def test_getproxies_environment_prefer_lowercase(self):
        bypass = urllib.request.proxy_bypass_environment
        getproxies = urllib.request.getproxies_environment
        fake_env = os.environ  # the OrderedDict installed by setUp()

        # Test lowercase preference with removal
        fake_env['no_proxy'] = ''
        fake_env['No_Proxy'] = 'localhost'
        for host in ('localhost', 'arbitrary'):
            self.assertFalse(bypass(host), host)
        fake_env['http_proxy'] = ''
        fake_env['HTTP_PROXY'] = 'http://somewhere:3128'
        self.assertEqual({}, getproxies())

        # Test lowercase preference of proxy bypass and correct matching
        # including ports
        fake_env['no_proxy'] = 'localhost, noproxy.com, my.proxy:1234'
        fake_env['No_Proxy'] = 'xyz.com'
        for host in ('localhost', 'noproxy.com:5678', 'my.proxy:1234'):
            self.assertTrue(bypass(host), host)
        for host in ('my.proxy', 'arbitrary'):
            self.assertFalse(bypass(host), host)

        # Test lowercase preference with replacement
        fake_env['http_proxy'] = 'http://somewhere:3128'
        fake_env['Http_Proxy'] = 'http://somewhereelse:3128'
        self.assertEqual('http://somewhere:3128', getproxies()['http'])
Benjamin Peterson9bc93512008-09-22 22:10:59 +0000316
Senthil Kumaranefbd4ea2017-04-01 23:47:35 -0700317
class urlopen_HttpTests(unittest.TestCase, FakeHTTPMixin, FakeFTPMixin):
    """Test urlopen() opening a fake http connection."""

    def check_read(self, ver):
        # Shared body of the test_read_<version> tests; *ver* is the HTTP
        # version as bytes and is spliced into the canned status line.
        canned = b"HTTP/" + ver + b" 200 OK\r\n\r\nHello!"
        self.fakehttp(canned)
        try:
            response = urlopen("http://python.org/")
            self.assertEqual(response.readline(), b"Hello!")
            self.assertEqual(response.readline(), b"")
            self.assertEqual(response.geturl(), 'http://python.org/')
            self.assertEqual(response.getcode(), 200)
        finally:
            self.unfakehttp()

    def test_url_fragment(self):
        # Issue #11703: geturl() omits fragments in the original URL.
        url_with_fragment = 'http://docs.python.org/library/urllib.html#OK'
        self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello!")
        try:
            response = urllib.request.urlopen(url_with_fragment)
            self.assertEqual(response.geturl(), url_with_fragment)
        finally:
            self.unfakehttp()

    def test_willclose(self):
        self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello!")
        try:
            response = urlopen("http://www.python.org")
            self.assertTrue(response.fp.will_close)
        finally:
            self.unfakehttp()

    @unittest.skipUnless(ssl, "ssl module required")
    def test_url_with_control_char_rejected(self):
        InvalidURL = http.client.InvalidURL
        for code_point in [*range(0x21), 0x7f]:
            char = chr(code_point)
            schemeless_url = f"//localhost:7777/test{char}/"
            # Backslashes in the repr are doubled so the regex matches them
            # literally.
            escaped_char_repr = repr(char).replace('\\', r'\\')
            expected = f"contain control.*{escaped_char_repr}"
            self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello.")
            try:
                # We explicitly test urllib.request.urlopen() instead of the
                # top level 'def urlopen()' function defined in this... (quite
                # ugly) test suite.  They use different url opening codepaths.
                # Plain urlopen uses FancyURLOpener which goes via a codepath
                # that calls urllib.parse.quote() on the URL which makes all
                # of the above attempts at injection within the url _path_
                # safe.
                for scheme in ("http", "https"):
                    with self.assertRaisesRegex(InvalidURL, expected):
                        urllib.request.urlopen(f"{scheme}:{schemeless_url}")
                # This code path quotes the URL so there is no injection.
                quoted = urlopen(f"http:{schemeless_url}")
                self.assertNotIn(char, quoted.geturl())
            finally:
                self.unfakehttp()

    @unittest.skipUnless(ssl, "ssl module required")
    def test_url_with_newline_header_injection_rejected(self):
        self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello.")
        host = "localhost:7777?a=1 HTTP/1.1\r\nX-injected: header\r\nTEST: 123"
        schemeless_url = "//" + host + ":8080/test/?test=a"
        try:
            # We explicitly test urllib.request.urlopen() instead of the top
            # level 'def urlopen()' function defined in this... (quite ugly)
            # test suite.  They use different url opening codepaths.  Plain
            # urlopen uses FancyURLOpener which goes via a codepath that
            # calls urllib.parse.quote() on the URL which makes all of the
            # above attempts at injection within the url _path_ safe.
            InvalidURL = http.client.InvalidURL
            http_pattern = r"contain control.*\\r.*(found at least . .)"
            https_pattern = r"contain control.*\\n"
            with self.assertRaisesRegex(InvalidURL, http_pattern):
                urllib.request.urlopen(f"http:{schemeless_url}")
            with self.assertRaisesRegex(InvalidURL, https_pattern):
                urllib.request.urlopen(f"https:{schemeless_url}")
            # This code path quotes the URL so there is no injection.
            quoted = urlopen(f"http:{schemeless_url}")
            for forbidden in (' ', '\r', '\n'):
                self.assertNotIn(forbidden, quoted.geturl())
        finally:
            self.unfakehttp()

    def test_read_0_9(self):
        # "0.9" response accepted (but not "simple responses" without
        # a status line)
        self.check_read(b"0.9")

    def test_read_1_0(self):
        self.check_read(b"1.0")

    def test_read_1_1(self):
        self.check_read(b"1.1")

    def test_read_bogus(self):
        # urlopen() should raise OSError for many error codes.
        canned = b'''HTTP/1.1 401 Authentication Required
Date: Wed, 02 Jan 2008 03:03:54 GMT
Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
Connection: close
Content-Type: text/html; charset=iso-8859-1
'''
        self.fakehttp(canned, mock_close=True)
        try:
            self.assertRaises(OSError, urlopen, "http://python.org/")
        finally:
            self.unfakehttp()

    def test_invalid_redirect(self):
        # urlopen() should raise OSError for many error codes.
        canned = b'''HTTP/1.1 302 Found
Date: Wed, 02 Jan 2008 03:03:54 GMT
Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
Location: file://guidocomputer.athome.com:/python/license
Connection: close
Content-Type: text/html; charset=iso-8859-1
'''
        self.fakehttp(canned, mock_close=True)
        try:
            expected = "Redirection to url 'file:"
            with self.assertRaisesRegex(urllib.error.HTTPError, expected):
                urlopen("http://python.org/")
        finally:
            self.unfakehttp()

    def test_redirect_limit_independent(self):
        # Ticket #12923: make sure independent requests each use their
        # own retry limit.
        attempts = FancyURLopener().maxtries
        for _ in range(attempts):
            self.fakehttp(b'''HTTP/1.1 302 Found
Location: file://guidocomputer.athome.com:/python/license
Connection: close
''', mock_close=True)
            try:
                self.assertRaises(urllib.error.HTTPError, urlopen,
                                  "http://something")
            finally:
                self.unfakehttp()

    def test_empty_socket(self):
        # urlopen() raises OSError if the underlying socket does not send any
        # data. (#1680230)
        self.fakehttp(b'')
        try:
            self.assertRaises(OSError, urlopen, "http://something")
        finally:
            self.unfakehttp()

    def test_missing_localfile(self):
        # Test for #10836
        with self.assertRaises(urllib.error.URLError) as caught:
            urlopen('file://localhost/a/file/which/doesnot/exists.py')
        error = caught.exception
        self.assertTrue(error.filename)
        self.assertTrue(error.reason)

    def test_file_notexists(self):
        handle, tmp_path = tempfile.mkstemp()
        tmp_url = 'file://localhost/' + tmp_path.replace(os.path.sep, '/')
        try:
            # While the file exists the URL must open...
            self.assertTrue(os.path.exists(tmp_path))
            with urlopen(tmp_url) as opened:
                self.assertTrue(opened)
        finally:
            os.close(handle)
            os.unlink(tmp_path)
        # ...and once it is gone the same URL must fail.
        self.assertFalse(os.path.exists(tmp_path))
        with self.assertRaises(urllib.error.URLError):
            urlopen(tmp_url)

    def test_ftp_nohost(self):
        hostless_url = 'ftp:///path'
        with self.assertRaises(urllib.error.URLError) as caught:
            urlopen(hostless_url)
        error = caught.exception
        self.assertFalse(error.filename)
        self.assertTrue(error.reason)

    def test_ftp_nonexisting(self):
        with self.assertRaises(urllib.error.URLError) as caught:
            urlopen('ftp://localhost/a/file/which/doesnot/exists.py')
        error = caught.exception
        self.assertFalse(error.filename)
        self.assertTrue(error.reason)

    @patch.object(urllib.request, 'MAXFTPCACHE', 0)
    def test_ftp_cache_pruning(self):
        self.fakeftp()
        try:
            # Seed the cache with one entry before opening another FTP URL
            # while MAXFTPCACHE is patched to 0.
            seeded = urllib.request.ftpwrapper(
                'user', 'pass', 'localhost', 21, [])
            urllib.request.ftpcache['test'] = seeded
            urlopen('ftp://localhost')
        finally:
            self.unfakeftp()

    def test_userpass_inurl(self):
        self.fakehttp(b"HTTP/1.0 200 OK\r\n\r\nHello!")
        try:
            response = urlopen("http://user:pass@python.org/")
            self.assertEqual(response.readline(), b"Hello!")
            self.assertEqual(response.readline(), b"")
            self.assertEqual(response.geturl(), 'http://user:pass@python.org/')
            self.assertEqual(response.getcode(), 200)
        finally:
            self.unfakehttp()

    def test_userpass_inurl_w_spaces(self):
        self.fakehttp(b"HTTP/1.0 200 OK\r\n\r\nHello!")
        try:
            userpass = "a b:c d"
            url = "http://{}@python.org/".format(userpass)
            # The fake class installed above records the outgoing request.
            fakehttp_wrapper = http.client.HTTPConnection
            credentials = b64encode(userpass.encode("ASCII")).decode("ASCII")
            authorization = "Authorization: Basic %s\r\n" % credentials
            response = urlopen(url)
            # The authorization header must be in place
            sent_request = fakehttp_wrapper.buf.decode("UTF-8")
            self.assertIn(authorization, sent_request)
            self.assertEqual(response.readline(), b"Hello!")
            self.assertEqual(response.readline(), b"")
            # the spaces are quoted in URL so no match
            self.assertNotEqual(response.geturl(), url)
            self.assertEqual(response.getcode(), 200)
        finally:
            self.unfakehttp()

    def test_URLopener_deprecation(self):
        with support.check_warnings(('', DeprecationWarning)):
            urllib.request.URLopener()

    @unittest.skipUnless(ssl, "ssl module required")
    def test_cafile_and_context(self):
        # Passing both cafile and context to urlopen() must be rejected.
        context = ssl.create_default_context()
        with support.check_warnings(('', DeprecationWarning)):
            with self.assertRaises(ValueError):
                urllib.request.urlopen(
                    "https://localhost",
                    cafile="/nonexistent/path",
                    context=context,
                )
Senthil Kumarana5c85b32014-09-19 15:23:30 +0800551
Senthil Kumaranefbd4ea2017-04-01 23:47:35 -0700552
class urlopen_DataTests(unittest.TestCase):
    """Test urlopen() opening a data URL."""

    def setUp(self):
        # clear _opener global variable
        self.addCleanup(urllib.request.urlcleanup)

        # text containing URL special- and unicode-characters
        self.text = "test data URLs :;,%=& \u00f6 \u00c4 "
        # 2x1 pixel RGB PNG image with one black and one white pixel
        self.image = (
            b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\x02\x00\x00\x00'
            b'\x01\x08\x02\x00\x00\x00{@\xe8\xdd\x00\x00\x00\x01sRGB\x00\xae'
            b'\xce\x1c\xe9\x00\x00\x00\x0fIDAT\x08\xd7c```\xf8\xff\xff?\x00'
            b'\x06\x01\x02\xfe\no/\x1e\x00\x00\x00\x00IEND\xaeB`\x82')

        self.text_url = (
            "data:text/plain;charset=UTF-8,test%20data%20URLs%20%3A%3B%2C%25%3"
            "D%26%20%C3%B6%20%C3%84%20")
        self.text_url_base64 = (
            "data:text/plain;charset=ISO-8859-1;base64,dGVzdCBkYXRhIFVSTHMgOjs"
            "sJT0mIPYgxCA%3D")
        # base64 encoded data URL that contains ignorable spaces,
        # such as "\n", " ", "%0A", and "%20".
        # The URL must start with the "data:" scheme and media type, followed
        # by the base64 text of self.image from its very first byte; without
        # that prefix urlopen() is not given a data URL at all.
        self.image_url = (
            "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAIAAAABCAIAAAB7\n"
            "QOjdAAAAAXNSR0IArs4c6QAAAA9JREFUCNdj%0AYGBg%2BP//PwAGAQL%2BCm8 "
            "vHgAAAABJRU5ErkJggg%3D%3D%0A%20")

        self.text_url_resp = urllib.request.urlopen(self.text_url)
        self.text_url_base64_resp = urllib.request.urlopen(
            self.text_url_base64)
        self.image_url_resp = urllib.request.urlopen(self.image_url)

    def test_interface(self):
        # Make sure object returned by urlopen() has the specified methods
        for attr in ("read", "readline", "readlines",
                     "close", "info", "geturl", "getcode", "__iter__"):
            self.assertTrue(hasattr(self.text_url_resp, attr),
                            "object returned by urlopen() lacks %s attribute" %
                            attr)

    def test_info(self):
        self.assertIsInstance(self.text_url_resp.info(), email.message.Message)
        self.assertEqual(self.text_url_base64_resp.info().get_params(),
                         [('text/plain', ''), ('charset', 'ISO-8859-1')])
        self.assertEqual(self.image_url_resp.info()['content-length'],
                         str(len(self.image)))
        # A data URL without a media type reports text/plain, US-ASCII.
        self.assertEqual(urllib.request.urlopen("data:,").info().get_params(),
                         [('text/plain', ''), ('charset', 'US-ASCII')])

    def test_geturl(self):
        self.assertEqual(self.text_url_resp.geturl(), self.text_url)
        self.assertEqual(self.text_url_base64_resp.geturl(),
                         self.text_url_base64)
        self.assertEqual(self.image_url_resp.geturl(), self.image_url)

    def test_read_text(self):
        # Decode the payload with the charset announced in the response.
        charset = dict(self.text_url_resp.info().get_params())['charset']
        self.assertEqual(self.text_url_resp.read().decode(charset), self.text)

    def test_read_text_base64(self):
        # Decode the payload with the charset announced in the response.
        charset = dict(
            self.text_url_base64_resp.info().get_params())['charset']
        self.assertEqual(self.text_url_base64_resp.read().decode(charset),
                         self.text)

    def test_read_image(self):
        self.assertEqual(self.image_url_resp.read(), self.image)

    def test_missing_comma(self):
        self.assertRaises(ValueError, urllib.request.urlopen,
                          'data:text/plain')

    def test_invalid_base64_data(self):
        # missing padding character
        self.assertRaises(ValueError, urllib.request.urlopen,
                          'data:;base64,Cg=')
628
Senthil Kumaranefbd4ea2017-04-01 23:47:35 -0700629
class urlretrieve_FileTests(unittest.TestCase):
    """Test urllib.urlretrieve() on local files"""

    def setUp(self):
        # clear _opener global variable
        self.addCleanup(urllib.request.urlcleanup)

        # Create a list of temporary files. Each item in the list is a file
        # name (absolute path or relative to the current working directory).
        # All files in this list will be deleted in the tearDown method. Note,
        # this only helps to make sure temporary files get deleted, but it
        # does nothing about trying to close files that may still be open. It
        # is the responsibility of the developer to properly close files even
        # when exceptional conditions occur.
        self.tempFiles = []

        # Create a temporary file.
        self.registerFileForCleanUp(support.TESTFN)
        self.text = b'testing urllib.urlretrieve'
        # A context manager closes the file on every path and lets a failing
        # open() surface as itself instead of a swallowed NameError.
        with open(support.TESTFN, 'wb') as source:
            source.write(self.text)

    def tearDown(self):
        # Delete the temporary files.  Removal is best-effort: a file that
        # was never created (or is already gone) is not an error here.
        for path in self.tempFiles:
            try:
                os.remove(path)
            except OSError:
                pass

    def constructLocalFileUrl(self, filePath):
        """Return a file:// URL for filePath, made absolute first.

        Raises unittest.SkipTest when the absolute path cannot be encoded
        as UTF-8.
        """
        filePath = os.path.abspath(filePath)
        try:
            filePath.encode("utf-8")
        except UnicodeEncodeError:
            raise unittest.SkipTest("filePath is not encodable to utf8")
        return "file://%s" % urllib.request.pathname2url(filePath)

    def createNewTempFile(self, data=b""):
        """Creates a new temporary file containing the specified data,
        registers the file for deletion during the test fixture tear down, and
        returns the absolute path of the file."""

        newFd, newFilePath = tempfile.mkstemp()
        # Register before writing so the file is cleaned up even if the
        # write below fails.
        self.registerFileForCleanUp(newFilePath)
        with os.fdopen(newFd, "wb") as newFile:
            newFile.write(data)
        return newFilePath

    def registerFileForCleanUp(self, fileName):
        """Remember fileName so that tearDown() attempts to delete it."""
        self.tempFiles.append(fileName)

    def test_basic(self):
        # Make sure that a local file just gets its own location returned and
        # a headers value is returned.
        result = urllib.request.urlretrieve("file:%s" % support.TESTFN)
        self.assertEqual(result[0], support.TESTFN)
        self.assertIsInstance(result[1], email.message.Message,
                              "did not get an email.message.Message instance "
                              "as second returned value")

    def test_copy(self):
        # Test that setting the filename argument works.
        second_temp = "%s.2" % support.TESTFN
        self.registerFileForCleanUp(second_temp)
        result = urllib.request.urlretrieve(self.constructLocalFileUrl(
            support.TESTFN), second_temp)
        self.assertEqual(second_temp, result[0])
        self.assertTrue(os.path.exists(second_temp), "copy of the file was not "
                                                     "made")
        with open(second_temp, 'rb') as copied:
            text = copied.read()
        self.assertEqual(self.text, text)

    def test_reporthook(self):
        # Make sure that the reporthook works.
        expected_block = 0

        def hooktester(block_count, block_read_size, file_size):
            # Each call must report the next consecutive block number.
            nonlocal expected_block
            self.assertIsInstance(block_count, int)
            self.assertIsInstance(block_read_size, int)
            self.assertIsInstance(file_size, int)
            self.assertEqual(block_count, expected_block)
            expected_block += 1

        second_temp = "%s.2" % support.TESTFN
        self.registerFileForCleanUp(second_temp)
        urllib.request.urlretrieve(
            self.constructLocalFileUrl(support.TESTFN),
            second_temp, hooktester)

    def test_reporthook_0_bytes(self):
        # Test on zero length file. Should call reporthook only 1 time.
        report = []

        def hooktester(block_count, block_read_size, file_size):
            report.append((block_count, block_read_size, file_size))

        srcFileName = self.createNewTempFile()
        urllib.request.urlretrieve(self.constructLocalFileUrl(srcFileName),
                                   support.TESTFN, hooktester)
        self.assertEqual(len(report), 1)
        self.assertEqual(report[0][2], 0)

    def test_reporthook_5_bytes(self):
        # Test on 5 byte file. Should call reporthook only 2 times (once when
        # the "network connection" is established and once when the block is
        # read).
        report = []

        def hooktester(block_count, block_read_size, file_size):
            report.append((block_count, block_read_size, file_size))

        srcFileName = self.createNewTempFile(b"x" * 5)
        urllib.request.urlretrieve(self.constructLocalFileUrl(srcFileName),
                                   support.TESTFN, hooktester)
        self.assertEqual(len(report), 2)
        self.assertEqual(report[0][2], 5)
        self.assertEqual(report[1][2], 5)

    def test_reporthook_8193_bytes(self):
        # Test on 8193 byte file. Should call reporthook only 3 times (once
        # when the "network connection" is established, once for the next 8192
        # bytes, and once for the last byte).
        report = []

        def hooktester(block_count, block_read_size, file_size):
            report.append((block_count, block_read_size, file_size))

        srcFileName = self.createNewTempFile(b"x" * 8193)
        urllib.request.urlretrieve(self.constructLocalFileUrl(srcFileName),
                                   support.TESTFN, hooktester)
        self.assertEqual(len(report), 3)
        self.assertEqual(report[0][2], 8193)
        self.assertEqual(report[0][1], 8192)
        self.assertEqual(report[1][1], 8192)
        self.assertEqual(report[2][1], 8192)
Skip Montanaro080c9972001-01-28 21:12:22 +0000771
Senthil Kumarance260142011-11-01 01:35:17 +0800772
class urlretrieve_HttpTests(unittest.TestCase, FakeHTTPMixin):
    """Test urllib.urlretrieve() using fake http connections"""

    # Canned response whose Content-Length header (100) is larger than the
    # body that actually follows it.
    _SHORT_BODY_RESPONSE = b'''HTTP/1.1 200 OK
Date: Wed, 02 Jan 2008 03:03:54 GMT
Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
Connection: close
Content-Length: 100
Content-Type: text/html; charset=iso-8859-1

FF
'''

    def _expect_content_too_short(self, **retrieve_kwargs):
        # Serve the short response through the fake HTTP layer and check
        # that urlretrieve() raises ContentTooShortError.
        self.addCleanup(urllib.request.urlcleanup)

        self.fakehttp(self._SHORT_BODY_RESPONSE)
        try:
            with self.assertRaises(urllib.error.ContentTooShortError):
                urllib.request.urlretrieve(support.TEST_HTTP_URL,
                                           **retrieve_kwargs)
        finally:
            self.unfakehttp()

    def test_short_content_raises_ContentTooShortError(self):
        def ignore_progress(par1, par2, par3):
            pass

        self._expect_content_too_short(reporthook=ignore_progress)

    def test_short_content_raises_ContentTooShortError_without_reporthook(self):
        self._expect_content_too_short()
816
817
class QuotingTests(unittest.TestCase):
    r"""Tests for urllib.quote() and urllib.quote_plus()

    According to RFC 3986 (Uniform Resource Identifiers), to escape a
    character you write it as '%' + <2 character US-ASCII hex value>.
    The Python code of ``'%' + hex(ord(<character>))[2:]`` escapes a
    character properly. Case does not matter on the hex letters.

    The various character sets specified are:

    Reserved characters : ";/?:@&=+$,"
        Have special meaning in URIs and must be escaped if not being used for
        their special meaning
    Data characters : letters, digits, and "-_.!~*'()"
        Unreserved and do not need to be escaped; can be, though, if desired
    Control characters : 0x00 - 0x1F, 0x7F
        Have no use in URIs so must be escaped
    space : 0x20
        Must be escaped
    Delimiters : '<>#%"'
        Must be escaped
    Unwise : "{}|\^[]`"
        Must be escaped

    """

    def test_never_quote(self):
        # Make sure quote() does not quote letters, digits, and "_.-~"
        do_not_quote = ("ABCDEFGHIJKLMNOPQRSTUVWXYZ"
                        "abcdefghijklmnopqrstuvwxyz"
                        "0123456789"
                        "_.-~")
        result = urllib.parse.quote(do_not_quote)
        self.assertEqual(do_not_quote, result,
                         "using quote(): %r != %r" % (do_not_quote, result))
        result = urllib.parse.quote_plus(do_not_quote)
        self.assertEqual(do_not_quote, result,
                         "using quote_plus(): %r != %r" % (do_not_quote, result))

    def test_default_safe(self):
        # Test '/' is default value for 'safe' parameter
        self.assertEqual(urllib.parse.quote.__defaults__[0], '/')

    def test_safe(self):
        # Test setting 'safe' parameter does what it should do
        quote_by_default = "<>"
        result = urllib.parse.quote(quote_by_default, safe=quote_by_default)
        self.assertEqual(quote_by_default, result,
                         "using quote(): %r != %r" % (quote_by_default, result))
        result = urllib.parse.quote_plus(quote_by_default,
                                         safe=quote_by_default)
        self.assertEqual(quote_by_default, result,
                         "using quote_plus(): %r != %r" %
                         (quote_by_default, result))
        # Safe expressed as bytes rather than str
        result = urllib.parse.quote(quote_by_default, safe=b"<>")
        self.assertEqual(quote_by_default, result,
                         "using quote(): %r != %r" % (quote_by_default, result))
        # "Safe" non-ASCII characters should have no effect
        # (Since URIs are not allowed to have non-ASCII characters)
        result = urllib.parse.quote("a\xfcb", encoding="latin-1", safe="\xfc")
        expect = urllib.parse.quote("a\xfcb", encoding="latin-1", safe="")
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" %
                         (expect, result))
        # Same as above, but using a bytes rather than str
        result = urllib.parse.quote("a\xfcb", encoding="latin-1", safe=b"\xfc")
        expect = urllib.parse.quote("a\xfcb", encoding="latin-1", safe="")
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" %
                         (expect, result))

    def test_default_quoting(self):
        # Make sure all characters that should be quoted are by default sans
        # space (separate test for that).
        should_quote = [chr(num) for num in range(32)]  # For 0x00 - 0x1F
        should_quote.append(r'<>#%"{}|\^[]`')
        should_quote.append(chr(127))  # For 0x7F
        should_quote = ''.join(should_quote)
        for char in should_quote:
            result = urllib.parse.quote(char)
            self.assertEqual(hexescape(char), result,
                             "using quote(): "
                             "%s should be escaped to %s, not %s" %
                             (char, hexescape(char), result))
            result = urllib.parse.quote_plus(char)
            self.assertEqual(hexescape(char), result,
                             "using quote_plus(): "
                             "%s should be escaped to %s, not %s" %
                             (char, hexescape(char), result))
        # Quoting must leave the unreserved characters around the
        # escaped ones untouched.
        partial_quote = "ab[]cd"
        expected = "ab%5B%5Dcd"
        result = urllib.parse.quote(partial_quote)
        self.assertEqual(expected, result,
                         "using quote(): %r != %r" % (expected, result))
        result = urllib.parse.quote_plus(partial_quote)
        self.assertEqual(expected, result,
                         "using quote_plus(): %r != %r" % (expected, result))

    def test_quoting_space(self):
        # Make sure quote() and quote_plus() handle spaces as specified in
        # their unique way
        result = urllib.parse.quote(' ')
        self.assertEqual(result, hexescape(' '),
                         "using quote(): %r != %r" % (result, hexescape(' ')))
        result = urllib.parse.quote_plus(' ')
        self.assertEqual(result, '+',
                         "using quote_plus(): %r != +" % result)
        given = "a b cd e f"
        expect = given.replace(' ', hexescape(' '))
        result = urllib.parse.quote(given)
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        expect = given.replace(' ', '+')
        result = urllib.parse.quote_plus(given)
        self.assertEqual(expect, result,
                         "using quote_plus(): %r != %r" % (expect, result))

    def test_quoting_plus(self):
        # A literal '+' is escaped unless it is listed as safe; spaces
        # always become '+'.
        self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma'),
                         'alpha%2Bbeta+gamma')
        self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma', '+'),
                         'alpha+beta+gamma')
        # Test with bytes
        self.assertEqual(urllib.parse.quote_plus(b'alpha+beta gamma'),
                         'alpha%2Bbeta+gamma')
        # Test with safe bytes
        self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma', b'+'),
                         'alpha+beta+gamma')

    def test_quote_bytes(self):
        # Bytes should quote directly to percent-encoded values
        given = b"\xa2\xd8ab\xff"
        expect = "%A2%D8ab%FF"
        result = urllib.parse.quote(given)
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        # Encoding argument should raise type error on bytes input
        self.assertRaises(TypeError, urllib.parse.quote, given,
                          encoding="latin-1")
        # quote_from_bytes should work the same
        result = urllib.parse.quote_from_bytes(given)
        self.assertEqual(expect, result,
                         "using quote_from_bytes(): %r != %r"
                         % (expect, result))

    def test_quote_with_unicode(self):
        # Characters in Latin-1 range, encoded by default in UTF-8
        given = "\xa2\xd8ab\xff"
        expect = "%C2%A2%C3%98ab%C3%BF"
        result = urllib.parse.quote(given)
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        # Characters in Latin-1 range, encoded with None (default)
        result = urllib.parse.quote(given, encoding=None, errors=None)
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        # Characters in Latin-1 range, encoded with Latin-1
        given = "\xa2\xd8ab\xff"
        expect = "%A2%D8ab%FF"
        result = urllib.parse.quote(given, encoding="latin-1")
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        # Characters in BMP, encoded by default in UTF-8
        given = "\u6f22\u5b57"              # "Kanji"
        expect = "%E6%BC%A2%E5%AD%97"
        result = urllib.parse.quote(given)
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        # Characters in BMP, encoded with Latin-1
        given = "\u6f22\u5b57"
        self.assertRaises(UnicodeEncodeError, urllib.parse.quote, given,
                          encoding="latin-1")
        # Characters in BMP, encoded with Latin-1, with replace error handling
        given = "\u6f22\u5b57"
        expect = "%3F%3F"                   # "??"
        result = urllib.parse.quote(given, encoding="latin-1",
                                    errors="replace")
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        # Characters in BMP, Latin-1, with xmlcharrefreplace error handling
        given = "\u6f22\u5b57"
        expect = "%26%2328450%3B%26%2323383%3B"     # "&#28450;&#23383;"
        result = urllib.parse.quote(given, encoding="latin-1",
                                    errors="xmlcharrefreplace")
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))

    def test_quote_plus_with_unicode(self):
        # Encoding (latin-1) test for quote_plus
        given = "\xa2\xd8 \xff"
        expect = "%A2%D8+%FF"
        result = urllib.parse.quote_plus(given, encoding="latin-1")
        self.assertEqual(expect, result,
                         "using quote_plus(): %r != %r" % (expect, result))
        # Errors test for quote_plus
        given = "ab\u6f22\u5b57 cd"
        expect = "ab%3F%3F+cd"
        result = urllib.parse.quote_plus(given, encoding="latin-1",
                                         errors="replace")
        self.assertEqual(expect, result,
                         "using quote_plus(): %r != %r" % (expect, result))
1021
Senthil Kumarand496c4c2010-07-30 19:34:36 +00001022
class UnquotingTests(unittest.TestCase):
    """Tests for unquote() and unquote_plus()

    See the doc string for QuotingTests for details on quoting and such.

    """

    def test_unquoting(self):
        # Make sure unquoting of all ASCII values works
        escape_list = []
        for num in range(128):
            given = hexescape(chr(num))
            expect = chr(num)
            result = urllib.parse.unquote(given)
            self.assertEqual(expect, result,
                             "using unquote(): %r != %r" % (expect, result))
            result = urllib.parse.unquote_plus(given)
            self.assertEqual(expect, result,
                             "using unquote_plus(): %r != %r" %
                             (expect, result))
            escape_list.append(given)
        escape_string = ''.join(escape_list)
        result = urllib.parse.unquote(escape_string)
        # The only '%' left after unquoting is the decoded %25 itself.
        self.assertEqual(result.count('%'), 1,
                         "using unquote(): not all characters escaped: "
                         "%s" % result)
        self.assertRaises((TypeError, AttributeError), urllib.parse.unquote, None)
        self.assertRaises((TypeError, AttributeError), urllib.parse.unquote, ())
        with support.check_warnings(('', BytesWarning), quiet=True):
            self.assertRaises((TypeError, AttributeError), urllib.parse.unquote, b'')

    def test_unquoting_badpercent(self):
        # Test unquoting on bad percent-escapes
        bad_escapes = ('%xab', '%x', '%')
        for given in bad_escapes:
            expect = given
            result = urllib.parse.unquote(given)
            self.assertEqual(expect, result, "using unquote(): %r != %r"
                             % (expect, result))
        # unquote_to_bytes
        for given in bad_escapes:
            expect = bytes(given, 'ascii')
            result = urllib.parse.unquote_to_bytes(given)
            self.assertEqual(expect, result, "using unquote_to_bytes(): %r != %r"
                             % (expect, result))
        self.assertRaises((TypeError, AttributeError), urllib.parse.unquote_to_bytes, None)
        self.assertRaises((TypeError, AttributeError), urllib.parse.unquote_to_bytes, ())

    def test_unquoting_mixed_case(self):
        # Test unquoting on mixed-case hex digits in the percent-escapes
        given = '%Ab%eA'
        expect = b'\xab\xea'
        result = urllib.parse.unquote_to_bytes(given)
        self.assertEqual(expect, result,
                         "using unquote_to_bytes(): %r != %r"
                         % (expect, result))

    def test_unquoting_parts(self):
        # Make sure unquoting works when have non-quoted characters
        # interspersed
        given = 'ab%sd' % hexescape('c')
        expect = "abcd"
        result = urllib.parse.unquote(given)
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))
        result = urllib.parse.unquote_plus(given)
        self.assertEqual(expect, result,
                         "using unquote_plus(): %r != %r" % (expect, result))

    def test_unquoting_plus(self):
        # Test difference between unquote() and unquote_plus()
        given = "are+there+spaces..."
        expect = given
        result = urllib.parse.unquote(given)
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))
        expect = given.replace('+', ' ')
        result = urllib.parse.unquote_plus(given)
        self.assertEqual(expect, result,
                         "using unquote_plus(): %r != %r" % (expect, result))

    def test_unquote_to_bytes(self):
        # Percent-escapes in a str decode to the raw bytes they name.
        given = 'br%C3%BCckner_sapporo_20050930.doc'
        expect = b'br\xc3\xbcckner_sapporo_20050930.doc'
        result = urllib.parse.unquote_to_bytes(given)
        self.assertEqual(expect, result,
                         "using unquote_to_bytes(): %r != %r"
                         % (expect, result))
        # Test on a string with unescaped non-ASCII characters
        # (Technically an invalid URI; expect those characters to be UTF-8
        # encoded).
        result = urllib.parse.unquote_to_bytes("\u6f22%C3%BC")
        expect = b'\xe6\xbc\xa2\xc3\xbc'    # UTF-8 for "\u6f22\u00fc"
        self.assertEqual(expect, result,
                         "using unquote_to_bytes(): %r != %r"
                         % (expect, result))
        # Test with a bytes as input
        given = b'%A2%D8ab%FF'
        expect = b'\xa2\xd8ab\xff'
        result = urllib.parse.unquote_to_bytes(given)
        self.assertEqual(expect, result,
                         "using unquote_to_bytes(): %r != %r"
                         % (expect, result))
        # Test with a bytes as input, with unescaped non-ASCII bytes
        # (Technically an invalid URI; expect those bytes to be preserved)
        given = b'%A2\xd8ab%FF'
        expect = b'\xa2\xd8ab\xff'
        result = urllib.parse.unquote_to_bytes(given)
        self.assertEqual(expect, result,
                         "using unquote_to_bytes(): %r != %r"
                         % (expect, result))

    def test_unquote_with_unicode(self):
        # Characters in the Latin-1 range, encoded with UTF-8
        given = 'br%C3%BCckner_sapporo_20050930.doc'
        expect = 'br\u00fcckner_sapporo_20050930.doc'
        result = urllib.parse.unquote(given)
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))
        # Characters in the Latin-1 range, encoded with None (default)
        result = urllib.parse.unquote(given, encoding=None, errors=None)
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))

        # Characters in the Latin-1 range, encoded with Latin-1
        result = urllib.parse.unquote('br%FCckner_sapporo_20050930.doc',
                                      encoding="latin-1")
        expect = 'br\u00fcckner_sapporo_20050930.doc'
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))

        # Characters in BMP, encoded with UTF-8
        given = "%E6%BC%A2%E5%AD%97"
        expect = "\u6f22\u5b57"             # "Kanji"
        result = urllib.parse.unquote(given)
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))

        # Decode with UTF-8, invalid sequence
        given = "%F3%B1"
        expect = "\ufffd"                   # Replacement character
        result = urllib.parse.unquote(given)
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))

        # Decode with UTF-8, invalid sequence, replace errors
        result = urllib.parse.unquote(given, errors="replace")
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))

        # Decode with UTF-8, invalid sequence, ignoring errors
        given = "%F3%B1"
        expect = ""
        result = urllib.parse.unquote(given, errors="ignore")
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))

        # A mix of non-ASCII and percent-encoded characters, UTF-8
        result = urllib.parse.unquote("\u6f22%C3%BC")
        expect = '\u6f22\u00fc'
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))

        # A mix of non-ASCII and percent-encoded characters, Latin-1
        # (Note, the string contains non-Latin-1-representable characters)
        result = urllib.parse.unquote("\u6f22%FC", encoding="latin-1")
        expect = '\u6f22\u00fc'
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))
Raymond Hettinger4b0f20d2005-10-15 16:41:53 +00001212
Brett Cannon74bfd702003-04-25 09:39:47 +00001213class urlencode_Tests(unittest.TestCase):
1214 """Tests for urlencode()"""
1215
def help_inputtype(self, given, test_type):
    """Shared checks for urlencode() across different input types.

    'given' must encode to exactly the pairs:
        * 1st, 1
        * 2nd, 2
        * 3rd, 3

    No assumption is made about the order of the pairs, since the input
    may be a dictionary.  'test_type' is only used to label failure
    messages.

    """
    encoded = urllib.parse.urlencode(given)

    # Every pair must appear somewhere in the output.
    for pair in ("1st=1", "2nd=2", "3rd=3"):
        self.assertIn(pair, encoded,
                      "testing %s: %s not found in %s" %
                      (test_type, pair, encoded))

    # Three pairs are joined by exactly two ampersands.
    self.assertEqual(encoded.count('&'), 2,
                     "testing %s: expected 2 '&'s; got %s" %
                     (test_type, encoded.count('&')))

    # The first ampersand must sit between two digits.
    first_amp = encoded.index('&')
    char_before = encoded[first_amp - 1]
    char_after = encoded[first_amp + 1]
    self.assertTrue(char_before.isdigit() and char_after.isdigit(),
                    "testing %s: '&' not located in proper place in %s" %
                    (test_type, encoded))

    # 5 chars per pair, plus the two ampersands.
    self.assertEqual(len(encoded), (5 * 3) + 2,
                     "testing %s: "
                     "unexpected number of characters: %s != %s" %
                     (test_type, len(encoded), (5 * 3) + 2))
1247
1248 def test_using_mapping(self):
1249 # Test passing in a mapping object as an argument.
1250 self.help_inputtype({"1st":'1', "2nd":'2', "3rd":'3'},
1251 "using dict as input type")
1252
1253 def test_using_sequence(self):
1254 # Test passing in a sequence of two-item sequences as an argument.
1255 self.help_inputtype([('1st', '1'), ('2nd', '2'), ('3rd', '3')],
1256 "using sequence of two-item tuples as input")
1257
1258 def test_quoting(self):
1259 # Make sure keys and values are quoted using quote_plus()
1260 given = {"&":"="}
1261 expect = "%s=%s" % (hexescape('&'), hexescape('='))
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001262 result = urllib.parse.urlencode(given)
Brett Cannon74bfd702003-04-25 09:39:47 +00001263 self.assertEqual(expect, result)
1264 given = {"key name":"A bunch of pluses"}
1265 expect = "key+name=A+bunch+of+pluses"
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001266 result = urllib.parse.urlencode(given)
Brett Cannon74bfd702003-04-25 09:39:47 +00001267 self.assertEqual(expect, result)
1268
1269 def test_doseq(self):
1270 # Test that passing True for 'doseq' parameter works correctly
1271 given = {'sequence':['1', '2', '3']}
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001272 expect = "sequence=%s" % urllib.parse.quote_plus(str(['1', '2', '3']))
1273 result = urllib.parse.urlencode(given)
Brett Cannon74bfd702003-04-25 09:39:47 +00001274 self.assertEqual(expect, result)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001275 result = urllib.parse.urlencode(given, True)
Brett Cannon74bfd702003-04-25 09:39:47 +00001276 for value in given["sequence"]:
1277 expect = "sequence=%s" % value
Ezio Melottib58e0bd2010-01-23 15:40:09 +00001278 self.assertIn(expect, result)
Brett Cannon74bfd702003-04-25 09:39:47 +00001279 self.assertEqual(result.count('&'), 2,
1280 "Expected 2 '&'s, got %s" % result.count('&'))
1281
Jeremy Hylton1ef7c6b2009-03-26 16:57:30 +00001282 def test_empty_sequence(self):
1283 self.assertEqual("", urllib.parse.urlencode({}))
1284 self.assertEqual("", urllib.parse.urlencode([]))
1285
1286 def test_nonstring_values(self):
1287 self.assertEqual("a=1", urllib.parse.urlencode({"a": 1}))
1288 self.assertEqual("a=None", urllib.parse.urlencode({"a": None}))
1289
1290 def test_nonstring_seq_values(self):
1291 self.assertEqual("a=1&a=2", urllib.parse.urlencode({"a": [1, 2]}, True))
1292 self.assertEqual("a=None&a=a",
1293 urllib.parse.urlencode({"a": [None, "a"]}, True))
Georg Brandl2daf6ae2012-02-20 19:54:16 +01001294 data = collections.OrderedDict([("a", 1), ("b", 1)])
Jeremy Hylton1ef7c6b2009-03-26 16:57:30 +00001295 self.assertEqual("a=a&a=b",
Georg Brandl2daf6ae2012-02-20 19:54:16 +01001296 urllib.parse.urlencode({"a": data}, True))
Jeremy Hylton1ef7c6b2009-03-26 16:57:30 +00001297
Senthil Kumarandf022da2010-07-03 17:48:22 +00001298 def test_urlencode_encoding(self):
1299 # ASCII encoding. Expect %3F with errors="replace'
1300 given = (('\u00a0', '\u00c1'),)
1301 expect = '%3F=%3F'
1302 result = urllib.parse.urlencode(given, encoding="ASCII", errors="replace")
1303 self.assertEqual(expect, result)
1304
1305 # Default is UTF-8 encoding.
1306 given = (('\u00a0', '\u00c1'),)
1307 expect = '%C2%A0=%C3%81'
1308 result = urllib.parse.urlencode(given)
1309 self.assertEqual(expect, result)
1310
1311 # Latin-1 encoding.
1312 given = (('\u00a0', '\u00c1'),)
1313 expect = '%A0=%C1'
1314 result = urllib.parse.urlencode(given, encoding="latin-1")
1315 self.assertEqual(expect, result)
1316
1317 def test_urlencode_encoding_doseq(self):
1318 # ASCII Encoding. Expect %3F with errors="replace'
1319 given = (('\u00a0', '\u00c1'),)
1320 expect = '%3F=%3F'
1321 result = urllib.parse.urlencode(given, doseq=True,
1322 encoding="ASCII", errors="replace")
1323 self.assertEqual(expect, result)
1324
1325 # ASCII Encoding. On a sequence of values.
1326 given = (("\u00a0", (1, "\u00c1")),)
1327 expect = '%3F=1&%3F=%3F'
1328 result = urllib.parse.urlencode(given, True,
1329 encoding="ASCII", errors="replace")
1330 self.assertEqual(expect, result)
1331
1332 # Utf-8
1333 given = (("\u00a0", "\u00c1"),)
1334 expect = '%C2%A0=%C3%81'
1335 result = urllib.parse.urlencode(given, True)
1336 self.assertEqual(expect, result)
1337
1338 given = (("\u00a0", (42, "\u00c1")),)
1339 expect = '%C2%A0=42&%C2%A0=%C3%81'
1340 result = urllib.parse.urlencode(given, True)
1341 self.assertEqual(expect, result)
1342
1343 # latin-1
1344 given = (("\u00a0", "\u00c1"),)
1345 expect = '%A0=%C1'
1346 result = urllib.parse.urlencode(given, True, encoding="latin-1")
1347 self.assertEqual(expect, result)
1348
1349 given = (("\u00a0", (42, "\u00c1")),)
1350 expect = '%A0=42&%A0=%C1'
1351 result = urllib.parse.urlencode(given, True, encoding="latin-1")
1352 self.assertEqual(expect, result)
1353
1354 def test_urlencode_bytes(self):
1355 given = ((b'\xa0\x24', b'\xc1\x24'),)
1356 expect = '%A0%24=%C1%24'
1357 result = urllib.parse.urlencode(given)
1358 self.assertEqual(expect, result)
1359 result = urllib.parse.urlencode(given, True)
1360 self.assertEqual(expect, result)
1361
1362 # Sequence of values
1363 given = ((b'\xa0\x24', (42, b'\xc1\x24')),)
1364 expect = '%A0%24=42&%A0%24=%C1%24'
1365 result = urllib.parse.urlencode(given, True)
1366 self.assertEqual(expect, result)
1367
1368 def test_urlencode_encoding_safe_parameter(self):
1369
1370 # Send '$' (\x24) as safe character
1371 # Default utf-8 encoding
1372
1373 given = ((b'\xa0\x24', b'\xc1\x24'),)
1374 result = urllib.parse.urlencode(given, safe=":$")
1375 expect = '%A0$=%C1$'
1376 self.assertEqual(expect, result)
1377
1378 given = ((b'\xa0\x24', b'\xc1\x24'),)
1379 result = urllib.parse.urlencode(given, doseq=True, safe=":$")
1380 expect = '%A0$=%C1$'
1381 self.assertEqual(expect, result)
1382
1383 # Safe parameter in sequence
1384 given = ((b'\xa0\x24', (b'\xc1\x24', 0xd, 42)),)
1385 expect = '%A0$=%C1$&%A0$=13&%A0$=42'
1386 result = urllib.parse.urlencode(given, True, safe=":$")
1387 self.assertEqual(expect, result)
1388
1389 # Test all above in latin-1 encoding
1390
1391 given = ((b'\xa0\x24', b'\xc1\x24'),)
1392 result = urllib.parse.urlencode(given, safe=":$",
1393 encoding="latin-1")
1394 expect = '%A0$=%C1$'
1395 self.assertEqual(expect, result)
1396
1397 given = ((b'\xa0\x24', b'\xc1\x24'),)
1398 expect = '%A0$=%C1$'
1399 result = urllib.parse.urlencode(given, doseq=True, safe=":$",
1400 encoding="latin-1")
1401
1402 given = ((b'\xa0\x24', (b'\xc1\x24', 0xd, 42)),)
1403 expect = '%A0$=%C1$&%A0$=13&%A0$=42'
1404 result = urllib.parse.urlencode(given, True, safe=":$",
1405 encoding="latin-1")
1406 self.assertEqual(expect, result)
1407
class Pathname_Tests(unittest.TestCase):
    """Test pathname2url() and url2pathname()"""

    def test_basic(self):
        # Make sure simple tests pass
        expected_path = os.path.join("parts", "of", "a", "path")
        expected_url = "parts/of/a/path"
        result = urllib.request.pathname2url(expected_path)
        self.assertEqual(expected_url, result,
                         "pathname2url() failed; %s != %s" %
                         (expected_url, result))
        result = urllib.request.url2pathname(expected_url)
        self.assertEqual(expected_path, result,
                         "url2pathname() failed; %s != %s" %
                         (expected_path, result))

    def test_quoting(self):
        # Test automatic quoting and unquoting works for pathname2url() and
        # url2pathname() respectively
        given = os.path.join("needs", "quot=ing", "here")
        expect = "needs/%s/here" % urllib.parse.quote("quot=ing")
        result = urllib.request.pathname2url(given)
        self.assertEqual(expect, result,
                         "pathname2url() failed; %s != %s" %
                         (expect, result))
        # Feed the quoted URL back in: it must unquote to the original path.
        expect = given
        result = urllib.request.url2pathname(result)
        self.assertEqual(expect, result,
                         "url2pathname() failed; %s != %s" %
                         (expect, result))
        given = os.path.join("make sure", "using_quote")
        expect = "%s/using_quote" % urllib.parse.quote("make sure")
        result = urllib.request.pathname2url(given)
        self.assertEqual(expect, result,
                         "pathname2url() failed; %s != %s" %
                         (expect, result))
        # The expected path keeps the '+', i.e. it is not turned into a space.
        given = "make+sure/using_unquote"
        expect = os.path.join("make+sure", "using_unquote")
        result = urllib.request.url2pathname(given)
        self.assertEqual(expect, result,
                         "url2pathname() failed; %s != %s" %
                         (expect, result))

    @unittest.skipUnless(sys.platform == 'win32',
                         'test specific to the Windows behavior of '
                         'urllib.request.url2pathname().')
    def test_ntpath(self):
        # Several spellings of the drive root must all map to 'C:\'.
        given = ('/C:/', '///C:/', '/C|//')
        expect = 'C:\\'
        for url in given:
            # subTest reports each failing URL instead of stopping at the first.
            with self.subTest(url=url):
                result = urllib.request.url2pathname(url)
                self.assertEqual(expect, result,
                                 'urllib.request.url2pathname() failed; %s != %s' %
                                 (expect, result))
        given = '///C|/path'
        expect = 'C:\\path'
        result = urllib.request.url2pathname(given)
        self.assertEqual(expect, result,
                         'urllib.request.url2pathname() failed; %s != %s' %
                         (expect, result))
1467
class Utility_Tests(unittest.TestCase):
    """Test case covering assorted utility functions of urllib."""

    def test_thishost(self):
        """urllib.request.thishost() must hand back a tuple."""
        host_addresses = urllib.request.thishost()
        self.assertIsInstance(host_addresses, tuple)
1474
Senthil Kumaran690ce9b2009-05-05 18:41:13 +00001475
class URLopener_Tests(FakeHTTPMixin, unittest.TestCase):
    """Test case for the open() and retrieve() methods of URLopener."""

    def test_quoted_open(self):
        # open_spam() echoes the URL it is handed, which exposes whatever
        # quoting open() applied before dispatching to the scheme handler.
        class DummyURLopener(urllib.request.URLopener):
            def open_spam(self, url):
                return url

        expected_warning = (
            'DummyURLopener style of invoking requests is deprecated.',
            DeprecationWarning)
        with support.check_warnings(expected_warning):
            opened = DummyURLopener().open('spam://example/ /')
            self.assertEqual(opened, '//example/%20/')

            # test the safe characters are not quoted by urlopen
            opened = DummyURLopener().open(
                "spam://c:|windows%/:=&?~#+!$,;'@()*[]|/path/")
            self.assertEqual(opened,
                             "//c:|windows%/:=&?~#+!$,;'@()*[]|/path/")

    @support.ignore_warnings(category=DeprecationWarning)
    def test_urlopener_retrieve_file(self):
        # Retrieving a file: URL must yield the path of the local file.
        with support.temp_dir() as tmpdir:
            handle, local_path = tempfile.mkstemp(dir=tmpdir)
            os.close(handle)
            file_url = "file:" + urllib.request.pathname2url(local_path)
            retrieved_path, _headers = urllib.request.URLopener().retrieve(
                file_url)
            # Some buildbots have TEMP folder that uses a lowercase drive letter.
            self.assertEqual(os.path.normcase(retrieved_path),
                             os.path.normcase(local_path))

    @support.ignore_warnings(category=DeprecationWarning)
    def test_urlopener_retrieve_remote(self):
        # The downloaded file must keep the extension found in the URL.
        remote_url = "http://www.python.org/file.txt"
        self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello!")
        self.addCleanup(self.unfakehttp)
        retrieved_path, _headers = urllib.request.URLopener().retrieve(
            remote_url)
        extension = os.path.splitext(retrieved_path)[1]
        self.assertEqual(extension, ".txt")

    @support.ignore_warnings(category=DeprecationWarning)
    def test_local_file_open(self):
        # bpo-35907, CVE-2019-9948: urllib must reject local_file:// scheme
        class DummyURLopener(urllib.request.URLopener):
            def open_local_file(self, url):
                return url

        for url in ('local_file://example', 'local-file://example'):
            self.assertRaises(OSError, urllib.request.urlopen, url)
            # A fresh opener is built for every call, stock class first.
            for opener_class in (urllib.request.URLopener, DummyURLopener):
                for method_name in ("open", "retrieve"):
                    bound_method = getattr(opener_class(), method_name)
                    self.assertRaises(OSError, bound_method, url)
1523 self.assertRaises(OSError, DummyURLopener().retrieve, url)
1524
Xtreakc661b302019-05-19 19:10:06 +05301525
# The tests below are commented out.
# I can't really tell why they keep failing on Windows and SPARC.
# Everywhere else they work fine, but on those machines they sometimes
# fail in one of the tests and sometimes in another.  On my Linux box
# the tests pass.
# If anybody has one of the problematic environments, please help!
# . Facundo
1533#
1534# def server(evt):
Georg Brandlf78e02b2008-06-10 17:40:04 +00001535# import socket, time
Guido van Rossume7ba4952007-06-06 23:52:48 +00001536# serv = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
1537# serv.settimeout(3)
1538# serv.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
1539# serv.bind(("", 9093))
Charles-François Natali6e204602014-07-23 19:28:13 +01001540# serv.listen()
Guido van Rossume7ba4952007-06-06 23:52:48 +00001541# try:
1542# conn, addr = serv.accept()
1543# conn.send("1 Hola mundo\n")
1544# cantdata = 0
1545# while cantdata < 13:
1546# data = conn.recv(13-cantdata)
1547# cantdata += len(data)
1548# time.sleep(.3)
1549# conn.send("2 No more lines\n")
1550# conn.close()
1551# except socket.timeout:
1552# pass
1553# finally:
1554# serv.close()
1555# evt.set()
1556#
1557# class FTPWrapperTests(unittest.TestCase):
1558#
1559# def setUp(self):
Georg Brandlf78e02b2008-06-10 17:40:04 +00001560# import ftplib, time, threading
Guido van Rossume7ba4952007-06-06 23:52:48 +00001561# ftplib.FTP.port = 9093
1562# self.evt = threading.Event()
1563# threading.Thread(target=server, args=(self.evt,)).start()
1564# time.sleep(.1)
1565#
1566# def tearDown(self):
1567# self.evt.wait()
1568#
1569# def testBasic(self):
1570# # connects
1571# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [])
Georg Brandlf78e02b2008-06-10 17:40:04 +00001572# ftp.close()
Guido van Rossume7ba4952007-06-06 23:52:48 +00001573#
1574# def testTimeoutNone(self):
Georg Brandlf78e02b2008-06-10 17:40:04 +00001575# # global default timeout is ignored
1576# import socket
Serhiy Storchaka25d8aea2014-02-08 14:50:08 +02001577# self.assertIsNone(socket.getdefaulttimeout())
Guido van Rossume7ba4952007-06-06 23:52:48 +00001578# socket.setdefaulttimeout(30)
1579# try:
1580# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [])
1581# finally:
Georg Brandlf78e02b2008-06-10 17:40:04 +00001582# socket.setdefaulttimeout(None)
Guido van Rossume7ba4952007-06-06 23:52:48 +00001583# self.assertEqual(ftp.ftp.sock.gettimeout(), 30)
Georg Brandlf78e02b2008-06-10 17:40:04 +00001584# ftp.close()
Guido van Rossume7ba4952007-06-06 23:52:48 +00001585#
Georg Brandlf78e02b2008-06-10 17:40:04 +00001586# def testTimeoutDefault(self):
1587# # global default timeout is used
1588# import socket
Serhiy Storchaka25d8aea2014-02-08 14:50:08 +02001589# self.assertIsNone(socket.getdefaulttimeout())
Georg Brandlf78e02b2008-06-10 17:40:04 +00001590# socket.setdefaulttimeout(30)
1591# try:
1592# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [])
1593# finally:
1594# socket.setdefaulttimeout(None)
1595# self.assertEqual(ftp.ftp.sock.gettimeout(), 30)
1596# ftp.close()
1597#
1598# def testTimeoutValue(self):
1599# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [],
1600# timeout=30)
1601# self.assertEqual(ftp.ftp.sock.gettimeout(), 30)
1602# ftp.close()
Guido van Rossume7ba4952007-06-06 23:52:48 +00001603
Senthil Kumaran8b081b72013-04-10 20:53:12 -07001604
class RequestTests(unittest.TestCase):
    """Unit tests for urllib.request.Request."""

    def test_default_values(self):
        # No data means GET; supplying data (even an empty dict) means POST.
        make_request = urllib.request.Request
        without_data = make_request("http://www.python.org")
        self.assertEqual(without_data.get_method(), 'GET')
        with_data = make_request("http://www.python.org", {})
        self.assertEqual(with_data.get_method(), 'POST')

    def test_with_method_arg(self):
        make_request = urllib.request.Request

        # An explicit method is reported by the attribute and by get_method().
        head_request = make_request("http://www.python.org", method='HEAD')
        self.assertEqual(head_request.method, 'HEAD')
        self.assertEqual(head_request.get_method(), 'HEAD')

        # The explicit method is still reported when data is supplied.
        head_with_data = make_request("http://www.python.org", {},
                                      method='HEAD')
        self.assertEqual(head_with_data.method, 'HEAD')
        self.assertEqual(head_with_data.get_method(), 'HEAD')

        # Reassigning the attribute afterwards changes what get_method() returns.
        mutable_request = make_request("http://www.python.org", method='GET')
        self.assertEqual(mutable_request.get_method(), 'GET')
        mutable_request.method = 'HEAD'
        self.assertEqual(mutable_request.get_method(), 'HEAD')
Skip Montanaro080c9972001-01-28 21:12:22 +00001627
1628
class URL2PathNameTests(unittest.TestCase):
    """Tests for the url2pathname() function imported from nturl2path."""

    def test_converting_drive_letter(self):
        # Both the '|' and the ':' drive separators are accepted.
        cases = (
            ("///C|", 'C:'),
            ("///C:", 'C:'),
            ("///C|/", 'C:\\'),
        )
        for url, expected_path in cases:
            self.assertEqual(url2pathname(url), expected_path)

    def test_converting_when_no_drive_letter(self):
        # Without a drive specifier the slashes are simply converted.
        cases = (
            ("///C/test/", '\\\\\\C\\test\\'),
            ("////C/test/", '\\\\C\\test\\'),
        )
        for url, expected_path in cases:
            self.assertEqual(url2pathname(url), expected_path)

    def test_simple_compare(self):
        converted = url2pathname("///C|/foo/bar/spam.foo")
        self.assertEqual(converted, r'C:\foo\bar\spam.foo')

    def test_non_ascii_drive_letter(self):
        # A non-ASCII character in the drive position must be rejected.
        with self.assertRaises(IOError):
            url2pathname("///\u00e8|/")

    def test_roundtrip_url2pathname(self):
        # path -> URL -> path must give back the starting path.
        original_paths = (
            'C:',
            r'\\\C\test\\',
            r'C:\foo\bar\spam.foo',
        )
        for original in original_paths:
            self.assertEqual(url2pathname(pathname2url(original)), original)
Senthil Kumaran277e9092013-04-10 20:51:19 -07001655
class PathName2URLTests(unittest.TestCase):
    """Tests for the pathname2url() function imported from nturl2path."""

    def test_converting_drive_letter(self):
        # A bare drive and a drive root are expected to give the same URL.
        for path in ("C:", "C:\\"):
            self.assertEqual(pathname2url(path), '///C:')

    def test_converting_when_no_drive_letter(self):
        # Expected URLs for paths with three, two and one leading backslash.
        cases = (
            ('\\\\\\folder\\test\\', '/////folder/test/'),
            ('\\\\folder\\test\\', '////folder/test/'),
            ('\\folder\\test\\', '/folder/test/'),
        )
        for path, expected_url in cases:
            self.assertEqual(pathname2url(path), expected_url)

    def test_simple_compare(self):
        converted = pathname2url(r'C:\foo\bar\spam.foo')
        self.assertEqual(converted, "///C:/foo/bar/spam.foo")

    def test_long_drive_letter(self):
        # A two-letter drive specifier must be rejected.
        with self.assertRaises(IOError):
            pathname2url("XX:\\")

    def test_roundtrip_pathname2url(self):
        # URL -> path -> URL must give back the starting URL.
        original_urls = (
            '///C:',
            '/////folder/test/',
            '///C:/foo/bar/spam.foo',
        )
        for original in original_urls:
            self.assertEqual(pathname2url(url2pathname(original)), original)
Brett Cannon74bfd702003-04-25 09:39:47 +00001683
# Run the whole module through unittest when it is executed as a script.
if __name__ == "__main__":
    unittest.main()