blob: 8f06b08afa0ee7bcf0f1cc564512e1817fc8372a [file] [log] [blame]
Raymond Hettinger15f44ab2016-08-30 10:47:49 -07001"""Regression tests for what was in Python 2's "urllib" module"""
Brett Cannon74bfd702003-04-25 09:39:47 +00002
Jeremy Hylton1afc1692008-06-18 20:49:58 +00003import urllib.parse
4import urllib.request
guido@google.coma119df92011-03-29 11:41:02 -07005import urllib.error
Georg Brandl24420152008-05-26 16:32:26 +00006import http.client
Barry Warsaw820c1202008-06-12 04:06:45 +00007import email.message
Jeremy Hylton66dc8c52007-08-04 03:42:26 +00008import io
Brett Cannon74bfd702003-04-25 09:39:47 +00009import unittest
Benjamin Peterson3c2dca62014-06-07 15:08:04 -070010from unittest.mock import patch
Benjamin Petersonee8712c2008-05-20 21:35:26 +000011from test import support
Brett Cannon74bfd702003-04-25 09:39:47 +000012import os
Antoine Pitrou07df6552014-11-02 17:23:14 +010013try:
14 import ssl
15except ImportError:
16 ssl = None
Senthil Kumaran2d2ea1b2011-04-14 13:16:30 +080017import sys
Georg Brandl5a650a22005-08-26 08:51:34 +000018import tempfile
Senthil Kumaran277e9092013-04-10 20:51:19 -070019from nturl2path import url2pathname, pathname2url
Jeremy Hylton6102e292000-08-31 15:48:10 +000020
Senthil Kumaranc5c5a142012-01-14 19:09:04 +080021from base64 import b64encode
Georg Brandl2daf6ae2012-02-20 19:54:16 +010022import collections
Senthil Kumaranc5c5a142012-01-14 19:09:04 +080023
Senthil Kumaran8b081b72013-04-10 20:53:12 -070024
def hexescape(char):
    """Escape char as RFC 2396 specifies"""
    # "%" followed by the code point in upper-case hex, zero-padded to at
    # least two digits (wider code points keep all of their digits).
    return "%" + format(ord(char), "02X")
Jeremy Hylton6102e292000-08-31 15:48:10 +000031
# Shortcut for testing FancyURLopener: the opener shared by urlopen() calls
# that do not pass explicit proxies.
_urlopener = None


def urlopen(url, data=None, proxies=None):
    """urlopen(url [, data]) -> open file-like object"""
    global _urlopener
    if proxies is not None:
        # Explicit proxies get a one-off opener that is never cached.
        opener = urllib.request.FancyURLopener(proxies=proxies)
    else:
        if not _urlopener:
            # First proxy-less call: build the shared opener and remember it.
            _urlopener = FancyURLopener()
        opener = _urlopener
    # Only forward ``data`` when the caller actually supplied it.
    open_args = (url,) if data is None else (url, data)
    return opener.open(*open_args)
50
Senthil Kumarance260142011-11-01 01:35:17 +080051
def FancyURLopener():
    """Return a new urllib.request.FancyURLopener.

    The opener is created inside support.check_warnings() with a filter for
    the FancyURLopener DeprecationWarning message.
    """
    deprecation_filter = (
        'FancyURLopener style of invoking requests is deprecated.',
        DeprecationWarning,
    )
    with support.check_warnings(deprecation_filter):
        return urllib.request.FancyURLopener()
57
58
def fakehttp(fakedata):
    """Return an HTTPConnection subclass that serves *fakedata*.

    connect() on the returned class installs an in-memory FakeSocket
    pre-loaded with *fakedata* instead of opening a network connection.
    Bytes passed to the socket's sendall() are stored on the class as
    ``buf`` so tests can inspect the request that was sent.
    """
    class FakeSocket(io.BytesIO):
        # Starts at one; makefile() adds a reference and close() drops one.
        io_refs = 1

        def sendall(self, data):
            # Keep the outgoing request for later verification.
            FakeHTTPConnection.buf = data

        def makefile(self, *args, **kwds):
            # The socket doubles as its own file object.
            self.io_refs += 1
            return self

        def read(self, amt=None):
            # A closed socket reads as EOF instead of raising.
            return b"" if self.closed else super().read(amt)

        def readline(self, length=None):
            # A closed socket reads as EOF instead of raising.
            return b"" if self.closed else super().readline(length)

        def close(self):
            # Really close only once the last reference is released.
            self.io_refs -= 1
            if not self.io_refs:
                super().close()

    class FakeHTTPConnection(http.client.HTTPConnection):

        # buffer to store data for verification in urlopen tests.
        buf = None

        def connect(self):
            # Expose the socket on the class as well as on the instance.
            self.sock = type(self).fakesock = FakeSocket(self.fakedata)

    # Attach the canned response to the class that is handed back.
    FakeHTTPConnection.fakedata = fakedata

    return FakeHTTPConnection
96
97
class FakeHTTPMixin:
    """Mixin that temporarily replaces http.client.HTTPConnection."""

    def fakehttp(self, fakedata):
        """Install a fake connection class serving *fakedata*."""
        # Remember the current class so unfakehttp() can put it back.
        current_class = http.client.HTTPConnection
        self._connection_class = current_class
        http.client.HTTPConnection = fakehttp(fakedata)

    def unfakehttp(self):
        """Restore the connection class saved by fakehttp()."""
        saved_class = self._connection_class
        http.client.HTTPConnection = saved_class
105
106
class FakeFTPMixin:
    """Mixin that temporarily replaces urllib.request.ftpwrapper."""

    def fakeftp(self):
        """Swap urllib.request.ftpwrapper for a stub that does no I/O."""
        class FakeFtpWrapper:
            def __init__(self, user, passwd, host, port, dirs, timeout=None,
                         persistent=True):
                # Accepts the same arguments but never connects anywhere.
                pass

            def retrfile(self, file, type):
                # Always an empty payload paired with 0.
                empty_payload = io.BytesIO()
                return empty_payload, 0

            def close(self):
                pass

        # Remember the current wrapper so unfakeftp() can put it back.
        self._ftpwrapper_class = urllib.request.ftpwrapper
        urllib.request.ftpwrapper = FakeFtpWrapper

    def unfakeftp(self):
        """Restore the wrapper class saved by fakeftp()."""
        saved_wrapper = self._ftpwrapper_class
        urllib.request.ftpwrapper = saved_wrapper
125
126
class urlopen_FileTests(unittest.TestCase):
    """Test urlopen() opening a temporary file.

    Try to test as much functionality as possible so as to cut down on reliance
    on connecting to the Net for testing.

    """

    def setUp(self):
        # Create a temp file to use for testing
        self.text = bytes("test_urllib: %s\n" % self.__class__.__name__,
                          "ascii")
        with open(support.TESTFN, 'wb') as f:
            f.write(self.text)
        # Register the removal before calling urlopen(): tearDown() is not
        # run when setUp() raises, but cleanups are, so the temp file no
        # longer leaks if opening the URL fails.  Cleanups run after
        # tearDown(), so the object is still closed before the file goes.
        self.addCleanup(os.remove, support.TESTFN)
        self.pathname = support.TESTFN
        self.returned_obj = urlopen("file:%s" % self.pathname)

    def tearDown(self):
        """Shut down the open object"""
        self.returned_obj.close()

    def test_interface(self):
        # Make sure object returned by urlopen() has the specified methods
        for attr in ("read", "readline", "readlines", "fileno",
                     "close", "info", "geturl", "getcode", "__iter__"):
            self.assertTrue(hasattr(self.returned_obj, attr),
                            "object returned by urlopen() lacks %s attribute" %
                            attr)

    def test_read(self):
        self.assertEqual(self.text, self.returned_obj.read())

    def test_readline(self):
        self.assertEqual(self.text, self.returned_obj.readline())
        self.assertEqual(b'', self.returned_obj.readline(),
                         "calling readline() after exhausting the file did not"
                         " return an empty string")

    def test_readlines(self):
        lines_list = self.returned_obj.readlines()
        self.assertEqual(len(lines_list), 1,
                         "readlines() returned the wrong number of lines")
        self.assertEqual(lines_list[0], self.text,
                         "readlines() returned improper text")

    def test_fileno(self):
        file_num = self.returned_obj.fileno()
        self.assertIsInstance(file_num, int, "fileno() did not return an int")
        self.assertEqual(os.read(file_num, len(self.text)), self.text,
                         "Reading on the file descriptor returned by fileno() "
                         "did not return the expected text")

    def test_close(self):
        # Test close() by calling it here and then having it be called again
        # by the tearDown() method for the test
        self.returned_obj.close()

    def test_info(self):
        self.assertIsInstance(self.returned_obj.info(), email.message.Message)

    def test_geturl(self):
        self.assertEqual(self.returned_obj.geturl(), self.pathname)

    def test_getcode(self):
        self.assertIsNone(self.returned_obj.getcode())

    def test_iter(self):
        # Test iterator
        # Don't need to count number of iterations since test would fail the
        # instant it returned anything beyond the first line from the
        # comparison.
        # Use the iterator in the usual implicit way to test for ticket #4608.
        for line in self.returned_obj:
            self.assertEqual(line, self.text)

    def test_relativelocalfile(self):
        # A relative path without a scheme must be rejected with ValueError.
        self.assertRaises(ValueError, urllib.request.urlopen,
                          './' + self.pathname)
208
class ProxyTests(unittest.TestCase):
    """Tests for proxy discovery and bypass via environment variables."""

    def setUp(self):
        # Records changes to env vars
        self.env = support.EnvironmentVarGuard()
        # Delete all proxy related env vars
        proxy_vars = [name for name in list(os.environ)
                      if 'proxy' in name.lower()]
        for name in proxy_vars:
            self.env.unset(name)

    def tearDown(self):
        # Restore all proxy related env vars
        self.env.__exit__()
        del self.env

    def test_getproxies_environment_keep_no_proxies(self):
        bypass = urllib.request.proxy_bypass_environment
        self.env.set('NO_PROXY', 'localhost')
        found = urllib.request.getproxies_environment()
        # getproxies_environment use lowered case truncated (no '_proxy') keys
        self.assertEqual('localhost', found['no'])
        # List of no_proxies with space.
        self.env.set('NO_PROXY',
                     'localhost, anotherdomain.com, newdomain.com:1234')
        self.assertTrue(bypass('anotherdomain.com'))
        self.assertTrue(bypass('anotherdomain.com:8888'))
        self.assertTrue(bypass('newdomain.com:1234'))

    def test_proxy_cgi_ignore(self):
        get_proxies = urllib.request.getproxies_environment
        try:
            self.env.set('HTTP_PROXY', 'http://somewhere:3128')
            self.assertEqual('http://somewhere:3128', get_proxies()['http'])
            # With REQUEST_METHOD set, the 'http' entry is expected to be
            # dropped from the result.
            self.env.set('REQUEST_METHOD', 'GET')
            self.assertNotIn('http', get_proxies())
        finally:
            self.env.unset('REQUEST_METHOD')
            self.env.unset('HTTP_PROXY')

    def test_proxy_bypass_environment_host_match(self):
        bypass = urllib.request.proxy_bypass_environment
        self.env.set('NO_PROXY',
                     'localhost, anotherdomain.com, newdomain.com:1234')
        bypassed_hosts = (
            'localhost',
            'LocalHost',                # MixedCase
            'LOCALHOST',                # UPPERCASE
            'newdomain.com:1234',
            'anotherdomain.com:8888',
            'www.newdomain.com:1234',
        )
        for host in bypassed_hosts:
            self.assertTrue(bypass(host))
        proxied_hosts = (
            'prelocalhost',
            'newdomain.com',            # no port
            'newdomain.com:1235',       # wrong port
        )
        for host in proxied_hosts:
            self.assertFalse(bypass(host))
Senthil Kumarana7c0ff22016-04-25 08:16:23 -0700260
class ProxyTests_withOrderedEnv(unittest.TestCase):
    """Proxy environment tests that depend on variable ordering."""

    def setUp(self):
        # We need to test conditions, where variable order _is_ significant
        self._saved_env = os.environ
        # Monkey patch os.environ, start with empty fake environment
        os.environ = collections.OrderedDict()

    def tearDown(self):
        # Put the real environment mapping back.
        os.environ = self._saved_env

    def test_getproxies_environment_prefer_lowercase(self):
        bypass = urllib.request.proxy_bypass_environment
        get_proxies = urllib.request.getproxies_environment
        fake_env = os.environ

        # Test lowercase preference with removal
        fake_env['no_proxy'] = ''
        fake_env['No_Proxy'] = 'localhost'
        self.assertFalse(bypass('localhost'))
        self.assertFalse(bypass('arbitrary'))
        fake_env['http_proxy'] = ''
        fake_env['HTTP_PROXY'] = 'http://somewhere:3128'
        self.assertEqual({}, get_proxies())

        # Test lowercase preference of proxy bypass and correct matching
        # including ports
        fake_env['no_proxy'] = 'localhost, noproxy.com, my.proxy:1234'
        fake_env['No_Proxy'] = 'xyz.com'
        self.assertTrue(bypass('localhost'))
        self.assertTrue(bypass('noproxy.com:5678'))
        self.assertTrue(bypass('my.proxy:1234'))
        self.assertFalse(bypass('my.proxy'))
        self.assertFalse(bypass('arbitrary'))

        # Test lowercase preference with replacement
        fake_env['http_proxy'] = 'http://somewhere:3128'
        fake_env['Http_Proxy'] = 'http://somewhereelse:3128'
        self.assertEqual('http://somewhere:3128', get_proxies()['http'])
Benjamin Peterson9bc93512008-09-22 22:10:59 +0000295
class urlopen_HttpTests(unittest.TestCase, FakeHTTPMixin, FakeFTPMixin):
    """Test urlopen() opening a fake http connection."""

    def check_read(self, ver):
        # ``ver`` is the bytes HTTP version placed in the status line.
        canned = b"HTTP/" + ver + b" 200 OK\r\n\r\nHello!"
        self.fakehttp(canned)
        try:
            response = urlopen("http://python.org/")
            self.assertEqual(response.readline(), b"Hello!")
            self.assertEqual(response.readline(), b"")
            self.assertEqual(response.geturl(), 'http://python.org/')
            self.assertEqual(response.getcode(), 200)
        finally:
            self.unfakehttp()

    def test_url_fragment(self):
        # Issue #11703: geturl() omits fragments in the original URL.
        fragment_url = 'http://docs.python.org/library/urllib.html#OK'
        self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello!")
        try:
            response = urllib.request.urlopen(fragment_url)
            self.assertEqual(response.geturl(), fragment_url)
        finally:
            self.unfakehttp()

    def test_willclose(self):
        self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello!")
        try:
            response = urlopen("http://www.python.org")
            self.assertTrue(response.fp.will_close)
        finally:
            self.unfakehttp()

    def test_read_0_9(self):
        # "0.9" response accepted (but not "simple responses" without
        # a status line)
        self.check_read(b"0.9")

    def test_read_1_0(self):
        self.check_read(b"1.0")

    def test_read_1_1(self):
        self.check_read(b"1.1")

    def test_read_bogus(self):
        # urlopen() should raise OSError for many error codes.
        canned = b'''HTTP/1.1 401 Authentication Required
Date: Wed, 02 Jan 2008 03:03:54 GMT
Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
Connection: close
Content-Type: text/html; charset=iso-8859-1
'''
        self.fakehttp(canned)
        try:
            self.assertRaises(OSError, urlopen, "http://python.org/")
        finally:
            self.unfakehttp()

    def test_invalid_redirect(self):
        # urlopen() should raise OSError for many error codes.
        canned = b'''HTTP/1.1 302 Found
Date: Wed, 02 Jan 2008 03:03:54 GMT
Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
Location: file://guidocomputer.athome.com:/python/license
Connection: close
Content-Type: text/html; charset=iso-8859-1
'''
        self.fakehttp(canned)
        try:
            expected_msg = "Redirection to url 'file:"
            with self.assertRaisesRegex(urllib.error.HTTPError, expected_msg):
                urlopen("http://python.org/")
        finally:
            self.unfakehttp()

    def test_redirect_limit_independent(self):
        # Ticket #12923: make sure independent requests each use their
        # own retry limit.
        canned = b'''HTTP/1.1 302 Found
Location: file://guidocomputer.athome.com:/python/license
Connection: close
'''
        for _attempt in range(FancyURLopener().maxtries):
            self.fakehttp(canned)
            try:
                self.assertRaises(urllib.error.HTTPError, urlopen,
                                  "http://something")
            finally:
                self.unfakehttp()

    def test_empty_socket(self):
        # urlopen() raises OSError if the underlying socket does not send any
        # data. (#1680230)
        self.fakehttp(b'')
        try:
            self.assertRaises(OSError, urlopen, "http://something")
        finally:
            self.unfakehttp()

    def test_missing_localfile(self):
        # Test for #10836
        with self.assertRaises(urllib.error.URLError) as caught:
            urlopen('file://localhost/a/file/which/doesnot/exists.py')
        error = caught.exception
        self.assertTrue(error.filename)
        self.assertTrue(error.reason)

    def test_file_notexists(self):
        handle, tmp_file = tempfile.mkstemp()
        tmp_fileurl = 'file://localhost/' + tmp_file.replace(os.path.sep, '/')
        try:
            # While the file exists the URL must open successfully.
            self.assertTrue(os.path.exists(tmp_file))
            with urlopen(tmp_fileurl) as fobj:
                self.assertTrue(fobj)
        finally:
            os.close(handle)
            os.unlink(tmp_file)
        # Once the file is gone the same URL must fail.
        self.assertFalse(os.path.exists(tmp_file))
        with self.assertRaises(urllib.error.URLError):
            urlopen(tmp_fileurl)

    def test_ftp_nohost(self):
        test_ftp_url = 'ftp:///path'
        with self.assertRaises(urllib.error.URLError) as caught:
            urlopen(test_ftp_url)
        error = caught.exception
        self.assertFalse(error.filename)
        self.assertTrue(error.reason)

    def test_ftp_nonexisting(self):
        with self.assertRaises(urllib.error.URLError) as caught:
            urlopen('ftp://localhost/a/file/which/doesnot/exists.py')
        error = caught.exception
        self.assertFalse(error.filename)
        self.assertTrue(error.reason)

    @patch.object(urllib.request, 'MAXFTPCACHE', 0)
    def test_ftp_cache_pruning(self):
        self.fakeftp()
        try:
            # Seed the cache with one entry before opening an FTP URL while
            # MAXFTPCACHE is patched to 0.
            seeded = urllib.request.ftpwrapper(
                'user', 'pass', 'localhost', 21, [])
            urllib.request.ftpcache['test'] = seeded
            urlopen('ftp://localhost')
        finally:
            self.unfakeftp()

    def test_userpass_inurl(self):
        self.fakehttp(b"HTTP/1.0 200 OK\r\n\r\nHello!")
        try:
            response = urlopen("http://user:pass@python.org/")
            self.assertEqual(response.readline(), b"Hello!")
            self.assertEqual(response.readline(), b"")
            self.assertEqual(response.geturl(), 'http://user:pass@python.org/')
            self.assertEqual(response.getcode(), 200)
        finally:
            self.unfakehttp()

    def test_userpass_inurl_w_spaces(self):
        self.fakehttp(b"HTTP/1.0 200 OK\r\n\r\nHello!")
        try:
            userpass = "a b:c d"
            url = "http://{}@python.org/".format(userpass)
            # Must be looked up after fakehttp() so it is the fake class.
            fakehttp_wrapper = http.client.HTTPConnection
            encoded_credentials = b64encode(
                userpass.encode("ASCII")).decode("ASCII")
            authorization = ("Authorization: Basic %s\r\n" %
                             encoded_credentials)
            response = urlopen(url)
            # The authorization header must be in place
            sent_request = fakehttp_wrapper.buf.decode("UTF-8")
            self.assertIn(authorization, sent_request)
            self.assertEqual(response.readline(), b"Hello!")
            self.assertEqual(response.readline(), b"")
            # the spaces are quoted in URL so no match
            self.assertNotEqual(response.geturl(), url)
            self.assertEqual(response.getcode(), 200)
        finally:
            self.unfakehttp()

    def test_URLopener_deprecation(self):
        with support.check_warnings(('', DeprecationWarning)):
            urllib.request.URLopener()

    @unittest.skipUnless(ssl, "ssl module required")
    def test_cafile_and_context(self):
        # Passing both cafile and context must be rejected with ValueError.
        context = ssl.create_default_context()
        with self.assertRaises(ValueError):
            urllib.request.urlopen(
                "https://localhost", cafile="/nonexistent/path",
                context=context
            )
476
class urlopen_DataTests(unittest.TestCase):
    """Test urlopen() opening a data URL."""

    def _open(self, url):
        """Open *url* and arrange for the response to be closed afterwards."""
        response = urllib.request.urlopen(url)
        # Previously the responses opened for each test were never closed.
        self.addCleanup(response.close)
        return response

    def setUp(self):
        # text containing URL special- and unicode-characters
        self.text = "test data URLs :;,%=& \u00f6 \u00c4 "
        # 2x1 pixel RGB PNG image with one black and one white pixel
        self.image = (
            b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\x02\x00\x00\x00'
            b'\x01\x08\x02\x00\x00\x00{@\xe8\xdd\x00\x00\x00\x01sRGB\x00\xae'
            b'\xce\x1c\xe9\x00\x00\x00\x0fIDAT\x08\xd7c```\xf8\xff\xff?\x00'
            b'\x06\x01\x02\xfe\no/\x1e\x00\x00\x00\x00IEND\xaeB`\x82')

        self.text_url = (
            "data:text/plain;charset=UTF-8,test%20data%20URLs%20%3A%3B%2C%25%3"
            "D%26%20%C3%B6%20%C3%84%20")
        self.text_url_base64 = (
            "data:text/plain;charset=ISO-8859-1;base64,dGVzdCBkYXRhIFVSTHMgOjs"
            "sJT0mIPYgxCA%3D")
        # base64 encoded data URL that contains ignorable spaces,
        # such as "\n", " ", "%0A", and "%20".
        self.image_url = (
            "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAIAAAABCAIAAAB7\n"
            "QOjdAAAAAXNSR0IArs4c6QAAAA9JREFUCNdj%0AYGBg%2BP//PwAGAQL%2BCm8 "
            "vHgAAAABJRU5ErkJggg%3D%3D%0A%20")

        self.text_url_resp = self._open(self.text_url)
        self.text_url_base64_resp = self._open(self.text_url_base64)
        self.image_url_resp = self._open(self.image_url)

    def test_interface(self):
        # Make sure object returned by urlopen() has the specified methods
        for attr in ("read", "readline", "readlines",
                     "close", "info", "geturl", "getcode", "__iter__"):
            self.assertTrue(hasattr(self.text_url_resp, attr),
                            "object returned by urlopen() lacks %s attribute" %
                            attr)

    def test_info(self):
        self.assertIsInstance(self.text_url_resp.info(), email.message.Message)
        self.assertEqual(self.text_url_base64_resp.info().get_params(),
                         [('text/plain', ''), ('charset', 'ISO-8859-1')])
        self.assertEqual(self.image_url_resp.info()['content-length'],
                         str(len(self.image)))
        # A bare "data:," URL is expected to report the default media type.
        with urllib.request.urlopen("data:,") as bare_resp:
            self.assertEqual(bare_resp.info().get_params(),
                             [('text/plain', ''), ('charset', 'US-ASCII')])

    def test_geturl(self):
        self.assertEqual(self.text_url_resp.geturl(), self.text_url)
        self.assertEqual(self.text_url_base64_resp.geturl(),
                         self.text_url_base64)
        self.assertEqual(self.image_url_resp.geturl(), self.image_url)

    def test_read_text(self):
        # Decode with the charset announced in the response headers.
        charset = dict(self.text_url_resp.info().get_params())['charset']
        self.assertEqual(self.text_url_resp.read().decode(charset), self.text)

    def test_read_text_base64(self):
        # Decode with the charset announced in the response headers.
        charset = dict(
            self.text_url_base64_resp.info().get_params())['charset']
        self.assertEqual(self.text_url_base64_resp.read().decode(charset),
                         self.text)

    def test_read_image(self):
        self.assertEqual(self.image_url_resp.read(), self.image)

    def test_missing_comma(self):
        self.assertRaises(ValueError, urllib.request.urlopen,
                          'data:text/plain')

    def test_invalid_base64_data(self):
        # missing padding character
        self.assertRaises(ValueError, urllib.request.urlopen,
                          'data:;base64,Cg=')
549
Brett Cannon19691362003-04-29 05:08:06 +0000550class urlretrieve_FileTests(unittest.TestCase):
Brett Cannon74bfd702003-04-25 09:39:47 +0000551 """Test urllib.urlretrieve() on local files"""
Skip Montanaro080c9972001-01-28 21:12:22 +0000552
Brett Cannon19691362003-04-29 05:08:06 +0000553 def setUp(self):
Georg Brandl5a650a22005-08-26 08:51:34 +0000554 # Create a list of temporary files. Each item in the list is a file
555 # name (absolute path or relative to the current working directory).
556 # All files in this list will be deleted in the tearDown method. Note,
557 # this only helps to makes sure temporary files get deleted, but it
558 # does nothing about trying to close files that may still be open. It
559 # is the responsibility of the developer to properly close files even
560 # when exceptional conditions occur.
561 self.tempFiles = []
562
Brett Cannon19691362003-04-29 05:08:06 +0000563 # Create a temporary file.
Benjamin Petersonee8712c2008-05-20 21:35:26 +0000564 self.registerFileForCleanUp(support.TESTFN)
Guido van Rossuma0982942007-07-10 08:30:03 +0000565 self.text = b'testing urllib.urlretrieve'
Georg Brandl5a650a22005-08-26 08:51:34 +0000566 try:
Benjamin Petersonee8712c2008-05-20 21:35:26 +0000567 FILE = open(support.TESTFN, 'wb')
Georg Brandl5a650a22005-08-26 08:51:34 +0000568 FILE.write(self.text)
569 FILE.close()
570 finally:
571 try: FILE.close()
572 except: pass
Brett Cannon19691362003-04-29 05:08:06 +0000573
574 def tearDown(self):
Georg Brandl5a650a22005-08-26 08:51:34 +0000575 # Delete the temporary files.
576 for each in self.tempFiles:
577 try: os.remove(each)
578 except: pass
579
580 def constructLocalFileUrl(self, filePath):
Victor Stinner6c6f8512010-08-07 10:09:35 +0000581 filePath = os.path.abspath(filePath)
582 try:
Marc-André Lemburg8f36af72011-02-25 15:42:01 +0000583 filePath.encode("utf-8")
Victor Stinner6c6f8512010-08-07 10:09:35 +0000584 except UnicodeEncodeError:
585 raise unittest.SkipTest("filePath is not encodable to utf8")
586 return "file://%s" % urllib.request.pathname2url(filePath)
Georg Brandl5a650a22005-08-26 08:51:34 +0000587
Guido van Rossum70d0dda2007-08-29 01:53:26 +0000588 def createNewTempFile(self, data=b""):
Georg Brandl5a650a22005-08-26 08:51:34 +0000589 """Creates a new temporary file containing the specified data,
590 registers the file for deletion during the test fixture tear down, and
591 returns the absolute path of the file."""
592
593 newFd, newFilePath = tempfile.mkstemp()
594 try:
595 self.registerFileForCleanUp(newFilePath)
596 newFile = os.fdopen(newFd, "wb")
597 newFile.write(data)
598 newFile.close()
599 finally:
600 try: newFile.close()
601 except: pass
602 return newFilePath
603
604 def registerFileForCleanUp(self, fileName):
605 self.tempFiles.append(fileName)
Brett Cannon19691362003-04-29 05:08:06 +0000606
607 def test_basic(self):
608 # Make sure that a local file just gets its own location returned and
609 # a headers value is returned.
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000610 result = urllib.request.urlretrieve("file:%s" % support.TESTFN)
Benjamin Petersonee8712c2008-05-20 21:35:26 +0000611 self.assertEqual(result[0], support.TESTFN)
Ezio Melottie9615932010-01-24 19:26:24 +0000612 self.assertIsInstance(result[1], email.message.Message,
Martin Panter7462b6492015-11-02 03:37:02 +0000613 "did not get an email.message.Message instance "
Ezio Melottie9615932010-01-24 19:26:24 +0000614 "as second returned value")
Brett Cannon19691362003-04-29 05:08:06 +0000615
616 def test_copy(self):
617 # Test that setting the filename argument works.
Benjamin Petersonee8712c2008-05-20 21:35:26 +0000618 second_temp = "%s.2" % support.TESTFN
Georg Brandl5a650a22005-08-26 08:51:34 +0000619 self.registerFileForCleanUp(second_temp)
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000620 result = urllib.request.urlretrieve(self.constructLocalFileUrl(
Benjamin Petersonee8712c2008-05-20 21:35:26 +0000621 support.TESTFN), second_temp)
Brett Cannon19691362003-04-29 05:08:06 +0000622 self.assertEqual(second_temp, result[0])
Benjamin Petersonc9c0f202009-06-30 23:06:06 +0000623 self.assertTrue(os.path.exists(second_temp), "copy of the file was not "
Brett Cannon19691362003-04-29 05:08:06 +0000624 "made")
Alex Martelli01c77c62006-08-24 02:58:11 +0000625 FILE = open(second_temp, 'rb')
Brett Cannon19691362003-04-29 05:08:06 +0000626 try:
627 text = FILE.read()
Brett Cannon19691362003-04-29 05:08:06 +0000628 FILE.close()
Georg Brandl5a650a22005-08-26 08:51:34 +0000629 finally:
630 try: FILE.close()
631 except: pass
Brett Cannon19691362003-04-29 05:08:06 +0000632 self.assertEqual(self.text, text)
633
634 def test_reporthook(self):
635 # Make sure that the reporthook works.
Senthil Kumarane24f96a2012-03-13 19:29:33 -0700636 def hooktester(block_count, block_read_size, file_size, count_holder=[0]):
637 self.assertIsInstance(block_count, int)
638 self.assertIsInstance(block_read_size, int)
639 self.assertIsInstance(file_size, int)
640 self.assertEqual(block_count, count_holder[0])
Brett Cannon19691362003-04-29 05:08:06 +0000641 count_holder[0] = count_holder[0] + 1
Benjamin Petersonee8712c2008-05-20 21:35:26 +0000642 second_temp = "%s.2" % support.TESTFN
Georg Brandl5a650a22005-08-26 08:51:34 +0000643 self.registerFileForCleanUp(second_temp)
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000644 urllib.request.urlretrieve(
645 self.constructLocalFileUrl(support.TESTFN),
Georg Brandl5a650a22005-08-26 08:51:34 +0000646 second_temp, hooktester)
647
648 def test_reporthook_0_bytes(self):
649 # Test on zero length file. Should call reporthook only 1 time.
650 report = []
Senthil Kumarane24f96a2012-03-13 19:29:33 -0700651 def hooktester(block_count, block_read_size, file_size, _report=report):
652 _report.append((block_count, block_read_size, file_size))
Georg Brandl5a650a22005-08-26 08:51:34 +0000653 srcFileName = self.createNewTempFile()
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000654 urllib.request.urlretrieve(self.constructLocalFileUrl(srcFileName),
Benjamin Petersonee8712c2008-05-20 21:35:26 +0000655 support.TESTFN, hooktester)
Georg Brandl5a650a22005-08-26 08:51:34 +0000656 self.assertEqual(len(report), 1)
657 self.assertEqual(report[0][2], 0)
658
659 def test_reporthook_5_bytes(self):
660 # Test on 5 byte file. Should call reporthook only 2 times (once when
661 # the "network connection" is established and once when the block is
Senthil Kumarane24f96a2012-03-13 19:29:33 -0700662 # read).
Georg Brandl5a650a22005-08-26 08:51:34 +0000663 report = []
Senthil Kumarane24f96a2012-03-13 19:29:33 -0700664 def hooktester(block_count, block_read_size, file_size, _report=report):
665 _report.append((block_count, block_read_size, file_size))
Guido van Rossum70d0dda2007-08-29 01:53:26 +0000666 srcFileName = self.createNewTempFile(b"x" * 5)
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000667 urllib.request.urlretrieve(self.constructLocalFileUrl(srcFileName),
Benjamin Petersonee8712c2008-05-20 21:35:26 +0000668 support.TESTFN, hooktester)
Georg Brandl5a650a22005-08-26 08:51:34 +0000669 self.assertEqual(len(report), 2)
Gregory P. Smith6d9388f2012-11-10 15:12:55 -0800670 self.assertEqual(report[0][2], 5)
671 self.assertEqual(report[1][2], 5)
Georg Brandl5a650a22005-08-26 08:51:34 +0000672
673 def test_reporthook_8193_bytes(self):
674 # Test on 8193 byte file. Should call reporthook only 3 times (once
675 # when the "network connection" is established, once for the next 8192
676 # bytes, and once for the last byte).
677 report = []
Senthil Kumarane24f96a2012-03-13 19:29:33 -0700678 def hooktester(block_count, block_read_size, file_size, _report=report):
679 _report.append((block_count, block_read_size, file_size))
Guido van Rossum70d0dda2007-08-29 01:53:26 +0000680 srcFileName = self.createNewTempFile(b"x" * 8193)
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000681 urllib.request.urlretrieve(self.constructLocalFileUrl(srcFileName),
Benjamin Petersonee8712c2008-05-20 21:35:26 +0000682 support.TESTFN, hooktester)
Georg Brandl5a650a22005-08-26 08:51:34 +0000683 self.assertEqual(len(report), 3)
Gregory P. Smith6d9388f2012-11-10 15:12:55 -0800684 self.assertEqual(report[0][2], 8193)
685 self.assertEqual(report[0][1], 8192)
Senthil Kumarane24f96a2012-03-13 19:29:33 -0700686 self.assertEqual(report[1][1], 8192)
Gregory P. Smith6d9388f2012-11-10 15:12:55 -0800687 self.assertEqual(report[2][1], 8192)
Skip Montanaro080c9972001-01-28 21:12:22 +0000688
Senthil Kumarance260142011-11-01 01:35:17 +0800689
class urlretrieve_HttpTests(unittest.TestCase, FakeHTTPMixin):
    """Exercise urllib.request.urlretrieve() over a faked HTTP connection"""

    # Canned reply whose Content-Length header promises 100 bytes although
    # the body that follows is only a few bytes long.
    _TRUNCATED_REPLY = b'''HTTP/1.1 200 OK
Date: Wed, 02 Jan 2008 03:03:54 GMT
Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
Connection: close
Content-Length: 100
Content-Type: text/html; charset=iso-8859-1

FF
'''

    def _check_short_content(self, **retrieve_kwargs):
        # Serve the truncated reply, require ContentTooShortError from
        # urlretrieve(), and always remove the fake connection afterwards.
        self.fakehttp(self._TRUNCATED_REPLY)
        try:
            with self.assertRaises(urllib.error.ContentTooShortError):
                urllib.request.urlretrieve('http://example.com/',
                                           **retrieve_kwargs)
        finally:
            self.unfakehttp()

    def test_short_content_raises_ContentTooShortError(self):
        def ignore_progress(block_count, block_size, total_size):
            return None

        self._check_short_content(reporthook=ignore_progress)

    def test_short_content_raises_ContentTooShortError_without_reporthook(self):
        self._check_short_content()
729
730
class QuotingTests(unittest.TestCase):
    r"""Tests for urllib.parse.quote() and urllib.parse.quote_plus()

    According to RFC 2396 (Uniform Resource Identifiers), to escape a
    character you write it as '%' + <2 character US-ASCII hex value>.
    The Python code of ``'%' + hex(ord(<character>))[2:]`` escapes a
    character properly. Case does not matter on the hex letters.

    The various character sets specified are:

    Reserved characters : ";/?:@&=+$,"
        Have special meaning in URIs and must be escaped if not being used for
        their special meaning
    Data characters : letters, digits, and "-_.!~*'()"
        Unreserved and do not need to be escaped; can be, though, if desired
    Control characters : 0x00 - 0x1F, 0x7F
        Have no use in URIs so must be escaped
    space : 0x20
        Must be escaped
    Delimiters : '<>#%"'
        Must be escaped
    Unwise : "{}|\^[]`"
        Must be escaped

    """

    def test_never_quote(self):
        # Make sure quote() and quote_plus() do not quote letters, digits,
        # and "_.-"
        do_not_quote = ''.join(["ABCDEFGHIJKLMNOPQRSTUVWXYZ",
                                "abcdefghijklmnopqrstuvwxyz",
                                "0123456789",
                                "_.-"])
        result = urllib.parse.quote(do_not_quote)
        self.assertEqual(do_not_quote, result,
                         "using quote(): %r != %r" % (do_not_quote, result))
        result = urllib.parse.quote_plus(do_not_quote)
        self.assertEqual(do_not_quote, result,
                         "using quote_plus(): %r != %r" %
                         (do_not_quote, result))

    def test_default_safe(self):
        # Test '/' is default value for 'safe' parameter
        self.assertEqual(urllib.parse.quote.__defaults__[0], '/')

    def test_safe(self):
        # Test setting 'safe' parameter does what it should do
        quote_by_default = "<>"
        result = urllib.parse.quote(quote_by_default, safe=quote_by_default)
        self.assertEqual(quote_by_default, result,
                         "using quote(): %r != %r" %
                         (quote_by_default, result))
        result = urllib.parse.quote_plus(quote_by_default,
                                         safe=quote_by_default)
        self.assertEqual(quote_by_default, result,
                         "using quote_plus(): %r != %r" %
                         (quote_by_default, result))
        # Safe expressed as bytes rather than str
        result = urllib.parse.quote(quote_by_default, safe=b"<>")
        self.assertEqual(quote_by_default, result,
                         "using quote(): %r != %r" %
                         (quote_by_default, result))
        # "Safe" non-ASCII characters should have no effect
        # (Since URIs are not allowed to have non-ASCII characters)
        result = urllib.parse.quote("a\xfcb", encoding="latin-1", safe="\xfc")
        expect = urllib.parse.quote("a\xfcb", encoding="latin-1", safe="")
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" %
                         (expect, result))
        # Same as above, but using a bytes rather than str
        result = urllib.parse.quote("a\xfcb", encoding="latin-1",
                                    safe=b"\xfc")
        expect = urllib.parse.quote("a\xfcb", encoding="latin-1", safe="")
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" %
                         (expect, result))

    def test_default_quoting(self):
        # Make sure all characters that should be quoted are by default sans
        # space (separate test for that).
        should_quote = ''.join(
            [chr(num) for num in range(32)]     # For 0x00 - 0x1F
            + [r'<>#%"{}|\^[]`',                # Delimiters and unwise chars
               chr(127)])                       # For 0x7F
        for char in should_quote:
            escaped = hexescape(char)
            result = urllib.parse.quote(char)
            self.assertEqual(escaped, result,
                             "using quote(): "
                             "%s should be escaped to %s, not %s" %
                             (char, escaped, result))
            result = urllib.parse.quote_plus(char)
            self.assertEqual(escaped, result,
                             "using quote_plus(): "
                             "%s should be escaped to %s, not %s" %
                             (char, escaped, result))
        # Only the characters that need it are escaped inside a longer string
        partial_quote = "ab[]cd"
        expected = "ab%5B%5Dcd"
        result = urllib.parse.quote(partial_quote)
        self.assertEqual(expected, result,
                         "using quote(): %r != %r" % (expected, result))
        result = urllib.parse.quote_plus(partial_quote)
        self.assertEqual(expected, result,
                         "using quote_plus(): %r != %r" % (expected, result))

    def test_quoting_space(self):
        # Make sure quote() and quote_plus() handle spaces as specified in
        # their unique way
        result = urllib.parse.quote(' ')
        self.assertEqual(result, hexescape(' '),
                         "using quote(): %r != %r" %
                         (result, hexescape(' ')))
        result = urllib.parse.quote_plus(' ')
        self.assertEqual(result, '+',
                         "using quote_plus(): %r != +" % result)
        given = "a b cd e f"
        expect = given.replace(' ', hexescape(' '))
        result = urllib.parse.quote(given)
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        expect = given.replace(' ', '+')
        result = urllib.parse.quote_plus(given)
        self.assertEqual(expect, result,
                         "using quote_plus(): %r != %r" % (expect, result))

    def test_quoting_plus(self):
        # A literal '+' is escaped by quote_plus() unless it is marked safe
        self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma'),
                         'alpha%2Bbeta+gamma')
        self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma', '+'),
                         'alpha+beta+gamma')
        # Test with bytes
        self.assertEqual(urllib.parse.quote_plus(b'alpha+beta gamma'),
                         'alpha%2Bbeta+gamma')
        # Test with safe bytes
        self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma', b'+'),
                         'alpha+beta+gamma')

    def test_quote_bytes(self):
        # Bytes should quote directly to percent-encoded values
        given = b"\xa2\xd8ab\xff"
        expect = "%A2%D8ab%FF"
        result = urllib.parse.quote(given)
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        # Encoding argument should raise type error on bytes input
        self.assertRaises(TypeError, urllib.parse.quote, given,
                          encoding="latin-1")
        # quote_from_bytes should work the same
        result = urllib.parse.quote_from_bytes(given)
        self.assertEqual(expect, result,
                         "using quote_from_bytes(): %r != %r"
                         % (expect, result))

    def test_quote_with_unicode(self):
        # Characters in Latin-1 range, encoded by default in UTF-8
        given = "\xa2\xd8ab\xff"
        expect = "%C2%A2%C3%98ab%C3%BF"
        result = urllib.parse.quote(given)
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        # Characters in Latin-1 range, encoded with None (default)
        result = urllib.parse.quote(given, encoding=None, errors=None)
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        # Characters in Latin-1 range, encoded with Latin-1
        given = "\xa2\xd8ab\xff"
        expect = "%A2%D8ab%FF"
        result = urllib.parse.quote(given, encoding="latin-1")
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        # Characters in BMP, encoded by default in UTF-8
        given = "\u6f22\u5b57"              # "Kanji"
        expect = "%E6%BC%A2%E5%AD%97"
        result = urllib.parse.quote(given)
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        # Characters in BMP, encoded with Latin-1
        given = "\u6f22\u5b57"
        self.assertRaises(UnicodeEncodeError, urllib.parse.quote, given,
                          encoding="latin-1")
        # Characters in BMP, encoded with Latin-1, with replace error handling
        given = "\u6f22\u5b57"
        expect = "%3F%3F"                   # "??"
        result = urllib.parse.quote(given, encoding="latin-1",
                                    errors="replace")
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        # Characters in BMP, Latin-1, with xmlcharrefreplace error handling
        given = "\u6f22\u5b57"
        expect = "%26%2328450%3B%26%2323383%3B"     # "&#28450;&#23383;"
        result = urllib.parse.quote(given, encoding="latin-1",
                                    errors="xmlcharrefreplace")
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))

    def test_quote_plus_with_unicode(self):
        # Encoding (latin-1) test for quote_plus
        given = "\xa2\xd8 \xff"
        expect = "%A2%D8+%FF"
        result = urllib.parse.quote_plus(given, encoding="latin-1")
        self.assertEqual(expect, result,
                         "using quote_plus(): %r != %r" % (expect, result))
        # Errors test for quote_plus
        given = "ab\u6f22\u5b57 cd"
        expect = "ab%3F%3F+cd"
        result = urllib.parse.quote_plus(given, encoding="latin-1",
                                         errors="replace")
        self.assertEqual(expect, result,
                         "using quote_plus(): %r != %r" % (expect, result))
934
Senthil Kumarand496c4c2010-07-30 19:34:36 +0000935
class UnquotingTests(unittest.TestCase):
    """Tests for unquote(), unquote_plus() and unquote_to_bytes()

    See the doc string for QuotingTests for details on quoting and such.

    """

    def test_unquoting(self):
        # Make sure unquoting of all ASCII values works
        escape_list = []
        for num in range(128):
            given = hexescape(chr(num))
            expect = chr(num)
            result = urllib.parse.unquote(given)
            self.assertEqual(expect, result,
                             "using unquote(): %r != %r" % (expect, result))
            result = urllib.parse.unquote_plus(given)
            self.assertEqual(expect, result,
                             "using unquote_plus(): %r != %r" %
                             (expect, result))
            escape_list.append(given)
        escape_string = ''.join(escape_list)
        result = urllib.parse.unquote(escape_string)
        # Exactly one '%' may remain: the one produced by unquoting "%25"
        self.assertEqual(result.count('%'), 1,
                         "using unquote(): not all characters unescaped: "
                         "%s" % result)
        # Non-str input is rejected
        self.assertRaises((TypeError, AttributeError),
                          urllib.parse.unquote, None)
        self.assertRaises((TypeError, AttributeError),
                          urllib.parse.unquote, ())
        with support.check_warnings(('', BytesWarning), quiet=True):
            self.assertRaises((TypeError, AttributeError),
                              urllib.parse.unquote, b'')

    def test_unquoting_badpercent(self):
        # Test unquoting on bad percent-escapes: they must come back unchanged
        bad_escapes = ('%xab', '%x', '%')
        for given in bad_escapes:
            expect = given
            result = urllib.parse.unquote(given)
            self.assertEqual(expect, result, "using unquote(): %r != %r"
                             % (expect, result))
        # unquote_to_bytes
        for given in bad_escapes:
            expect = bytes(given, 'ascii')
            result = urllib.parse.unquote_to_bytes(given)
            self.assertEqual(expect, result,
                             "using unquote_to_bytes(): %r != %r"
                             % (expect, result))
        self.assertRaises((TypeError, AttributeError),
                          urllib.parse.unquote_to_bytes, None)
        self.assertRaises((TypeError, AttributeError),
                          urllib.parse.unquote_to_bytes, ())

    def test_unquoting_mixed_case(self):
        # Test unquoting on mixed-case hex digits in the percent-escapes
        given = '%Ab%eA'
        expect = b'\xab\xea'
        result = urllib.parse.unquote_to_bytes(given)
        self.assertEqual(expect, result,
                         "using unquote_to_bytes(): %r != %r"
                         % (expect, result))

    def test_unquoting_parts(self):
        # Make sure unquoting works when have non-quoted characters
        # interspersed
        given = 'ab%sd' % hexescape('c')
        expect = "abcd"
        result = urllib.parse.unquote(given)
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))
        result = urllib.parse.unquote_plus(given)
        self.assertEqual(expect, result,
                         "using unquote_plus(): %r != %r" % (expect, result))

    def test_unquoting_plus(self):
        # Test difference between unquote() and unquote_plus()
        given = "are+there+spaces..."
        expect = given
        result = urllib.parse.unquote(given)
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))
        expect = given.replace('+', ' ')
        result = urllib.parse.unquote_plus(given)
        self.assertEqual(expect, result,
                         "using unquote_plus(): %r != %r" % (expect, result))

    def test_unquote_to_bytes(self):
        # Percent-escapes in a str decode to the corresponding raw bytes
        given = 'br%C3%BCckner_sapporo_20050930.doc'
        expect = b'br\xc3\xbcckner_sapporo_20050930.doc'
        result = urllib.parse.unquote_to_bytes(given)
        self.assertEqual(expect, result,
                         "using unquote_to_bytes(): %r != %r"
                         % (expect, result))
        # Test on a string with unescaped non-ASCII characters
        # (Technically an invalid URI; expect those characters to be UTF-8
        # encoded).
        result = urllib.parse.unquote_to_bytes("\u6f22%C3%BC")
        expect = b'\xe6\xbc\xa2\xc3\xbc'    # UTF-8 for "\u6f22\u00fc"
        self.assertEqual(expect, result,
                         "using unquote_to_bytes(): %r != %r"
                         % (expect, result))
        # Test with a bytes as input
        given = b'%A2%D8ab%FF'
        expect = b'\xa2\xd8ab\xff'
        result = urllib.parse.unquote_to_bytes(given)
        self.assertEqual(expect, result,
                         "using unquote_to_bytes(): %r != %r"
                         % (expect, result))
        # Test with a bytes as input, with unescaped non-ASCII bytes
        # (Technically an invalid URI; expect those bytes to be preserved)
        given = b'%A2\xd8ab%FF'
        expect = b'\xa2\xd8ab\xff'
        result = urllib.parse.unquote_to_bytes(given)
        self.assertEqual(expect, result,
                         "using unquote_to_bytes(): %r != %r"
                         % (expect, result))

    def test_unquote_with_unicode(self):
        # Characters in the Latin-1 range, encoded with UTF-8
        given = 'br%C3%BCckner_sapporo_20050930.doc'
        expect = 'br\u00fcckner_sapporo_20050930.doc'
        result = urllib.parse.unquote(given)
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))
        # Characters in the Latin-1 range, encoded with None (default)
        result = urllib.parse.unquote(given, encoding=None, errors=None)
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))

        # Characters in the Latin-1 range, encoded with Latin-1
        result = urllib.parse.unquote('br%FCckner_sapporo_20050930.doc',
                                      encoding="latin-1")
        expect = 'br\u00fcckner_sapporo_20050930.doc'
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))

        # Characters in BMP, encoded with UTF-8
        given = "%E6%BC%A2%E5%AD%97"
        expect = "\u6f22\u5b57"             # "Kanji"
        result = urllib.parse.unquote(given)
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))

        # Decode with UTF-8, invalid sequence
        given = "%F3%B1"
        expect = "\ufffd"                   # Replacement character
        result = urllib.parse.unquote(given)
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))

        # Decode with UTF-8, invalid sequence, replace errors
        result = urllib.parse.unquote(given, errors="replace")
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))

        # Decode with UTF-8, invalid sequence, ignoring errors
        given = "%F3%B1"
        expect = ""
        result = urllib.parse.unquote(given, errors="ignore")
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))

        # A mix of non-ASCII and percent-encoded characters, UTF-8
        result = urllib.parse.unquote("\u6f22%C3%BC")
        expect = '\u6f22\u00fc'
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))

        # A mix of non-ASCII and percent-encoded characters, Latin-1
        # (Note, the string contains non-Latin-1-representable characters)
        result = urllib.parse.unquote("\u6f22%FC", encoding="latin-1")
        expect = '\u6f22\u00fc'
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))
Raymond Hettinger4b0f20d2005-10-15 16:41:53 +00001125
class urlencode_Tests(unittest.TestCase):
    """Tests for urlencode()"""

    def help_inputtype(self, given, test_type):
        """Helper method for testing different input types.

        'given' must lead to only the pairs:
            * 1st, 1
            * 2nd, 2
            * 3rd, 3

        Test cannot assume anything about order.  Docs make no guarantee and
        have possible dictionary input.

        'test_type' is a short description of the input kind; it is only
        used to label assertion failure messages.

        """
        expect_somewhere = ["1st=1", "2nd=2", "3rd=3"]
        result = urllib.parse.urlencode(given)
        for expected in expect_somewhere:
            self.assertIn(expected, result,
                          "testing %s: %s not found in %s" %
                          (test_type, expected, result))
        self.assertEqual(result.count('&'), 2,
                         "testing %s: expected 2 '&'s; got %s" %
                         (test_type, result.count('&')))
        # The first '&' must sit between a value digit and a key digit
        amp_location = result.index('&')
        on_amp_left = result[amp_location - 1]
        on_amp_right = result[amp_location + 1]
        self.assertTrue(on_amp_left.isdigit() and on_amp_right.isdigit(),
                        "testing %s: '&' not located in proper place in %s" %
                        (test_type, result))
        # 5 chars per pair plus the two '&' separators
        expected_length = (5 * 3) + 2
        self.assertEqual(len(result), expected_length,
                         "testing %s: "
                         "unexpected number of characters: %s != %s" %
                         (test_type, len(result), expected_length))

    def test_using_mapping(self):
        # Test passing in a mapping object as an argument.
        self.help_inputtype({"1st": '1', "2nd": '2', "3rd": '3'},
                            "using dict as input type")

    def test_using_sequence(self):
        # Test passing in a sequence of two-item sequences as an argument.
        self.help_inputtype([('1st', '1'), ('2nd', '2'), ('3rd', '3')],
                            "using sequence of two-item tuples as input")

    def test_quoting(self):
        # Make sure keys and values are quoted using quote_plus()
        given = {"&": "="}
        expect = "%s=%s" % (hexescape('&'), hexescape('='))
        result = urllib.parse.urlencode(given)
        self.assertEqual(expect, result)
        given = {"key name": "A bunch of pluses"}
        expect = "key+name=A+bunch+of+pluses"
        result = urllib.parse.urlencode(given)
        self.assertEqual(expect, result)

    def test_doseq(self):
        # Test that passing True for 'doseq' parameter works correctly
        given = {'sequence': ['1', '2', '3']}
        # Without doseq the list is stringified and quoted as a single value
        expect = "sequence=%s" % urllib.parse.quote_plus(str(['1', '2', '3']))
        result = urllib.parse.urlencode(given)
        self.assertEqual(expect, result)
        # With doseq every element becomes its own key=value pair
        result = urllib.parse.urlencode(given, True)
        for value in given["sequence"]:
            expect = "sequence=%s" % value
            self.assertIn(expect, result)
        self.assertEqual(result.count('&'), 2,
                         "Expected 2 '&'s, got %s" % result.count('&'))

    def test_empty_sequence(self):
        self.assertEqual("", urllib.parse.urlencode({}))
        self.assertEqual("", urllib.parse.urlencode([]))

    def test_nonstring_values(self):
        self.assertEqual("a=1", urllib.parse.urlencode({"a": 1}))
        self.assertEqual("a=None", urllib.parse.urlencode({"a": None}))

    def test_nonstring_seq_values(self):
        self.assertEqual("a=1&a=2",
                         urllib.parse.urlencode({"a": [1, 2]}, True))
        self.assertEqual("a=None&a=a",
                         urllib.parse.urlencode({"a": [None, "a"]}, True))
        # A mapping used as a value is iterated, so its keys are emitted
        data = collections.OrderedDict([("a", 1), ("b", 1)])
        self.assertEqual("a=a&a=b",
                         urllib.parse.urlencode({"a": data}, True))

    def test_urlencode_encoding(self):
        # ASCII encoding. Expect %3F with errors="replace"
        given = (('\u00a0', '\u00c1'),)
        expect = '%3F=%3F'
        result = urllib.parse.urlencode(given, encoding="ASCII",
                                        errors="replace")
        self.assertEqual(expect, result)

        # Default is UTF-8 encoding.
        given = (('\u00a0', '\u00c1'),)
        expect = '%C2%A0=%C3%81'
        result = urllib.parse.urlencode(given)
        self.assertEqual(expect, result)

        # Latin-1 encoding.
        given = (('\u00a0', '\u00c1'),)
        expect = '%A0=%C1'
        result = urllib.parse.urlencode(given, encoding="latin-1")
        self.assertEqual(expect, result)

    def test_urlencode_encoding_doseq(self):
        # ASCII Encoding. Expect %3F with errors="replace"
        given = (('\u00a0', '\u00c1'),)
        expect = '%3F=%3F'
        result = urllib.parse.urlencode(given, doseq=True,
                                        encoding="ASCII", errors="replace")
        self.assertEqual(expect, result)

        # ASCII Encoding. On a sequence of values.
        given = (("\u00a0", (1, "\u00c1")),)
        expect = '%3F=1&%3F=%3F'
        result = urllib.parse.urlencode(given, True,
                                        encoding="ASCII", errors="replace")
        self.assertEqual(expect, result)

        # Utf-8
        given = (("\u00a0", "\u00c1"),)
        expect = '%C2%A0=%C3%81'
        result = urllib.parse.urlencode(given, True)
        self.assertEqual(expect, result)

        given = (("\u00a0", (42, "\u00c1")),)
        expect = '%C2%A0=42&%C2%A0=%C3%81'
        result = urllib.parse.urlencode(given, True)
        self.assertEqual(expect, result)

        # latin-1
        given = (("\u00a0", "\u00c1"),)
        expect = '%A0=%C1'
        result = urllib.parse.urlencode(given, True, encoding="latin-1")
        self.assertEqual(expect, result)

        given = (("\u00a0", (42, "\u00c1")),)
        expect = '%A0=42&%A0=%C1'
        result = urllib.parse.urlencode(given, True, encoding="latin-1")
        self.assertEqual(expect, result)

    def test_urlencode_bytes(self):
        # Bytes keys and values are percent-encoded byte for byte
        given = ((b'\xa0\x24', b'\xc1\x24'),)
        expect = '%A0%24=%C1%24'
        result = urllib.parse.urlencode(given)
        self.assertEqual(expect, result)
        result = urllib.parse.urlencode(given, True)
        self.assertEqual(expect, result)

        # Sequence of values
        given = ((b'\xa0\x24', (42, b'\xc1\x24')),)
        expect = '%A0%24=42&%A0%24=%C1%24'
        result = urllib.parse.urlencode(given, True)
        self.assertEqual(expect, result)

    def test_urlencode_encoding_safe_parameter(self):

        # Send '$' (\x24) as safe character
        # Default utf-8 encoding

        given = ((b'\xa0\x24', b'\xc1\x24'),)
        result = urllib.parse.urlencode(given, safe=":$")
        expect = '%A0$=%C1$'
        self.assertEqual(expect, result)

        given = ((b'\xa0\x24', b'\xc1\x24'),)
        result = urllib.parse.urlencode(given, doseq=True, safe=":$")
        expect = '%A0$=%C1$'
        self.assertEqual(expect, result)

        # Safe parameter in sequence
        given = ((b'\xa0\x24', (b'\xc1\x24', 0xd, 42)),)
        expect = '%A0$=%C1$&%A0$=13&%A0$=42'
        result = urllib.parse.urlencode(given, True, safe=":$")
        self.assertEqual(expect, result)

        # Test all above in latin-1 encoding

        given = ((b'\xa0\x24', b'\xc1\x24'),)
        result = urllib.parse.urlencode(given, safe=":$",
                                        encoding="latin-1")
        expect = '%A0$=%C1$'
        self.assertEqual(expect, result)

        given = ((b'\xa0\x24', b'\xc1\x24'),)
        expect = '%A0$=%C1$'
        result = urllib.parse.urlencode(given, doseq=True, safe=":$",
                                        encoding="latin-1")
        # This result used to be computed but never checked
        self.assertEqual(expect, result)

        given = ((b'\xa0\x24', (b'\xc1\x24', 0xd, 42)),)
        expect = '%A0$=%C1$&%A0$=13&%A0$=42'
        result = urllib.parse.urlencode(given, True, safe=":$",
                                        encoding="latin-1")
        self.assertEqual(expect, result)
1320
class Pathname_Tests(unittest.TestCase):
    """Test pathname2url() and url2pathname()"""

    def test_basic(self):
        # Make sure simple tests pass
        expected_path = os.path.join("parts", "of", "a", "path")
        expected_url = "parts/of/a/path"
        result = urllib.request.pathname2url(expected_path)
        self.assertEqual(expected_url, result,
                         "pathname2url() failed; %s != %s" %
                         (result, expected_url))
        result = urllib.request.url2pathname(expected_url)
        self.assertEqual(expected_path, result,
                         "url2pathname() failed; %s != %s" %
                         (result, expected_path))

    def test_quoting(self):
        # Test automatic quoting and unquoting works for pathname2url() and
        # url2pathname() respectively
        given = os.path.join("needs", "quot=ing", "here")
        expect = "needs/%s/here" % urllib.parse.quote("quot=ing")
        result = urllib.request.pathname2url(given)
        self.assertEqual(expect, result,
                         "pathname2url() failed; %s != %s" %
                         (expect, result))
        # Round trip: converting the URL back must give the original path.
        expect = given
        result = urllib.request.url2pathname(result)
        self.assertEqual(expect, result,
                         "url2pathname() failed; %s != %s" %
                         (expect, result))
        given = os.path.join("make sure", "using_quote")
        expect = "%s/using_quote" % urllib.parse.quote("make sure")
        result = urllib.request.pathname2url(given)
        self.assertEqual(expect, result,
                         "pathname2url() failed; %s != %s" %
                         (expect, result))
        # The expected path keeps the '+' unchanged.
        given = "make+sure/using_unquote"
        expect = os.path.join("make+sure", "using_unquote")
        result = urllib.request.url2pathname(given)
        self.assertEqual(expect, result,
                         "url2pathname() failed; %s != %s" %
                         (expect, result))

    @unittest.skipUnless(sys.platform == 'win32',
                         'test specific to the Windows behaviour of '
                         'urllib.request.url2pathname()')
    def test_ntpath(self):
        # Several spellings of the same drive-root URL map to one path.
        given = ('/C:/', '///C:/', '/C|//')
        expect = 'C:\\'
        for url in given:
            result = urllib.request.url2pathname(url)
            self.assertEqual(expect, result,
                             'urllib.request.url2pathname() failed; %s != %s' %
                             (expect, result))
        given = '///C|/path'
        expect = 'C:\\path'
        result = urllib.request.url2pathname(given)
        self.assertEqual(expect, result,
                         'urllib.request.url2pathname() failed; %s != %s' %
                         (expect, result))
1380
class Utility_Tests(unittest.TestCase):
    """Tests for the assorted utility functions provided by urllib."""

    def test_thishost(self):
        """urllib.request.thishost() is expected to return a tuple."""
        host_info = urllib.request.thishost()
        self.assertIsInstance(host_info, tuple)
1387
Senthil Kumaran690ce9b2009-05-05 18:41:13 +00001388
class URLopener_Tests(unittest.TestCase):
    """Tests for the open() method of the URLopener class."""

    def test_quoted_open(self):
        class DummyURLopener(urllib.request.URLopener):
            # Handler for "spam:" URLs; returns what open() passes to it so
            # the test can inspect the quoting that open() applied.
            def open_spam(self, url):
                return url

        deprecation = (
            'DummyURLopener style of invoking requests is deprecated.',
            DeprecationWarning)
        with support.check_warnings(deprecation):
            opened = DummyURLopener().open('spam://example/ /')
            self.assertEqual(opened, '//example/%20/')

            # test the safe characters are not quoted by urlopen
            untouched = "//c:|windows%/:=&?~#+!$,;'@()*[]|/path/"
            opened = DummyURLopener().open("spam:" + untouched)
            self.assertEqual(opened, untouched)
Senthil Kumaran734f0592010-02-20 22:19:04 +00001406
# The FTP wrapper tests below are commented out.
# It is unclear why they keep failing on Windows and SPARC.
# Everywhere else they work OK, but on those machines they sometimes
# fail in one of the tests and sometimes in another.  On my Linux machine
# the tests pass.
# If anybody has one of the problematic environments, please help!
# . Facundo
1414#
1415# def server(evt):
Georg Brandlf78e02b2008-06-10 17:40:04 +00001416# import socket, time
Guido van Rossume7ba4952007-06-06 23:52:48 +00001417# serv = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
1418# serv.settimeout(3)
1419# serv.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
1420# serv.bind(("", 9093))
Charles-François Natali6e204602014-07-23 19:28:13 +01001421# serv.listen()
Guido van Rossume7ba4952007-06-06 23:52:48 +00001422# try:
1423# conn, addr = serv.accept()
1424# conn.send("1 Hola mundo\n")
1425# cantdata = 0
1426# while cantdata < 13:
1427# data = conn.recv(13-cantdata)
1428# cantdata += len(data)
1429# time.sleep(.3)
1430# conn.send("2 No more lines\n")
1431# conn.close()
1432# except socket.timeout:
1433# pass
1434# finally:
1435# serv.close()
1436# evt.set()
1437#
1438# class FTPWrapperTests(unittest.TestCase):
1439#
1440# def setUp(self):
Georg Brandlf78e02b2008-06-10 17:40:04 +00001441# import ftplib, time, threading
Guido van Rossume7ba4952007-06-06 23:52:48 +00001442# ftplib.FTP.port = 9093
1443# self.evt = threading.Event()
1444# threading.Thread(target=server, args=(self.evt,)).start()
1445# time.sleep(.1)
1446#
1447# def tearDown(self):
1448# self.evt.wait()
1449#
1450# def testBasic(self):
1451# # connects
1452# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [])
Georg Brandlf78e02b2008-06-10 17:40:04 +00001453# ftp.close()
Guido van Rossume7ba4952007-06-06 23:52:48 +00001454#
1455# def testTimeoutNone(self):
Georg Brandlf78e02b2008-06-10 17:40:04 +00001456# # global default timeout is ignored
1457# import socket
Serhiy Storchaka25d8aea2014-02-08 14:50:08 +02001458# self.assertIsNone(socket.getdefaulttimeout())
Guido van Rossume7ba4952007-06-06 23:52:48 +00001459# socket.setdefaulttimeout(30)
1460# try:
1461# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [])
1462# finally:
Georg Brandlf78e02b2008-06-10 17:40:04 +00001463# socket.setdefaulttimeout(None)
Guido van Rossume7ba4952007-06-06 23:52:48 +00001464# self.assertEqual(ftp.ftp.sock.gettimeout(), 30)
Georg Brandlf78e02b2008-06-10 17:40:04 +00001465# ftp.close()
Guido van Rossume7ba4952007-06-06 23:52:48 +00001466#
Georg Brandlf78e02b2008-06-10 17:40:04 +00001467# def testTimeoutDefault(self):
1468# # global default timeout is used
1469# import socket
Serhiy Storchaka25d8aea2014-02-08 14:50:08 +02001470# self.assertIsNone(socket.getdefaulttimeout())
Georg Brandlf78e02b2008-06-10 17:40:04 +00001471# socket.setdefaulttimeout(30)
1472# try:
1473# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [])
1474# finally:
1475# socket.setdefaulttimeout(None)
1476# self.assertEqual(ftp.ftp.sock.gettimeout(), 30)
1477# ftp.close()
1478#
1479# def testTimeoutValue(self):
1480# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [],
1481# timeout=30)
1482# self.assertEqual(ftp.ftp.sock.gettimeout(), 30)
1483# ftp.close()
Guido van Rossume7ba4952007-06-06 23:52:48 +00001484
Senthil Kumaran8b081b72013-04-10 20:53:12 -07001485
class RequestTests(unittest.TestCase):
    """Unit tests for urllib.request.Request."""

    def test_default_values(self):
        """get_method() is GET without data and POST when data is given."""
        url = "http://www.python.org"
        without_data = urllib.request.Request(url)
        self.assertEqual(without_data.get_method(), 'GET')
        # Even an empty dict counts as a data argument here.
        with_data = urllib.request.Request(url, {})
        self.assertEqual(with_data.get_method(), 'POST')

    def test_with_method_arg(self):
        """An explicit method argument or attribute decides get_method()."""
        url = "http://www.python.org"

        # method='HEAD' is reported both without and with a data argument.
        for data_args in ((), ({},)):
            head = urllib.request.Request(url, *data_args, method='HEAD')
            self.assertEqual(head.method, 'HEAD')
            self.assertEqual(head.get_method(), 'HEAD')

        # Reassigning the attribute after construction is honoured as well.
        mutable = urllib.request.Request(url, method='GET')
        self.assertEqual(mutable.get_method(), 'GET')
        mutable.method = 'HEAD'
        self.assertEqual(mutable.get_method(), 'HEAD')
Skip Montanaro080c9972001-01-28 21:12:22 +00001508
1509
class URL2PathNameTests(unittest.TestCase):
    """Tests for url2pathname() as imported from nturl2path."""

    def test_converting_drive_letter(self):
        conversions = (
            ("///C|", 'C:'),
            ("///C:", 'C:'),
            ("///C|/", 'C:\\'),
        )
        for url, path in conversions:
            self.assertEqual(url2pathname(url), path)

    def test_converting_when_no_drive_letter(self):
        # Escaped (non-raw) literals are used because a raw string cannot
        # end in a backslash.
        self.assertEqual(url2pathname("///C/test/"), '\\\\\\C\\test\\')
        self.assertEqual(url2pathname("////C/test/"), '\\\\C\\test\\')

    def test_simple_compare(self):
        converted = url2pathname("///C|/foo/bar/spam.foo")
        self.assertEqual(converted, r'C:\foo\bar\spam.foo')

    def test_non_ascii_drive_letter(self):
        with self.assertRaises(IOError):
            url2pathname("///\u00e8|/")

    def test_roundtrip_url2pathname(self):
        # path -> URL -> path must give back the starting path.
        for original in ('C:', r'\\\C\test\\', r'C:\foo\bar\spam.foo'):
            round_tripped = url2pathname(pathname2url(original))
            self.assertEqual(round_tripped, original)
Senthil Kumaran277e9092013-04-10 20:51:19 -07001536
class PathName2URLTests(unittest.TestCase):
    """Tests for pathname2url() as imported from nturl2path."""

    def test_converting_drive_letter(self):
        # With or without the trailing backslash the URL is the same.
        for path in ("C:", "C:\\"):
            self.assertEqual(pathname2url(path), '///C:')

    def test_converting_when_no_drive_letter(self):
        # Escaped (non-raw) literals are used because a raw string cannot
        # end in a backslash.
        conversions = (
            ('\\\\\\folder\\test\\', '/////folder/test/'),
            ('\\\\folder\\test\\', '////folder/test/'),
            ('\\folder\\test\\', '/folder/test/'),
        )
        for path, url in conversions:
            self.assertEqual(pathname2url(path), url)

    def test_simple_compare(self):
        converted = pathname2url(r'C:\foo\bar\spam.foo')
        self.assertEqual(converted, "///C:/foo/bar/spam.foo")

    def test_long_drive_letter(self):
        with self.assertRaises(IOError):
            pathname2url("XX:\\")

    def test_roundtrip_pathname2url(self):
        # URL -> path -> URL must give back the starting URL.
        urls = ('///C:', '/////folder/test/', '///C:/foo/bar/spam.foo')
        for original in urls:
            round_tripped = pathname2url(url2pathname(original))
            self.assertEqual(round_tripped, original)
Brett Cannon74bfd702003-04-25 09:39:47 +00001564
# Run the whole test module when it is executed as a script.
if __name__ == "__main__":
    unittest.main()