blob: 43ea6b8b57e5b442ad226a65dc3f100aca201ee2 [file] [log] [blame]
Raymond Hettinger15f44ab2016-08-30 10:47:49 -07001"""Regression tests for what was in Python 2's "urllib" module"""
Brett Cannon74bfd702003-04-25 09:39:47 +00002
Jeremy Hylton1afc1692008-06-18 20:49:58 +00003import urllib.parse
4import urllib.request
guido@google.coma119df92011-03-29 11:41:02 -07005import urllib.error
Georg Brandl24420152008-05-26 16:32:26 +00006import http.client
Barry Warsaw820c1202008-06-12 04:06:45 +00007import email.message
Jeremy Hylton66dc8c52007-08-04 03:42:26 +00008import io
Brett Cannon74bfd702003-04-25 09:39:47 +00009import unittest
Benjamin Peterson3c2dca62014-06-07 15:08:04 -070010from unittest.mock import patch
Benjamin Petersonee8712c2008-05-20 21:35:26 +000011from test import support
Brett Cannon74bfd702003-04-25 09:39:47 +000012import os
Antoine Pitrou07df6552014-11-02 17:23:14 +010013try:
14 import ssl
15except ImportError:
16 ssl = None
Senthil Kumaran2d2ea1b2011-04-14 13:16:30 +080017import sys
Georg Brandl5a650a22005-08-26 08:51:34 +000018import tempfile
Senthil Kumaran277e9092013-04-10 20:51:19 -070019from nturl2path import url2pathname, pathname2url
Jeremy Hylton6102e292000-08-31 15:48:10 +000020
Senthil Kumaranc5c5a142012-01-14 19:09:04 +080021from base64 import b64encode
Georg Brandl2daf6ae2012-02-20 19:54:16 +010022import collections
Senthil Kumaranc5c5a142012-01-14 19:09:04 +080023
Senthil Kumaran8b081b72013-04-10 20:53:12 -070024
def hexescape(char):
    """Return the RFC 2396 percent-escape of a single character.

    The character's code point is written as upper-case hexadecimal,
    zero-padded to at least two digits, and prefixed with "%".
    """
    return "%{:02X}".format(ord(char))
Jeremy Hylton6102e292000-08-31 15:48:10 +000031
# Shortcut for testing FancyURLopener: the opener built on the first
# proxy-less call is cached here and reused by later calls.
_urlopener = None


def urlopen(url, data=None, proxies=None):
    """urlopen(url [, data]) -> open file-like object

    When *proxies* is given, a fresh urllib.request.FancyURLopener is
    built for this call only.  Otherwise the module-level cached opener
    is used, and created on first use.  *data* is forwarded to the
    opener's open() only when it is not None.
    """
    global _urlopener
    if proxies is not None:
        opener = urllib.request.FancyURLopener(proxies=proxies)
    else:
        if not _urlopener:
            _urlopener = FancyURLopener()
        opener = _urlopener
    open_args = (url,) if data is None else (url, data)
    return opener.open(*open_args)
50
Senthil Kumarance260142011-11-01 01:35:17 +080051
def FancyURLopener():
    """Create a urllib.request.FancyURLopener under a warnings check.

    The construction is wrapped in support.check_warnings() with a filter
    for the opener's DeprecationWarning message.
    """
    expected_warning = (
        'FancyURLopener style of invoking requests is deprecated.',
        DeprecationWarning,
    )
    with support.check_warnings(expected_warning):
        return urllib.request.FancyURLopener()
57
58
def fakehttp(fakedata):
    """Return an HTTPConnection subclass that replays *fakedata*.

    The returned class never touches the network: connect() installs an
    in-memory FakeSocket over the canned bytes, and whatever is passed to
    the socket's sendall() is stored on the class as ``buf`` so tests can
    inspect it afterwards.
    """
    class FakeSocket(io.BytesIO):
        # Count of outstanding handles on this buffer; makefile() adds
        # one, close() removes one, and the buffer is really closed only
        # when the count reaches zero.
        io_refs = 1

        def sendall(self, data):
            # Keep the most recent payload for later verification.
            FakeHTTPConnection.buf = data

        def makefile(self, *args, **kwds):
            self.io_refs += 1
            return self

        def read(self, amt=None):
            # A closed buffer reads as EOF instead of raising.
            return b"" if self.closed else super().read(amt)

        def readline(self, length=None):
            # A closed buffer reads as EOF instead of raising.
            return b"" if self.closed else super().readline(length)

        def close(self):
            self.io_refs -= 1
            if not self.io_refs:
                super().close()

    class FakeHTTPConnection(http.client.HTTPConnection):

        # buffer to store data for verification in urlopen tests.
        buf = None

        def connect(self):
            sock = FakeSocket(self.fakedata)
            self.sock = sock
            # Also expose the socket on the class for the tests.
            type(self).fakesock = sock

    # Attached after the class body; connect() reads it as self.fakedata.
    FakeHTTPConnection.fakedata = fakedata

    return FakeHTTPConnection
96
97
class FakeHTTPMixin:
    """Mixin that swaps http.client.HTTPConnection for a canned fake."""

    def fakehttp(self, fakedata):
        """Install a fake connection class that replays *fakedata*.

        The current http.client.HTTPConnection is saved on the instance
        so that unfakehttp() can put it back.
        """
        self._connection_class = http.client.HTTPConnection
        http.client.HTTPConnection = fakehttp(fakedata)

    def unfakehttp(self):
        """Restore the connection class saved by fakehttp()."""
        http.client.HTTPConnection = self._connection_class
105
106
class FakeFTPMixin:
    """Mixin that swaps urllib.request.ftpwrapper for an inert stand-in."""

    def fakeftp(self):
        """Install a do-nothing ftpwrapper, saving the real one first."""

        class FakeFtpWrapper:
            def __init__(self, user, passwd, host, port, dirs, timeout=None,
                         persistent=True):
                # All arguments are accepted and ignored.
                pass

            def retrfile(self, file, type):
                # An empty stream, paired with 0.
                return io.BytesIO(), 0

            def close(self):
                pass

        # The right-hand side is evaluated first, so the real class is
        # captured before it is replaced.
        self._ftpwrapper_class, urllib.request.ftpwrapper = (
            urllib.request.ftpwrapper, FakeFtpWrapper)

    def unfakeftp(self):
        """Restore the ftpwrapper saved by fakeftp()."""
        urllib.request.ftpwrapper = self._ftpwrapper_class
125
126
class urlopen_FileTests(unittest.TestCase):
    """Test urlopen() opening a temporary file.

    Checking the returned object against a local file covers as much of
    its interface as possible without relying on a network connection.

    """

    def setUp(self):
        # Write a one-line payload naming this test class to the scratch
        # file, then open that file through urlopen().
        payload = "test_urllib: %s\n" % self.__class__.__name__
        self.text = payload.encode("ascii")
        with open(support.TESTFN, 'wb') as scratch:
            scratch.write(self.text)
        self.pathname = support.TESTFN
        self.returned_obj = urlopen("file:%s" % self.pathname)

    def tearDown(self):
        """Shut down the open object"""
        self.returned_obj.close()
        os.remove(support.TESTFN)

    def test_interface(self):
        # The object returned by urlopen() must expose each of these.
        required = ("read", "readline", "readlines", "fileno",
                    "close", "info", "geturl", "getcode", "__iter__")
        for name in required:
            self.assertTrue(
                hasattr(self.returned_obj, name),
                "object returned by urlopen() lacks %s attribute" % name)

    def test_read(self):
        self.assertEqual(self.text, self.returned_obj.read())

    def test_readline(self):
        first = self.returned_obj.readline()
        self.assertEqual(self.text, first)
        after_eof = self.returned_obj.readline()
        self.assertEqual(b'', after_eof,
                         "calling readline() after exhausting the file did "
                         "not return an empty string")

    def test_readlines(self):
        lines = self.returned_obj.readlines()
        self.assertEqual(len(lines), 1,
                         "readlines() returned the wrong number of lines")
        self.assertEqual(lines[0], self.text,
                         "readlines() returned improper text")

    def test_fileno(self):
        descriptor = self.returned_obj.fileno()
        self.assertIsInstance(descriptor, int,
                              "fileno() did not return an int")
        raw = os.read(descriptor, len(self.text))
        self.assertEqual(raw, self.text,
                         "Reading on the file descriptor returned by "
                         "fileno() did not return the expected text")

    def test_close(self):
        # close() is called here and then once more by tearDown(), so this
        # also checks that closing twice is harmless.
        self.returned_obj.close()

    def test_info(self):
        headers = self.returned_obj.info()
        self.assertIsInstance(headers, email.message.Message)

    def test_geturl(self):
        self.assertEqual(self.returned_obj.geturl(), self.pathname)

    def test_getcode(self):
        self.assertIsNone(self.returned_obj.getcode())

    def test_iter(self):
        # No need to count iterations: the comparison fails as soon as the
        # iterator yields anything other than the single expected line.
        # Iterating implicitly is what exercises ticket #4608.
        for chunk in self.returned_obj:
            self.assertEqual(chunk, self.text)

    def test_relativelocalfile(self):
        with self.assertRaises(ValueError):
            urllib.request.urlopen('./' + self.pathname)
208
class ProxyTests(unittest.TestCase):
    """Proxy discovery and bypass rules driven by environment variables."""

    def setUp(self):
        # The guard records changes to env vars so they can be undone.
        self.env = support.EnvironmentVarGuard()
        # Start from an environment without any proxy-related variable.
        proxy_names = [name for name in os.environ
                       if 'proxy' in name.lower()]
        for name in proxy_names:
            self.env.unset(name)

    def tearDown(self):
        # Restore all proxy related env vars
        self.env.__exit__()
        del self.env

    def test_getproxies_environment_keep_no_proxies(self):
        bypass = urllib.request.proxy_bypass_environment
        self.env.set('NO_PROXY', 'localhost')
        found = urllib.request.getproxies_environment()
        # Keys are lower-cased with the '_proxy' suffix removed.
        self.assertEqual('localhost', found['no'])
        # List of no_proxies with space.
        self.env.set('NO_PROXY',
                     'localhost, anotherdomain.com, newdomain.com:1234')
        self.assertTrue(bypass('anotherdomain.com'))
        self.assertTrue(bypass('anotherdomain.com:8888'))
        self.assertTrue(bypass('newdomain.com:1234'))

    def test_proxy_cgi_ignore(self):
        try:
            self.env.set('HTTP_PROXY', 'http://somewhere:3128')
            found = urllib.request.getproxies_environment()
            self.assertEqual('http://somewhere:3128', found['http'])
            # With REQUEST_METHOD set, HTTP_PROXY is expected to be
            # left out of the result.
            self.env.set('REQUEST_METHOD', 'GET')
            found = urllib.request.getproxies_environment()
            self.assertNotIn('http', found)
        finally:
            self.env.unset('REQUEST_METHOD')
            self.env.unset('HTTP_PROXY')

    def test_proxy_bypass_environment_host_match(self):
        bypass = urllib.request.proxy_bypass_environment
        self.env.set('NO_PROXY',
                     'localhost, anotherdomain.com, newdomain.com:1234')
        bypassed = (
            'localhost',
            'LocalHost',                # MixedCase
            'LOCALHOST',                # UPPERCASE
            'newdomain.com:1234',
            'anotherdomain.com:8888',
            'www.newdomain.com:1234',
        )
        for host in bypassed:
            self.assertTrue(bypass(host))
        proxied = (
            'prelocalhost',
            'newdomain.com',            # no port
            'newdomain.com:1235',       # wrong port
        )
        for host in proxied:
            self.assertFalse(bypass(host))
Senthil Kumarana7c0ff22016-04-25 08:16:23 -0700260
class ProxyTests_withOrderedEnv(unittest.TestCase):
    """Proxy tests in which the order of environment variables matters."""

    def setUp(self):
        # Replace os.environ with an empty, insertion-ordered mapping so
        # each test controls the order the variables are seen in.
        self._saved_env = os.environ
        os.environ = collections.OrderedDict()

    def tearDown(self):
        os.environ = self._saved_env

    def test_getproxies_environment_prefer_lowercase(self):
        bypass = urllib.request.proxy_bypass_environment
        getproxies = urllib.request.getproxies_environment
        env = os.environ

        # Test lowercase preference with removal
        env['no_proxy'] = ''
        env['No_Proxy'] = 'localhost'
        self.assertFalse(bypass('localhost'))
        self.assertFalse(bypass('arbitrary'))
        env['http_proxy'] = ''
        env['HTTP_PROXY'] = 'http://somewhere:3128'
        self.assertEqual({}, getproxies())

        # Test lowercase preference of proxy bypass and correct matching
        # including ports
        env['no_proxy'] = 'localhost, noproxy.com, my.proxy:1234'
        env['No_Proxy'] = 'xyz.com'
        self.assertTrue(bypass('localhost'))
        self.assertTrue(bypass('noproxy.com:5678'))
        self.assertTrue(bypass('my.proxy:1234'))
        self.assertFalse(bypass('my.proxy'))
        self.assertFalse(bypass('arbitrary'))

        # Test lowercase preference with replacement
        env['http_proxy'] = 'http://somewhere:3128'
        env['Http_Proxy'] = 'http://somewhereelse:3128'
        self.assertEqual('http://somewhere:3128', getproxies()['http'])
Benjamin Peterson9bc93512008-09-22 22:10:59 +0000295
class urlopen_HttpTests(unittest.TestCase, FakeHTTPMixin, FakeFTPMixin):
    """Test urlopen() opening a fake http connection."""

    def check_read(self, ver):
        # Shared body of the test_read_* cases; *ver* is the HTTP version
        # as bytes, spliced into the canned status line.
        self.fakehttp(b"HTTP/" + ver + b" 200 OK\r\n\r\nHello!")
        try:
            response = urlopen("http://python.org/")
            self.assertEqual(response.readline(), b"Hello!")
            self.assertEqual(response.readline(), b"")
            self.assertEqual(response.geturl(), 'http://python.org/')
            self.assertEqual(response.getcode(), 200)
        finally:
            self.unfakehttp()

    def test_url_fragment(self):
        # Issue #11703: geturl() omits fragments in the original URL.
        target = 'http://docs.python.org/library/urllib.html#OK'
        self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello!")
        try:
            response = urllib.request.urlopen(target)
            self.assertEqual(response.geturl(), target)
        finally:
            self.unfakehttp()

    def test_willclose(self):
        self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello!")
        try:
            response = urlopen("http://www.python.org")
            self.assertTrue(response.fp.will_close)
        finally:
            self.unfakehttp()

    def test_read_0_9(self):
        # "0.9" response accepted (but not "simple responses" without
        # a status line)
        self.check_read(b"0.9")

    def test_read_1_0(self):
        self.check_read(b"1.0")

    def test_read_1_1(self):
        self.check_read(b"1.1")

    def test_read_bogus(self):
        # urlopen() should raise OSError for many error codes.
        canned = b'''HTTP/1.1 401 Authentication Required
Date: Wed, 02 Jan 2008 03:03:54 GMT
Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
Connection: close
Content-Type: text/html; charset=iso-8859-1
'''
        self.fakehttp(canned)
        try:
            with self.assertRaises(OSError):
                urlopen("http://python.org/")
        finally:
            self.unfakehttp()

    def test_invalid_redirect(self):
        # A redirect to a file: URL must be refused with an HTTPError.
        canned = b'''HTTP/1.1 302 Found
Date: Wed, 02 Jan 2008 03:03:54 GMT
Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
Location: file://guidocomputer.athome.com:/python/license
Connection: close
Content-Type: text/html; charset=iso-8859-1
'''
        self.fakehttp(canned)
        try:
            expected = "Redirection to url 'file:"
            with self.assertRaisesRegex(urllib.error.HTTPError, expected):
                urlopen("http://python.org/")
        finally:
            self.unfakehttp()

    def test_redirect_limit_independent(self):
        # Ticket #12923: make sure independent requests each use their
        # own retry limit.
        canned = b'''HTTP/1.1 302 Found
Location: file://guidocomputer.athome.com:/python/license
Connection: close
'''
        for _attempt in range(FancyURLopener().maxtries):
            self.fakehttp(canned)
            try:
                with self.assertRaises(urllib.error.HTTPError):
                    urlopen("http://something")
            finally:
                self.unfakehttp()

    def test_empty_socket(self):
        # urlopen() raises OSError if the underlying socket does not send
        # any data. (#1680230)
        self.fakehttp(b'')
        try:
            with self.assertRaises(OSError):
                urlopen("http://something")
        finally:
            self.unfakehttp()

    def test_missing_localfile(self):
        # Test for #10836
        with self.assertRaises(urllib.error.URLError) as caught:
            urlopen('file://localhost/a/file/which/doesnot/exists.py')
        error = caught.exception
        self.assertTrue(error.filename)
        self.assertTrue(error.reason)

    def test_file_notexists(self):
        fd, tmp_file = tempfile.mkstemp()
        tmp_fileurl = 'file://localhost/' + tmp_file.replace(os.path.sep, '/')
        try:
            # While the file exists, opening its URL succeeds.
            self.assertTrue(os.path.exists(tmp_file))
            with urlopen(tmp_fileurl) as fobj:
                self.assertTrue(fobj)
        finally:
            os.close(fd)
            os.unlink(tmp_file)
        # Once it has been removed, the same URL must fail.
        self.assertFalse(os.path.exists(tmp_file))
        with self.assertRaises(urllib.error.URLError):
            urlopen(tmp_fileurl)

    def test_ftp_nohost(self):
        test_ftp_url = 'ftp:///path'
        with self.assertRaises(urllib.error.URLError) as caught:
            urlopen(test_ftp_url)
        error = caught.exception
        self.assertFalse(error.filename)
        self.assertTrue(error.reason)

    def test_ftp_nonexisting(self):
        with self.assertRaises(urllib.error.URLError) as caught:
            urlopen('ftp://localhost/a/file/which/doesnot/exists.py')
        error = caught.exception
        self.assertFalse(error.filename)
        self.assertTrue(error.reason)

    @patch.object(urllib.request, 'MAXFTPCACHE', 0)
    def test_ftp_cache_pruning(self):
        self.fakeftp()
        try:
            # Seed the cache with one entry, then open another FTP URL
            # while MAXFTPCACHE is patched to 0.
            seeded = urllib.request.ftpwrapper(
                'user', 'pass', 'localhost', 21, [])
            urllib.request.ftpcache['test'] = seeded
            urlopen('ftp://localhost')
        finally:
            self.unfakeftp()

    def test_userpass_inurl(self):
        self.fakehttp(b"HTTP/1.0 200 OK\r\n\r\nHello!")
        try:
            response = urlopen("http://user:pass@python.org/")
            self.assertEqual(response.readline(), b"Hello!")
            self.assertEqual(response.readline(), b"")
            self.assertEqual(response.geturl(),
                             'http://user:pass@python.org/')
            self.assertEqual(response.getcode(), 200)
        finally:
            self.unfakehttp()

    def test_userpass_inurl_w_spaces(self):
        self.fakehttp(b"HTTP/1.0 200 OK\r\n\r\nHello!")
        try:
            userpass = "a b:c d"
            url = "http://{}@python.org/".format(userpass)
            fakehttp_wrapper = http.client.HTTPConnection
            token = b64encode(userpass.encode("ASCII")).decode("ASCII")
            authorization = "Authorization: Basic %s\r\n" % token
            response = urlopen(url)
            # The authorization header must be in place
            sent = fakehttp_wrapper.buf.decode("UTF-8")
            self.assertIn(authorization, sent)
            self.assertEqual(response.readline(), b"Hello!")
            self.assertEqual(response.readline(), b"")
            # the spaces are quoted in URL so no match
            self.assertNotEqual(response.geturl(), url)
            self.assertEqual(response.getcode(), 200)
        finally:
            self.unfakehttp()

    def test_URLopener_deprecation(self):
        with support.check_warnings(('', DeprecationWarning)):
            urllib.request.URLopener()

    @unittest.skipUnless(ssl, "ssl module required")
    def test_cafile_and_context(self):
        # Passing both cafile= and context= must raise ValueError.
        context = ssl.create_default_context()
        with support.check_warnings(('', DeprecationWarning)), \
                self.assertRaises(ValueError):
            urllib.request.urlopen(
                "https://localhost", cafile="/nonexistent/path",
                context=context
            )
Senthil Kumarana5c85b32014-09-19 15:23:30 +0800477
class urlopen_DataTests(unittest.TestCase):
    """Test urlopen() opening a data URL."""

    def setUp(self):
        # text containing URL special- and unicode-characters
        self.text = "test data URLs :;,%=& \u00f6 \u00c4 "
        # 2x1 pixel RGB PNG image with one black and one white pixel
        self.image = (
            b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\x02\x00\x00\x00'
            b'\x01\x08\x02\x00\x00\x00{@\xe8\xdd\x00\x00\x00\x01sRGB\x00\xae'
            b'\xce\x1c\xe9\x00\x00\x00\x0fIDAT\x08\xd7c```\xf8\xff\xff?\x00'
            b'\x06\x01\x02\xfe\no/\x1e\x00\x00\x00\x00IEND\xaeB`\x82')

        self.text_url = (
            "data:text/plain;charset=UTF-8,test%20data%20URLs%20%3A%3B%2C%25%3"
            "D%26%20%C3%B6%20%C3%84%20")
        self.text_url_base64 = (
            "data:text/plain;charset=ISO-8859-1;base64,dGVzdCBkYXRhIFVSTHMgOjs"
            "sJT0mIPYgxCA%3D")
        # base64 encoded data URL that contains ignorable spaces,
        # such as "\n", " ", "%0A", and "%20".
        self.image_url = (
            "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAIAAAABCAIAAAB7\n"
            "QOjdAAAAAXNSR0IArs4c6QAAAA9JREFUCNdj%0AYGBg%2BP//PwAGAQL%2BCm8 "
            "vHgAAAABJRU5ErkJggg%3D%3D%0A%20")

        # Each response is registered for closing as soon as it is opened,
        # so none is left open if a later urlopen() call or a test fails.
        self.text_url_resp = urllib.request.urlopen(self.text_url)
        self.addCleanup(self.text_url_resp.close)
        self.text_url_base64_resp = urllib.request.urlopen(
            self.text_url_base64)
        self.addCleanup(self.text_url_base64_resp.close)
        self.image_url_resp = urllib.request.urlopen(self.image_url)
        self.addCleanup(self.image_url_resp.close)

    def test_interface(self):
        # Make sure object returned by urlopen() has the specified methods
        for attr in ("read", "readline", "readlines",
                     "close", "info", "geturl", "getcode", "__iter__"):
            self.assertTrue(hasattr(self.text_url_resp, attr),
                            "object returned by urlopen() lacks %s attribute" %
                            attr)

    def test_info(self):
        self.assertIsInstance(self.text_url_resp.info(), email.message.Message)
        self.assertEqual(self.text_url_base64_resp.info().get_params(),
                         [('text/plain', ''), ('charset', 'ISO-8859-1')])
        self.assertEqual(self.image_url_resp.info()['content-length'],
                         str(len(self.image)))
        # A data URL with no media type falls back to text/plain, US-ASCII.
        # This response is opened here, so it is closed here as well.
        with urllib.request.urlopen("data:,") as bare_resp:
            self.assertEqual(bare_resp.info().get_params(),
                             [('text/plain', ''), ('charset', 'US-ASCII')])

    def test_geturl(self):
        self.assertEqual(self.text_url_resp.geturl(), self.text_url)
        self.assertEqual(self.text_url_base64_resp.geturl(),
                         self.text_url_base64)
        self.assertEqual(self.image_url_resp.geturl(), self.image_url)

    def test_read_text(self):
        # Decode with the charset the response itself announces.
        charset = dict(self.text_url_resp.info().get_params())['charset']
        self.assertEqual(self.text_url_resp.read().decode(charset), self.text)

    def test_read_text_base64(self):
        # Decode with the charset the response itself announces.
        charset = dict(
            self.text_url_base64_resp.info().get_params())['charset']
        self.assertEqual(self.text_url_base64_resp.read().decode(charset),
                         self.text)

    def test_read_image(self):
        self.assertEqual(self.image_url_resp.read(), self.image)

    def test_missing_comma(self):
        with self.assertRaises(ValueError):
            urllib.request.urlopen('data:text/plain')

    def test_invalid_base64_data(self):
        # missing padding character
        with self.assertRaises(ValueError):
            urllib.request.urlopen('data:;base64,Cg=')
550
Brett Cannon19691362003-04-29 05:08:06 +0000551class urlretrieve_FileTests(unittest.TestCase):
Brett Cannon74bfd702003-04-25 09:39:47 +0000552 """Test urllib.urlretrieve() on local files"""
Skip Montanaro080c9972001-01-28 21:12:22 +0000553
Brett Cannon19691362003-04-29 05:08:06 +0000554 def setUp(self):
Georg Brandl5a650a22005-08-26 08:51:34 +0000555 # Create a list of temporary files. Each item in the list is a file
556 # name (absolute path or relative to the current working directory).
557 # All files in this list will be deleted in the tearDown method. Note,
558 # this only helps to makes sure temporary files get deleted, but it
559 # does nothing about trying to close files that may still be open. It
560 # is the responsibility of the developer to properly close files even
561 # when exceptional conditions occur.
562 self.tempFiles = []
563
Brett Cannon19691362003-04-29 05:08:06 +0000564 # Create a temporary file.
Benjamin Petersonee8712c2008-05-20 21:35:26 +0000565 self.registerFileForCleanUp(support.TESTFN)
Guido van Rossuma0982942007-07-10 08:30:03 +0000566 self.text = b'testing urllib.urlretrieve'
Georg Brandl5a650a22005-08-26 08:51:34 +0000567 try:
Benjamin Petersonee8712c2008-05-20 21:35:26 +0000568 FILE = open(support.TESTFN, 'wb')
Georg Brandl5a650a22005-08-26 08:51:34 +0000569 FILE.write(self.text)
570 FILE.close()
571 finally:
572 try: FILE.close()
573 except: pass
Brett Cannon19691362003-04-29 05:08:06 +0000574
575 def tearDown(self):
Georg Brandl5a650a22005-08-26 08:51:34 +0000576 # Delete the temporary files.
577 for each in self.tempFiles:
578 try: os.remove(each)
579 except: pass
580
581 def constructLocalFileUrl(self, filePath):
Victor Stinner6c6f8512010-08-07 10:09:35 +0000582 filePath = os.path.abspath(filePath)
583 try:
Marc-André Lemburg8f36af72011-02-25 15:42:01 +0000584 filePath.encode("utf-8")
Victor Stinner6c6f8512010-08-07 10:09:35 +0000585 except UnicodeEncodeError:
586 raise unittest.SkipTest("filePath is not encodable to utf8")
587 return "file://%s" % urllib.request.pathname2url(filePath)
Georg Brandl5a650a22005-08-26 08:51:34 +0000588
Guido van Rossum70d0dda2007-08-29 01:53:26 +0000589 def createNewTempFile(self, data=b""):
Georg Brandl5a650a22005-08-26 08:51:34 +0000590 """Creates a new temporary file containing the specified data,
591 registers the file for deletion during the test fixture tear down, and
592 returns the absolute path of the file."""
593
594 newFd, newFilePath = tempfile.mkstemp()
595 try:
596 self.registerFileForCleanUp(newFilePath)
597 newFile = os.fdopen(newFd, "wb")
598 newFile.write(data)
599 newFile.close()
600 finally:
601 try: newFile.close()
602 except: pass
603 return newFilePath
604
605 def registerFileForCleanUp(self, fileName):
606 self.tempFiles.append(fileName)
Brett Cannon19691362003-04-29 05:08:06 +0000607
608 def test_basic(self):
609 # Make sure that a local file just gets its own location returned and
610 # a headers value is returned.
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000611 result = urllib.request.urlretrieve("file:%s" % support.TESTFN)
Benjamin Petersonee8712c2008-05-20 21:35:26 +0000612 self.assertEqual(result[0], support.TESTFN)
Ezio Melottie9615932010-01-24 19:26:24 +0000613 self.assertIsInstance(result[1], email.message.Message,
Martin Panter7462b6492015-11-02 03:37:02 +0000614 "did not get an email.message.Message instance "
Ezio Melottie9615932010-01-24 19:26:24 +0000615 "as second returned value")
Brett Cannon19691362003-04-29 05:08:06 +0000616
617 def test_copy(self):
618 # Test that setting the filename argument works.
Benjamin Petersonee8712c2008-05-20 21:35:26 +0000619 second_temp = "%s.2" % support.TESTFN
Georg Brandl5a650a22005-08-26 08:51:34 +0000620 self.registerFileForCleanUp(second_temp)
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000621 result = urllib.request.urlretrieve(self.constructLocalFileUrl(
Benjamin Petersonee8712c2008-05-20 21:35:26 +0000622 support.TESTFN), second_temp)
Brett Cannon19691362003-04-29 05:08:06 +0000623 self.assertEqual(second_temp, result[0])
Benjamin Petersonc9c0f202009-06-30 23:06:06 +0000624 self.assertTrue(os.path.exists(second_temp), "copy of the file was not "
Brett Cannon19691362003-04-29 05:08:06 +0000625 "made")
Alex Martelli01c77c62006-08-24 02:58:11 +0000626 FILE = open(second_temp, 'rb')
Brett Cannon19691362003-04-29 05:08:06 +0000627 try:
628 text = FILE.read()
Brett Cannon19691362003-04-29 05:08:06 +0000629 FILE.close()
Georg Brandl5a650a22005-08-26 08:51:34 +0000630 finally:
631 try: FILE.close()
632 except: pass
Brett Cannon19691362003-04-29 05:08:06 +0000633 self.assertEqual(self.text, text)
634
635 def test_reporthook(self):
636 # Make sure that the reporthook works.
Senthil Kumarane24f96a2012-03-13 19:29:33 -0700637 def hooktester(block_count, block_read_size, file_size, count_holder=[0]):
638 self.assertIsInstance(block_count, int)
639 self.assertIsInstance(block_read_size, int)
640 self.assertIsInstance(file_size, int)
641 self.assertEqual(block_count, count_holder[0])
Brett Cannon19691362003-04-29 05:08:06 +0000642 count_holder[0] = count_holder[0] + 1
Benjamin Petersonee8712c2008-05-20 21:35:26 +0000643 second_temp = "%s.2" % support.TESTFN
Georg Brandl5a650a22005-08-26 08:51:34 +0000644 self.registerFileForCleanUp(second_temp)
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000645 urllib.request.urlretrieve(
646 self.constructLocalFileUrl(support.TESTFN),
Georg Brandl5a650a22005-08-26 08:51:34 +0000647 second_temp, hooktester)
648
649 def test_reporthook_0_bytes(self):
650 # Test on zero length file. Should call reporthook only 1 time.
651 report = []
Senthil Kumarane24f96a2012-03-13 19:29:33 -0700652 def hooktester(block_count, block_read_size, file_size, _report=report):
653 _report.append((block_count, block_read_size, file_size))
Georg Brandl5a650a22005-08-26 08:51:34 +0000654 srcFileName = self.createNewTempFile()
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000655 urllib.request.urlretrieve(self.constructLocalFileUrl(srcFileName),
Benjamin Petersonee8712c2008-05-20 21:35:26 +0000656 support.TESTFN, hooktester)
Georg Brandl5a650a22005-08-26 08:51:34 +0000657 self.assertEqual(len(report), 1)
658 self.assertEqual(report[0][2], 0)
659
660 def test_reporthook_5_bytes(self):
661 # Test on 5 byte file. Should call reporthook only 2 times (once when
662 # the "network connection" is established and once when the block is
Senthil Kumarane24f96a2012-03-13 19:29:33 -0700663 # read).
Georg Brandl5a650a22005-08-26 08:51:34 +0000664 report = []
Senthil Kumarane24f96a2012-03-13 19:29:33 -0700665 def hooktester(block_count, block_read_size, file_size, _report=report):
666 _report.append((block_count, block_read_size, file_size))
Guido van Rossum70d0dda2007-08-29 01:53:26 +0000667 srcFileName = self.createNewTempFile(b"x" * 5)
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000668 urllib.request.urlretrieve(self.constructLocalFileUrl(srcFileName),
Benjamin Petersonee8712c2008-05-20 21:35:26 +0000669 support.TESTFN, hooktester)
Georg Brandl5a650a22005-08-26 08:51:34 +0000670 self.assertEqual(len(report), 2)
Gregory P. Smith6d9388f2012-11-10 15:12:55 -0800671 self.assertEqual(report[0][2], 5)
672 self.assertEqual(report[1][2], 5)
Georg Brandl5a650a22005-08-26 08:51:34 +0000673
674 def test_reporthook_8193_bytes(self):
675 # Test on 8193 byte file. Should call reporthook only 3 times (once
676 # when the "network connection" is established, once for the next 8192
677 # bytes, and once for the last byte).
678 report = []
Senthil Kumarane24f96a2012-03-13 19:29:33 -0700679 def hooktester(block_count, block_read_size, file_size, _report=report):
680 _report.append((block_count, block_read_size, file_size))
Guido van Rossum70d0dda2007-08-29 01:53:26 +0000681 srcFileName = self.createNewTempFile(b"x" * 8193)
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000682 urllib.request.urlretrieve(self.constructLocalFileUrl(srcFileName),
Benjamin Petersonee8712c2008-05-20 21:35:26 +0000683 support.TESTFN, hooktester)
Georg Brandl5a650a22005-08-26 08:51:34 +0000684 self.assertEqual(len(report), 3)
Gregory P. Smith6d9388f2012-11-10 15:12:55 -0800685 self.assertEqual(report[0][2], 8193)
686 self.assertEqual(report[0][1], 8192)
Senthil Kumarane24f96a2012-03-13 19:29:33 -0700687 self.assertEqual(report[1][1], 8192)
Gregory P. Smith6d9388f2012-11-10 15:12:55 -0800688 self.assertEqual(report[2][1], 8192)
Skip Montanaro080c9972001-01-28 21:12:22 +0000689
Senthil Kumarance260142011-11-01 01:35:17 +0800690
class urlretrieve_HttpTests(unittest.TestCase, FakeHTTPMixin):
    """Test urllib.urlretrieve() using fake http connections"""

    # Canned reply that announces Content-Length: 100 but carries only the
    # three-byte body "FF\n".
    _SHORT_REPLY = b'''HTTP/1.1 200 OK
Date: Wed, 02 Jan 2008 03:03:54 GMT
Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
Connection: close
Content-Length: 100
Content-Type: text/html; charset=iso-8859-1

FF
'''

    def _assert_retrieve_too_short(self, **retrieve_kwargs):
        # Install the fake connection, expect ContentTooShortError from
        # urlretrieve(), and always remove the fake connection afterwards.
        self.fakehttp(self._SHORT_REPLY)
        try:
            with self.assertRaises(urllib.error.ContentTooShortError):
                urllib.request.urlretrieve('http://example.com/',
                                           **retrieve_kwargs)
        finally:
            self.unfakehttp()

    def test_short_content_raises_ContentTooShortError(self):
        def _reporthook(par1, par2, par3):
            pass

        self._assert_retrieve_too_short(reporthook=_reporthook)

    def test_short_content_raises_ContentTooShortError_without_reporthook(self):
        self._assert_retrieve_too_short()
730
731
Brett Cannon74bfd702003-04-25 09:39:47 +0000732class QuotingTests(unittest.TestCase):
R David Murray44b548d2016-09-08 13:59:53 -0400733 r"""Tests for urllib.quote() and urllib.quote_plus()
Tim Petersc2659cf2003-05-12 20:19:37 +0000734
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000735 According to RFC 2396 (Uniform Resource Identifiers), to escape a
736 character you write it as '%' + <2 character US-ASCII hex value>.
737 The Python code of ``'%' + hex(ord(<character>))[2:]`` escapes a
738 character properly. Case does not matter on the hex letters.
Brett Cannon74bfd702003-04-25 09:39:47 +0000739
740 The various character sets specified are:
Tim Petersc2659cf2003-05-12 20:19:37 +0000741
Brett Cannon74bfd702003-04-25 09:39:47 +0000742 Reserved characters : ";/?:@&=+$,"
743 Have special meaning in URIs and must be escaped if not being used for
744 their special meaning
745 Data characters : letters, digits, and "-_.!~*'()"
746 Unreserved and do not need to be escaped; can be, though, if desired
747 Control characters : 0x00 - 0x1F, 0x7F
748 Have no use in URIs so must be escaped
749 space : 0x20
750 Must be escaped
751 Delimiters : '<>#%"'
752 Must be escaped
753 Unwise : "{}|\^[]`"
754 Must be escaped
Tim Petersc2659cf2003-05-12 20:19:37 +0000755
Brett Cannon74bfd702003-04-25 09:39:47 +0000756 """
757
758 def test_never_quote(self):
759 # Make sure quote() does not quote letters, digits, and "_,.-"
760 do_not_quote = '' .join(["ABCDEFGHIJKLMNOPQRSTUVWXYZ",
761 "abcdefghijklmnopqrstuvwxyz",
762 "0123456789",
763 "_.-"])
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000764 result = urllib.parse.quote(do_not_quote)
Brett Cannon74bfd702003-04-25 09:39:47 +0000765 self.assertEqual(do_not_quote, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000766 "using quote(): %r != %r" % (do_not_quote, result))
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000767 result = urllib.parse.quote_plus(do_not_quote)
Brett Cannon74bfd702003-04-25 09:39:47 +0000768 self.assertEqual(do_not_quote, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000769 "using quote_plus(): %r != %r" % (do_not_quote, result))
Brett Cannon74bfd702003-04-25 09:39:47 +0000770
771 def test_default_safe(self):
772 # Test '/' is default value for 'safe' parameter
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000773 self.assertEqual(urllib.parse.quote.__defaults__[0], '/')
Brett Cannon74bfd702003-04-25 09:39:47 +0000774
775 def test_safe(self):
776 # Test setting 'safe' parameter does what it should do
777 quote_by_default = "<>"
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000778 result = urllib.parse.quote(quote_by_default, safe=quote_by_default)
Brett Cannon74bfd702003-04-25 09:39:47 +0000779 self.assertEqual(quote_by_default, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000780 "using quote(): %r != %r" % (quote_by_default, result))
Jeremy Hylton1ef7c6b2009-03-26 16:57:30 +0000781 result = urllib.parse.quote_plus(quote_by_default,
782 safe=quote_by_default)
Brett Cannon74bfd702003-04-25 09:39:47 +0000783 self.assertEqual(quote_by_default, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000784 "using quote_plus(): %r != %r" %
Brett Cannon74bfd702003-04-25 09:39:47 +0000785 (quote_by_default, result))
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000786 # Safe expressed as bytes rather than str
787 result = urllib.parse.quote(quote_by_default, safe=b"<>")
788 self.assertEqual(quote_by_default, result,
789 "using quote(): %r != %r" % (quote_by_default, result))
790 # "Safe" non-ASCII characters should have no effect
791 # (Since URIs are not allowed to have non-ASCII characters)
792 result = urllib.parse.quote("a\xfcb", encoding="latin-1", safe="\xfc")
793 expect = urllib.parse.quote("a\xfcb", encoding="latin-1", safe="")
794 self.assertEqual(expect, result,
795 "using quote(): %r != %r" %
796 (expect, result))
797 # Same as above, but using a bytes rather than str
798 result = urllib.parse.quote("a\xfcb", encoding="latin-1", safe=b"\xfc")
799 expect = urllib.parse.quote("a\xfcb", encoding="latin-1", safe="")
800 self.assertEqual(expect, result,
801 "using quote(): %r != %r" %
802 (expect, result))
Brett Cannon74bfd702003-04-25 09:39:47 +0000803
804 def test_default_quoting(self):
805 # Make sure all characters that should be quoted are by default sans
806 # space (separate test for that).
807 should_quote = [chr(num) for num in range(32)] # For 0x00 - 0x1F
R David Murray44b548d2016-09-08 13:59:53 -0400808 should_quote.append(r'<>#%"{}|\^[]`')
Brett Cannon74bfd702003-04-25 09:39:47 +0000809 should_quote.append(chr(127)) # For 0x7F
810 should_quote = ''.join(should_quote)
811 for char in should_quote:
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000812 result = urllib.parse.quote(char)
Brett Cannon74bfd702003-04-25 09:39:47 +0000813 self.assertEqual(hexescape(char), result,
Jeremy Hylton1ef7c6b2009-03-26 16:57:30 +0000814 "using quote(): "
815 "%s should be escaped to %s, not %s" %
Brett Cannon74bfd702003-04-25 09:39:47 +0000816 (char, hexescape(char), result))
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000817 result = urllib.parse.quote_plus(char)
Brett Cannon74bfd702003-04-25 09:39:47 +0000818 self.assertEqual(hexescape(char), result,
819 "using quote_plus(): "
Tim Petersc2659cf2003-05-12 20:19:37 +0000820 "%s should be escapes to %s, not %s" %
Brett Cannon74bfd702003-04-25 09:39:47 +0000821 (char, hexescape(char), result))
822 del should_quote
823 partial_quote = "ab[]cd"
824 expected = "ab%5B%5Dcd"
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000825 result = urllib.parse.quote(partial_quote)
Brett Cannon74bfd702003-04-25 09:39:47 +0000826 self.assertEqual(expected, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000827 "using quote(): %r != %r" % (expected, result))
Senthil Kumaran305a68e2011-09-13 06:40:27 +0800828 result = urllib.parse.quote_plus(partial_quote)
Brett Cannon74bfd702003-04-25 09:39:47 +0000829 self.assertEqual(expected, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000830 "using quote_plus(): %r != %r" % (expected, result))
Brett Cannon74bfd702003-04-25 09:39:47 +0000831
832 def test_quoting_space(self):
833 # Make sure quote() and quote_plus() handle spaces as specified in
834 # their unique way
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000835 result = urllib.parse.quote(' ')
Brett Cannon74bfd702003-04-25 09:39:47 +0000836 self.assertEqual(result, hexescape(' '),
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000837 "using quote(): %r != %r" % (result, hexescape(' ')))
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000838 result = urllib.parse.quote_plus(' ')
Brett Cannon74bfd702003-04-25 09:39:47 +0000839 self.assertEqual(result, '+',
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000840 "using quote_plus(): %r != +" % result)
Brett Cannon74bfd702003-04-25 09:39:47 +0000841 given = "a b cd e f"
842 expect = given.replace(' ', hexescape(' '))
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000843 result = urllib.parse.quote(given)
Brett Cannon74bfd702003-04-25 09:39:47 +0000844 self.assertEqual(expect, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000845 "using quote(): %r != %r" % (expect, result))
Brett Cannon74bfd702003-04-25 09:39:47 +0000846 expect = given.replace(' ', '+')
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000847 result = urllib.parse.quote_plus(given)
Brett Cannon74bfd702003-04-25 09:39:47 +0000848 self.assertEqual(expect, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000849 "using quote_plus(): %r != %r" % (expect, result))
Brett Cannon74bfd702003-04-25 09:39:47 +0000850
Raymond Hettinger2bdec7b2005-09-10 14:30:09 +0000851 def test_quoting_plus(self):
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000852 self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma'),
Raymond Hettinger2bdec7b2005-09-10 14:30:09 +0000853 'alpha%2Bbeta+gamma')
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000854 self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma', '+'),
Raymond Hettinger2bdec7b2005-09-10 14:30:09 +0000855 'alpha+beta+gamma')
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000856 # Test with bytes
857 self.assertEqual(urllib.parse.quote_plus(b'alpha+beta gamma'),
858 'alpha%2Bbeta+gamma')
859 # Test with safe bytes
860 self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma', b'+'),
861 'alpha+beta+gamma')
862
863 def test_quote_bytes(self):
864 # Bytes should quote directly to percent-encoded values
865 given = b"\xa2\xd8ab\xff"
866 expect = "%A2%D8ab%FF"
867 result = urllib.parse.quote(given)
868 self.assertEqual(expect, result,
869 "using quote(): %r != %r" % (expect, result))
870 # Encoding argument should raise type error on bytes input
871 self.assertRaises(TypeError, urllib.parse.quote, given,
872 encoding="latin-1")
873 # quote_from_bytes should work the same
874 result = urllib.parse.quote_from_bytes(given)
875 self.assertEqual(expect, result,
876 "using quote_from_bytes(): %r != %r"
877 % (expect, result))
878
879 def test_quote_with_unicode(self):
880 # Characters in Latin-1 range, encoded by default in UTF-8
881 given = "\xa2\xd8ab\xff"
882 expect = "%C2%A2%C3%98ab%C3%BF"
883 result = urllib.parse.quote(given)
884 self.assertEqual(expect, result,
885 "using quote(): %r != %r" % (expect, result))
886 # Characters in Latin-1 range, encoded by with None (default)
887 result = urllib.parse.quote(given, encoding=None, errors=None)
888 self.assertEqual(expect, result,
889 "using quote(): %r != %r" % (expect, result))
890 # Characters in Latin-1 range, encoded with Latin-1
891 given = "\xa2\xd8ab\xff"
892 expect = "%A2%D8ab%FF"
893 result = urllib.parse.quote(given, encoding="latin-1")
894 self.assertEqual(expect, result,
895 "using quote(): %r != %r" % (expect, result))
896 # Characters in BMP, encoded by default in UTF-8
897 given = "\u6f22\u5b57" # "Kanji"
898 expect = "%E6%BC%A2%E5%AD%97"
899 result = urllib.parse.quote(given)
900 self.assertEqual(expect, result,
901 "using quote(): %r != %r" % (expect, result))
902 # Characters in BMP, encoded with Latin-1
903 given = "\u6f22\u5b57"
904 self.assertRaises(UnicodeEncodeError, urllib.parse.quote, given,
905 encoding="latin-1")
906 # Characters in BMP, encoded with Latin-1, with replace error handling
907 given = "\u6f22\u5b57"
908 expect = "%3F%3F" # "??"
909 result = urllib.parse.quote(given, encoding="latin-1",
910 errors="replace")
911 self.assertEqual(expect, result,
912 "using quote(): %r != %r" % (expect, result))
913 # Characters in BMP, Latin-1, with xmlcharref error handling
914 given = "\u6f22\u5b57"
915 expect = "%26%2328450%3B%26%2323383%3B" # "&#28450;&#23383;"
916 result = urllib.parse.quote(given, encoding="latin-1",
917 errors="xmlcharrefreplace")
918 self.assertEqual(expect, result,
919 "using quote(): %r != %r" % (expect, result))
Raymond Hettinger2bdec7b2005-09-10 14:30:09 +0000920
Georg Brandlfaf41492009-05-26 18:31:11 +0000921 def test_quote_plus_with_unicode(self):
922 # Encoding (latin-1) test for quote_plus
923 given = "\xa2\xd8 \xff"
924 expect = "%A2%D8+%FF"
925 result = urllib.parse.quote_plus(given, encoding="latin-1")
926 self.assertEqual(expect, result,
927 "using quote_plus(): %r != %r" % (expect, result))
928 # Errors test for quote_plus
929 given = "ab\u6f22\u5b57 cd"
930 expect = "ab%3F%3F+cd"
931 result = urllib.parse.quote_plus(given, encoding="latin-1",
932 errors="replace")
933 self.assertEqual(expect, result,
934 "using quote_plus(): %r != %r" % (expect, result))
935
Senthil Kumarand496c4c2010-07-30 19:34:36 +0000936
Brett Cannon74bfd702003-04-25 09:39:47 +0000937class UnquotingTests(unittest.TestCase):
938 """Tests for unquote() and unquote_plus()
Tim Petersc2659cf2003-05-12 20:19:37 +0000939
Brett Cannon74bfd702003-04-25 09:39:47 +0000940 See the doc string for quoting_Tests for details on quoting and such.
941
942 """
943
944 def test_unquoting(self):
945 # Make sure unquoting of all ASCII values works
946 escape_list = []
947 for num in range(128):
948 given = hexescape(chr(num))
949 expect = chr(num)
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000950 result = urllib.parse.unquote(given)
Brett Cannon74bfd702003-04-25 09:39:47 +0000951 self.assertEqual(expect, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000952 "using unquote(): %r != %r" % (expect, result))
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000953 result = urllib.parse.unquote_plus(given)
Brett Cannon74bfd702003-04-25 09:39:47 +0000954 self.assertEqual(expect, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000955 "using unquote_plus(): %r != %r" %
Brett Cannon74bfd702003-04-25 09:39:47 +0000956 (expect, result))
957 escape_list.append(given)
958 escape_string = ''.join(escape_list)
959 del escape_list
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000960 result = urllib.parse.unquote(escape_string)
Brett Cannon74bfd702003-04-25 09:39:47 +0000961 self.assertEqual(result.count('%'), 1,
Brett Cannon74bfd702003-04-25 09:39:47 +0000962 "using unquote(): not all characters escaped: "
963 "%s" % result)
Georg Brandl604ef372010-07-31 08:20:02 +0000964 self.assertRaises((TypeError, AttributeError), urllib.parse.unquote, None)
965 self.assertRaises((TypeError, AttributeError), urllib.parse.unquote, ())
Florent Xicluna62829dc2010-08-14 20:51:58 +0000966 with support.check_warnings(('', BytesWarning), quiet=True):
967 self.assertRaises((TypeError, AttributeError), urllib.parse.unquote, b'')
Brett Cannon74bfd702003-04-25 09:39:47 +0000968
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000969 def test_unquoting_badpercent(self):
970 # Test unquoting on bad percent-escapes
971 given = '%xab'
972 expect = given
973 result = urllib.parse.unquote(given)
974 self.assertEqual(expect, result, "using unquote(): %r != %r"
975 % (expect, result))
976 given = '%x'
977 expect = given
978 result = urllib.parse.unquote(given)
979 self.assertEqual(expect, result, "using unquote(): %r != %r"
980 % (expect, result))
981 given = '%'
982 expect = given
983 result = urllib.parse.unquote(given)
984 self.assertEqual(expect, result, "using unquote(): %r != %r"
985 % (expect, result))
986 # unquote_to_bytes
987 given = '%xab'
988 expect = bytes(given, 'ascii')
989 result = urllib.parse.unquote_to_bytes(given)
990 self.assertEqual(expect, result, "using unquote_to_bytes(): %r != %r"
991 % (expect, result))
992 given = '%x'
993 expect = bytes(given, 'ascii')
994 result = urllib.parse.unquote_to_bytes(given)
995 self.assertEqual(expect, result, "using unquote_to_bytes(): %r != %r"
996 % (expect, result))
997 given = '%'
998 expect = bytes(given, 'ascii')
999 result = urllib.parse.unquote_to_bytes(given)
1000 self.assertEqual(expect, result, "using unquote_to_bytes(): %r != %r"
1001 % (expect, result))
Georg Brandl604ef372010-07-31 08:20:02 +00001002 self.assertRaises((TypeError, AttributeError), urllib.parse.unquote_to_bytes, None)
1003 self.assertRaises((TypeError, AttributeError), urllib.parse.unquote_to_bytes, ())
Senthil Kumaran79e17f62010-07-19 18:17:19 +00001004
Guido van Rossum52dbbb92008-08-18 21:44:30 +00001005 def test_unquoting_mixed_case(self):
1006 # Test unquoting on mixed-case hex digits in the percent-escapes
1007 given = '%Ab%eA'
1008 expect = b'\xab\xea'
1009 result = urllib.parse.unquote_to_bytes(given)
1010 self.assertEqual(expect, result,
1011 "using unquote_to_bytes(): %r != %r"
1012 % (expect, result))
1013
Brett Cannon74bfd702003-04-25 09:39:47 +00001014 def test_unquoting_parts(self):
1015 # Make sure unquoting works when have non-quoted characters
1016 # interspersed
1017 given = 'ab%sd' % hexescape('c')
1018 expect = "abcd"
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001019 result = urllib.parse.unquote(given)
Brett Cannon74bfd702003-04-25 09:39:47 +00001020 self.assertEqual(expect, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +00001021 "using quote(): %r != %r" % (expect, result))
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001022 result = urllib.parse.unquote_plus(given)
Brett Cannon74bfd702003-04-25 09:39:47 +00001023 self.assertEqual(expect, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +00001024 "using unquote_plus(): %r != %r" % (expect, result))
Tim Petersc2659cf2003-05-12 20:19:37 +00001025
Brett Cannon74bfd702003-04-25 09:39:47 +00001026 def test_unquoting_plus(self):
1027 # Test difference between unquote() and unquote_plus()
1028 given = "are+there+spaces..."
1029 expect = given
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001030 result = urllib.parse.unquote(given)
Brett Cannon74bfd702003-04-25 09:39:47 +00001031 self.assertEqual(expect, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +00001032 "using unquote(): %r != %r" % (expect, result))
Brett Cannon74bfd702003-04-25 09:39:47 +00001033 expect = given.replace('+', ' ')
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001034 result = urllib.parse.unquote_plus(given)
Brett Cannon74bfd702003-04-25 09:39:47 +00001035 self.assertEqual(expect, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +00001036 "using unquote_plus(): %r != %r" % (expect, result))
1037
1038 def test_unquote_to_bytes(self):
1039 given = 'br%C3%BCckner_sapporo_20050930.doc'
1040 expect = b'br\xc3\xbcckner_sapporo_20050930.doc'
1041 result = urllib.parse.unquote_to_bytes(given)
1042 self.assertEqual(expect, result,
1043 "using unquote_to_bytes(): %r != %r"
1044 % (expect, result))
1045 # Test on a string with unescaped non-ASCII characters
1046 # (Technically an invalid URI; expect those characters to be UTF-8
1047 # encoded).
1048 result = urllib.parse.unquote_to_bytes("\u6f22%C3%BC")
1049 expect = b'\xe6\xbc\xa2\xc3\xbc' # UTF-8 for "\u6f22\u00fc"
1050 self.assertEqual(expect, result,
1051 "using unquote_to_bytes(): %r != %r"
1052 % (expect, result))
1053 # Test with a bytes as input
1054 given = b'%A2%D8ab%FF'
1055 expect = b'\xa2\xd8ab\xff'
1056 result = urllib.parse.unquote_to_bytes(given)
1057 self.assertEqual(expect, result,
1058 "using unquote_to_bytes(): %r != %r"
1059 % (expect, result))
1060 # Test with a bytes as input, with unescaped non-ASCII bytes
1061 # (Technically an invalid URI; expect those bytes to be preserved)
1062 given = b'%A2\xd8ab%FF'
1063 expect = b'\xa2\xd8ab\xff'
1064 result = urllib.parse.unquote_to_bytes(given)
1065 self.assertEqual(expect, result,
1066 "using unquote_to_bytes(): %r != %r"
1067 % (expect, result))
Brett Cannon74bfd702003-04-25 09:39:47 +00001068
Raymond Hettinger4b0f20d2005-10-15 16:41:53 +00001069 def test_unquote_with_unicode(self):
Guido van Rossum52dbbb92008-08-18 21:44:30 +00001070 # Characters in the Latin-1 range, encoded with UTF-8
1071 given = 'br%C3%BCckner_sapporo_20050930.doc'
1072 expect = 'br\u00fcckner_sapporo_20050930.doc'
1073 result = urllib.parse.unquote(given)
1074 self.assertEqual(expect, result,
1075 "using unquote(): %r != %r" % (expect, result))
1076 # Characters in the Latin-1 range, encoded with None (default)
1077 result = urllib.parse.unquote(given, encoding=None, errors=None)
1078 self.assertEqual(expect, result,
1079 "using unquote(): %r != %r" % (expect, result))
1080
1081 # Characters in the Latin-1 range, encoded with Latin-1
1082 result = urllib.parse.unquote('br%FCckner_sapporo_20050930.doc',
1083 encoding="latin-1")
1084 expect = 'br\u00fcckner_sapporo_20050930.doc'
1085 self.assertEqual(expect, result,
1086 "using unquote(): %r != %r" % (expect, result))
1087
1088 # Characters in BMP, encoded with UTF-8
1089 given = "%E6%BC%A2%E5%AD%97"
1090 expect = "\u6f22\u5b57" # "Kanji"
1091 result = urllib.parse.unquote(given)
1092 self.assertEqual(expect, result,
1093 "using unquote(): %r != %r" % (expect, result))
1094
1095 # Decode with UTF-8, invalid sequence
1096 given = "%F3%B1"
1097 expect = "\ufffd" # Replacement character
1098 result = urllib.parse.unquote(given)
1099 self.assertEqual(expect, result,
1100 "using unquote(): %r != %r" % (expect, result))
1101
1102 # Decode with UTF-8, invalid sequence, replace errors
1103 result = urllib.parse.unquote(given, errors="replace")
1104 self.assertEqual(expect, result,
1105 "using unquote(): %r != %r" % (expect, result))
1106
1107 # Decode with UTF-8, invalid sequence, ignoring errors
1108 given = "%F3%B1"
1109 expect = ""
1110 result = urllib.parse.unquote(given, errors="ignore")
1111 self.assertEqual(expect, result,
1112 "using unquote(): %r != %r" % (expect, result))
1113
1114 # A mix of non-ASCII and percent-encoded characters, UTF-8
1115 result = urllib.parse.unquote("\u6f22%C3%BC")
1116 expect = '\u6f22\u00fc'
1117 self.assertEqual(expect, result,
1118 "using unquote(): %r != %r" % (expect, result))
1119
1120 # A mix of non-ASCII and percent-encoded characters, Latin-1
1121 # (Note, the string contains non-Latin-1-representable characters)
1122 result = urllib.parse.unquote("\u6f22%FC", encoding="latin-1")
1123 expect = '\u6f22\u00fc'
1124 self.assertEqual(expect, result,
1125 "using unquote(): %r != %r" % (expect, result))
Raymond Hettinger4b0f20d2005-10-15 16:41:53 +00001126
Brett Cannon74bfd702003-04-25 09:39:47 +00001127class urlencode_Tests(unittest.TestCase):
1128 """Tests for urlencode()"""
1129
1130 def help_inputtype(self, given, test_type):
1131 """Helper method for testing different input types.
Tim Petersc2659cf2003-05-12 20:19:37 +00001132
Brett Cannon74bfd702003-04-25 09:39:47 +00001133 'given' must lead to only the pairs:
1134 * 1st, 1
1135 * 2nd, 2
1136 * 3rd, 3
Tim Petersc2659cf2003-05-12 20:19:37 +00001137
Brett Cannon74bfd702003-04-25 09:39:47 +00001138 Test cannot assume anything about order. Docs make no guarantee and
1139 have possible dictionary input.
Tim Petersc2659cf2003-05-12 20:19:37 +00001140
Brett Cannon74bfd702003-04-25 09:39:47 +00001141 """
1142 expect_somewhere = ["1st=1", "2nd=2", "3rd=3"]
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001143 result = urllib.parse.urlencode(given)
Brett Cannon74bfd702003-04-25 09:39:47 +00001144 for expected in expect_somewhere:
Ezio Melottib58e0bd2010-01-23 15:40:09 +00001145 self.assertIn(expected, result,
Brett Cannon74bfd702003-04-25 09:39:47 +00001146 "testing %s: %s not found in %s" %
1147 (test_type, expected, result))
1148 self.assertEqual(result.count('&'), 2,
1149 "testing %s: expected 2 '&'s; got %s" %
1150 (test_type, result.count('&')))
1151 amp_location = result.index('&')
1152 on_amp_left = result[amp_location - 1]
1153 on_amp_right = result[amp_location + 1]
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00001154 self.assertTrue(on_amp_left.isdigit() and on_amp_right.isdigit(),
Brett Cannon74bfd702003-04-25 09:39:47 +00001155 "testing %s: '&' not located in proper place in %s" %
1156 (test_type, result))
1157 self.assertEqual(len(result), (5 * 3) + 2, #5 chars per thing and amps
1158 "testing %s: "
1159 "unexpected number of characters: %s != %s" %
1160 (test_type, len(result), (5 * 3) + 2))
1161
1162 def test_using_mapping(self):
1163 # Test passing in a mapping object as an argument.
1164 self.help_inputtype({"1st":'1', "2nd":'2', "3rd":'3'},
1165 "using dict as input type")
1166
1167 def test_using_sequence(self):
1168 # Test passing in a sequence of two-item sequences as an argument.
1169 self.help_inputtype([('1st', '1'), ('2nd', '2'), ('3rd', '3')],
1170 "using sequence of two-item tuples as input")
1171
1172 def test_quoting(self):
1173 # Make sure keys and values are quoted using quote_plus()
1174 given = {"&":"="}
1175 expect = "%s=%s" % (hexescape('&'), hexescape('='))
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001176 result = urllib.parse.urlencode(given)
Brett Cannon74bfd702003-04-25 09:39:47 +00001177 self.assertEqual(expect, result)
1178 given = {"key name":"A bunch of pluses"}
1179 expect = "key+name=A+bunch+of+pluses"
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001180 result = urllib.parse.urlencode(given)
Brett Cannon74bfd702003-04-25 09:39:47 +00001181 self.assertEqual(expect, result)
1182
1183 def test_doseq(self):
1184 # Test that passing True for 'doseq' parameter works correctly
1185 given = {'sequence':['1', '2', '3']}
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001186 expect = "sequence=%s" % urllib.parse.quote_plus(str(['1', '2', '3']))
1187 result = urllib.parse.urlencode(given)
Brett Cannon74bfd702003-04-25 09:39:47 +00001188 self.assertEqual(expect, result)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001189 result = urllib.parse.urlencode(given, True)
Brett Cannon74bfd702003-04-25 09:39:47 +00001190 for value in given["sequence"]:
1191 expect = "sequence=%s" % value
Ezio Melottib58e0bd2010-01-23 15:40:09 +00001192 self.assertIn(expect, result)
Brett Cannon74bfd702003-04-25 09:39:47 +00001193 self.assertEqual(result.count('&'), 2,
1194 "Expected 2 '&'s, got %s" % result.count('&'))
1195
Jeremy Hylton1ef7c6b2009-03-26 16:57:30 +00001196 def test_empty_sequence(self):
1197 self.assertEqual("", urllib.parse.urlencode({}))
1198 self.assertEqual("", urllib.parse.urlencode([]))
1199
1200 def test_nonstring_values(self):
1201 self.assertEqual("a=1", urllib.parse.urlencode({"a": 1}))
1202 self.assertEqual("a=None", urllib.parse.urlencode({"a": None}))
1203
1204 def test_nonstring_seq_values(self):
1205 self.assertEqual("a=1&a=2", urllib.parse.urlencode({"a": [1, 2]}, True))
1206 self.assertEqual("a=None&a=a",
1207 urllib.parse.urlencode({"a": [None, "a"]}, True))
Georg Brandl2daf6ae2012-02-20 19:54:16 +01001208 data = collections.OrderedDict([("a", 1), ("b", 1)])
Jeremy Hylton1ef7c6b2009-03-26 16:57:30 +00001209 self.assertEqual("a=a&a=b",
Georg Brandl2daf6ae2012-02-20 19:54:16 +01001210 urllib.parse.urlencode({"a": data}, True))
Jeremy Hylton1ef7c6b2009-03-26 16:57:30 +00001211
Senthil Kumarandf022da2010-07-03 17:48:22 +00001212 def test_urlencode_encoding(self):
1213 # ASCII encoding. Expect %3F with errors="replace'
1214 given = (('\u00a0', '\u00c1'),)
1215 expect = '%3F=%3F'
1216 result = urllib.parse.urlencode(given, encoding="ASCII", errors="replace")
1217 self.assertEqual(expect, result)
1218
1219 # Default is UTF-8 encoding.
1220 given = (('\u00a0', '\u00c1'),)
1221 expect = '%C2%A0=%C3%81'
1222 result = urllib.parse.urlencode(given)
1223 self.assertEqual(expect, result)
1224
1225 # Latin-1 encoding.
1226 given = (('\u00a0', '\u00c1'),)
1227 expect = '%A0=%C1'
1228 result = urllib.parse.urlencode(given, encoding="latin-1")
1229 self.assertEqual(expect, result)
1230
1231 def test_urlencode_encoding_doseq(self):
1232 # ASCII Encoding. Expect %3F with errors="replace'
1233 given = (('\u00a0', '\u00c1'),)
1234 expect = '%3F=%3F'
1235 result = urllib.parse.urlencode(given, doseq=True,
1236 encoding="ASCII", errors="replace")
1237 self.assertEqual(expect, result)
1238
1239 # ASCII Encoding. On a sequence of values.
1240 given = (("\u00a0", (1, "\u00c1")),)
1241 expect = '%3F=1&%3F=%3F'
1242 result = urllib.parse.urlencode(given, True,
1243 encoding="ASCII", errors="replace")
1244 self.assertEqual(expect, result)
1245
1246 # Utf-8
1247 given = (("\u00a0", "\u00c1"),)
1248 expect = '%C2%A0=%C3%81'
1249 result = urllib.parse.urlencode(given, True)
1250 self.assertEqual(expect, result)
1251
1252 given = (("\u00a0", (42, "\u00c1")),)
1253 expect = '%C2%A0=42&%C2%A0=%C3%81'
1254 result = urllib.parse.urlencode(given, True)
1255 self.assertEqual(expect, result)
1256
1257 # latin-1
1258 given = (("\u00a0", "\u00c1"),)
1259 expect = '%A0=%C1'
1260 result = urllib.parse.urlencode(given, True, encoding="latin-1")
1261 self.assertEqual(expect, result)
1262
1263 given = (("\u00a0", (42, "\u00c1")),)
1264 expect = '%A0=42&%A0=%C1'
1265 result = urllib.parse.urlencode(given, True, encoding="latin-1")
1266 self.assertEqual(expect, result)
1267
1268 def test_urlencode_bytes(self):
1269 given = ((b'\xa0\x24', b'\xc1\x24'),)
1270 expect = '%A0%24=%C1%24'
1271 result = urllib.parse.urlencode(given)
1272 self.assertEqual(expect, result)
1273 result = urllib.parse.urlencode(given, True)
1274 self.assertEqual(expect, result)
1275
1276 # Sequence of values
1277 given = ((b'\xa0\x24', (42, b'\xc1\x24')),)
1278 expect = '%A0%24=42&%A0%24=%C1%24'
1279 result = urllib.parse.urlencode(given, True)
1280 self.assertEqual(expect, result)
1281
1282 def test_urlencode_encoding_safe_parameter(self):
1283
1284 # Send '$' (\x24) as safe character
1285 # Default utf-8 encoding
1286
1287 given = ((b'\xa0\x24', b'\xc1\x24'),)
1288 result = urllib.parse.urlencode(given, safe=":$")
1289 expect = '%A0$=%C1$'
1290 self.assertEqual(expect, result)
1291
1292 given = ((b'\xa0\x24', b'\xc1\x24'),)
1293 result = urllib.parse.urlencode(given, doseq=True, safe=":$")
1294 expect = '%A0$=%C1$'
1295 self.assertEqual(expect, result)
1296
1297 # Safe parameter in sequence
1298 given = ((b'\xa0\x24', (b'\xc1\x24', 0xd, 42)),)
1299 expect = '%A0$=%C1$&%A0$=13&%A0$=42'
1300 result = urllib.parse.urlencode(given, True, safe=":$")
1301 self.assertEqual(expect, result)
1302
1303 # Test all above in latin-1 encoding
1304
1305 given = ((b'\xa0\x24', b'\xc1\x24'),)
1306 result = urllib.parse.urlencode(given, safe=":$",
1307 encoding="latin-1")
1308 expect = '%A0$=%C1$'
1309 self.assertEqual(expect, result)
1310
1311 given = ((b'\xa0\x24', b'\xc1\x24'),)
1312 expect = '%A0$=%C1$'
1313 result = urllib.parse.urlencode(given, doseq=True, safe=":$",
1314 encoding="latin-1")
1315
1316 given = ((b'\xa0\x24', (b'\xc1\x24', 0xd, 42)),)
1317 expect = '%A0$=%C1$&%A0$=13&%A0$=42'
1318 result = urllib.parse.urlencode(given, True, safe=":$",
1319 encoding="latin-1")
1320 self.assertEqual(expect, result)
1321
class Pathname_Tests(unittest.TestCase):
    """Test pathname2url() and url2pathname()"""

    def test_basic(self):
        # Make sure simple tests pass
        expected_path = os.path.join("parts", "of", "a", "path")
        expected_url = "parts/of/a/path"
        result = urllib.request.pathname2url(expected_path)
        self.assertEqual(expected_url, result,
                         "pathname2url() failed; %s != %s" %
                         (expected_url, result))
        result = urllib.request.url2pathname(expected_url)
        self.assertEqual(expected_path, result,
                         "url2pathname() failed; %s != %s" %
                         (expected_path, result))

    def test_quoting(self):
        # Test automatic quoting and unquoting works for pathname2url() and
        # url2pathname() respectively
        given = os.path.join("needs", "quot=ing", "here")
        expect = "needs/%s/here" % urllib.parse.quote("quot=ing")
        result = urllib.request.pathname2url(given)
        self.assertEqual(expect, result,
                         "pathname2url() failed; %s != %s" %
                         (expect, result))
        # Feeding the quoted URL back must give the original path.
        expect = given
        result = urllib.request.url2pathname(result)
        self.assertEqual(expect, result,
                         "url2pathname() failed; %s != %s" %
                         (expect, result))
        given = os.path.join("make sure", "using_quote")
        expect = "%s/using_quote" % urllib.parse.quote("make sure")
        result = urllib.request.pathname2url(given)
        self.assertEqual(expect, result,
                         "pathname2url() failed; %s != %s" %
                         (expect, result))
        # The expected path keeps the literal '+'.
        given = "make+sure/using_unquote"
        expect = os.path.join("make+sure", "using_unquote")
        result = urllib.request.url2pathname(given)
        self.assertEqual(expect, result,
                         "url2pathname() failed; %s != %s" %
                         (expect, result))

    @unittest.skipUnless(sys.platform == 'win32',
                         'test specific to the Windows behaviour of '
                         'urllib.request.url2pathname().')
    def test_ntpath(self):
        # Several spellings of the drive root must all map to 'C:\'.
        given = ('/C:/', '///C:/', '/C|//')
        expect = 'C:\\'
        for url in given:
            # subTest() reports which URL failed and keeps checking the rest.
            with self.subTest(url=url):
                result = urllib.request.url2pathname(url)
                self.assertEqual(expect, result,
                                 'urllib.request.url2pathname() failed; '
                                 '%s != %s' % (expect, result))
        given = '///C|/path'
        expect = 'C:\\path'
        result = urllib.request.url2pathname(given)
        self.assertEqual(expect, result,
                         'urllib.request.url2pathname() failed; %s != %s' %
                         (expect, result))
1381
class Utility_Tests(unittest.TestCase):
    """Tests for assorted utility functions provided by urllib."""

    def test_thishost(self):
        """Test the urllib.request.thishost utility function returns a tuple"""
        found = urllib.request.thishost()
        self.assertIsInstance(found, tuple)
1388
Senthil Kumaran690ce9b2009-05-05 18:41:13 +00001389
1390class URLopener_Tests(unittest.TestCase):
1391 """Testcase to test the open method of URLopener class."""
1392
1393 def test_quoted_open(self):
1394 class DummyURLopener(urllib.request.URLopener):
1395 def open_spam(self, url):
1396 return url
Ezio Melotti79b99db2013-02-21 02:41:42 +02001397 with support.check_warnings(
1398 ('DummyURLopener style of invoking requests is deprecated.',
1399 DeprecationWarning)):
1400 self.assertEqual(DummyURLopener().open(
1401 'spam://example/ /'),'//example/%20/')
Senthil Kumaran690ce9b2009-05-05 18:41:13 +00001402
Ezio Melotti79b99db2013-02-21 02:41:42 +02001403 # test the safe characters are not quoted by urlopen
1404 self.assertEqual(DummyURLopener().open(
1405 "spam://c:|windows%/:=&?~#+!$,;'@()*[]|/path/"),
1406 "//c:|windows%/:=&?~#+!$,;'@()*[]|/path/")
Senthil Kumaran734f0592010-02-20 22:19:04 +00001407
# The FTPWrapperTests below are just commented out.
# It is unclear why they keep failing on Windows and SPARC.
# Everywhere else they work OK, but on those machines they sometimes
# fail in one of the tests, sometimes in another. On my Linux machine
# the tests pass.
# If anybody has one of the problematic environments, please help!
# . Facundo
1415#
1416# def server(evt):
Georg Brandlf78e02b2008-06-10 17:40:04 +00001417# import socket, time
Guido van Rossume7ba4952007-06-06 23:52:48 +00001418# serv = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
1419# serv.settimeout(3)
1420# serv.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
1421# serv.bind(("", 9093))
Charles-François Natali6e204602014-07-23 19:28:13 +01001422# serv.listen()
Guido van Rossume7ba4952007-06-06 23:52:48 +00001423# try:
1424# conn, addr = serv.accept()
1425# conn.send("1 Hola mundo\n")
1426# cantdata = 0
1427# while cantdata < 13:
1428# data = conn.recv(13-cantdata)
1429# cantdata += len(data)
1430# time.sleep(.3)
1431# conn.send("2 No more lines\n")
1432# conn.close()
1433# except socket.timeout:
1434# pass
1435# finally:
1436# serv.close()
1437# evt.set()
1438#
1439# class FTPWrapperTests(unittest.TestCase):
1440#
1441# def setUp(self):
Georg Brandlf78e02b2008-06-10 17:40:04 +00001442# import ftplib, time, threading
Guido van Rossume7ba4952007-06-06 23:52:48 +00001443# ftplib.FTP.port = 9093
1444# self.evt = threading.Event()
1445# threading.Thread(target=server, args=(self.evt,)).start()
1446# time.sleep(.1)
1447#
1448# def tearDown(self):
1449# self.evt.wait()
1450#
1451# def testBasic(self):
1452# # connects
1453# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [])
Georg Brandlf78e02b2008-06-10 17:40:04 +00001454# ftp.close()
Guido van Rossume7ba4952007-06-06 23:52:48 +00001455#
1456# def testTimeoutNone(self):
Georg Brandlf78e02b2008-06-10 17:40:04 +00001457# # global default timeout is ignored
1458# import socket
Serhiy Storchaka25d8aea2014-02-08 14:50:08 +02001459# self.assertIsNone(socket.getdefaulttimeout())
Guido van Rossume7ba4952007-06-06 23:52:48 +00001460# socket.setdefaulttimeout(30)
1461# try:
1462# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [])
1463# finally:
Georg Brandlf78e02b2008-06-10 17:40:04 +00001464# socket.setdefaulttimeout(None)
Guido van Rossume7ba4952007-06-06 23:52:48 +00001465# self.assertEqual(ftp.ftp.sock.gettimeout(), 30)
Georg Brandlf78e02b2008-06-10 17:40:04 +00001466# ftp.close()
Guido van Rossume7ba4952007-06-06 23:52:48 +00001467#
Georg Brandlf78e02b2008-06-10 17:40:04 +00001468# def testTimeoutDefault(self):
1469# # global default timeout is used
1470# import socket
Serhiy Storchaka25d8aea2014-02-08 14:50:08 +02001471# self.assertIsNone(socket.getdefaulttimeout())
Georg Brandlf78e02b2008-06-10 17:40:04 +00001472# socket.setdefaulttimeout(30)
1473# try:
1474# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [])
1475# finally:
1476# socket.setdefaulttimeout(None)
1477# self.assertEqual(ftp.ftp.sock.gettimeout(), 30)
1478# ftp.close()
1479#
1480# def testTimeoutValue(self):
1481# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [],
1482# timeout=30)
1483# self.assertEqual(ftp.ftp.sock.gettimeout(), 30)
1484# ftp.close()
Guido van Rossume7ba4952007-06-06 23:52:48 +00001485
Senthil Kumaran8b081b72013-04-10 20:53:12 -07001486
class RequestTests(unittest.TestCase):
    """Unit tests for urllib.request.Request."""

    def test_default_values(self):
        # With no explicit method, get_method() reports GET when no data
        # argument is passed and POST when one is passed.
        make_request = urllib.request.Request
        without_data = make_request("http://www.python.org")
        self.assertEqual(without_data.get_method(), 'GET')
        with_data = make_request("http://www.python.org", {})
        self.assertEqual(with_data.get_method(), 'POST')

    def test_with_method_arg(self):
        make_request = urllib.request.Request

        # An explicit method is reported both by the attribute and by
        # get_method(), whether or not data is passed as well.
        head_request = make_request("http://www.python.org", method='HEAD')
        self.assertEqual(head_request.method, 'HEAD')
        self.assertEqual(head_request.get_method(), 'HEAD')

        head_with_data = make_request("http://www.python.org", {},
                                      method='HEAD')
        self.assertEqual(head_with_data.method, 'HEAD')
        self.assertEqual(head_with_data.get_method(), 'HEAD')

        # Assigning to .method afterwards changes what get_method() returns.
        mutable = make_request("http://www.python.org", method='GET')
        self.assertEqual(mutable.get_method(), 'GET')
        mutable.method = 'HEAD'
        self.assertEqual(mutable.get_method(), 'HEAD')
Skip Montanaro080c9972001-01-28 21:12:22 +00001509
1510
class URL2PathNameTests(unittest.TestCase):
    """Tests for url2pathname() as imported from nturl2path."""

    def test_converting_drive_letter(self):
        # Both '|' and ':' are accepted after the drive letter.
        conversions = (
            ("///C|", 'C:'),
            ("///C:", 'C:'),
            ("///C|/", 'C:\\'),
        )
        for url, wanted in conversions:
            self.assertEqual(url2pathname(url), wanted)

    def test_converting_when_no_drive_letter(self):
        # Expected values are spelled with escaped backslashes because a
        # raw string cannot end in a backslash.
        self.assertEqual(url2pathname("///C/test/"), '\\\\\\C\\test\\')
        self.assertEqual(url2pathname("////C/test/"), '\\\\C\\test\\')

    def test_simple_compare(self):
        converted = url2pathname("///C|/foo/bar/spam.foo")
        self.assertEqual(converted, r'C:\foo\bar\spam.foo')

    def test_non_ascii_drive_letter(self):
        with self.assertRaises(IOError):
            url2pathname("///\u00e8|/")

    def test_roundtrip_url2pathname(self):
        # path -> URL -> path must give back the original path.
        samples = [
            'C:',
            r'\\\C\test\\',
            r'C:\foo\bar\spam.foo',
        ]
        for original in samples:
            as_url = pathname2url(original)
            self.assertEqual(url2pathname(as_url), original)
Senthil Kumaran277e9092013-04-10 20:51:19 -07001537
class PathName2URLTests(unittest.TestCase):
    """Tests for pathname2url() as imported from nturl2path."""

    def test_converting_drive_letter(self):
        # A bare drive and a drive root give the same expected URL.
        for path in ("C:", "C:\\"):
            self.assertEqual(pathname2url(path), '///C:')

    def test_converting_when_no_drive_letter(self):
        # Paths are spelled with escaped backslashes because a raw string
        # cannot end in a backslash.
        conversions = (
            ('\\\\\\folder\\test\\', '/////folder/test/'),
            ('\\\\folder\\test\\', '////folder/test/'),
            ('\\folder\\test\\', '/folder/test/'),
        )
        for path, wanted in conversions:
            self.assertEqual(pathname2url(path), wanted)

    def test_simple_compare(self):
        converted = pathname2url(r'C:\foo\bar\spam.foo')
        self.assertEqual(converted, "///C:/foo/bar/spam.foo")

    def test_long_drive_letter(self):
        with self.assertRaises(IOError):
            pathname2url("XX:\\")

    def test_roundtrip_pathname2url(self):
        # URL -> path -> URL must give back the original URL.
        samples = [
            '///C:',
            '/////folder/test/',
            '///C:/foo/bar/spam.foo',
        ]
        for original in samples:
            as_path = url2pathname(original)
            self.assertEqual(pathname2url(as_path), original)
Brett Cannon74bfd702003-04-25 09:39:47 +00001565
# Run the tests in this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()