blob: 5d05f8d7d26d66041be7c39070adb165939b476b [file] [log] [blame]
"""Regression tests for what was in Python 2's "urllib" module"""
Brett Cannon74bfd702003-04-25 09:39:47 +00002
Jeremy Hylton1afc1692008-06-18 20:49:58 +00003import urllib.parse
4import urllib.request
guido@google.coma119df92011-03-29 11:41:02 -07005import urllib.error
Georg Brandl24420152008-05-26 16:32:26 +00006import http.client
Barry Warsaw820c1202008-06-12 04:06:45 +00007import email.message
Jeremy Hylton66dc8c52007-08-04 03:42:26 +00008import io
Brett Cannon74bfd702003-04-25 09:39:47 +00009import unittest
Benjamin Peterson3c2dca62014-06-07 15:08:04 -070010from unittest.mock import patch
Benjamin Petersonee8712c2008-05-20 21:35:26 +000011from test import support
Brett Cannon74bfd702003-04-25 09:39:47 +000012import os
Antoine Pitrou07df6552014-11-02 17:23:14 +010013try:
14 import ssl
15except ImportError:
16 ssl = None
Senthil Kumaran2d2ea1b2011-04-14 13:16:30 +080017import sys
Georg Brandl5a650a22005-08-26 08:51:34 +000018import tempfile
Senthil Kumaran277e9092013-04-10 20:51:19 -070019from nturl2path import url2pathname, pathname2url
Jeremy Hylton6102e292000-08-31 15:48:10 +000020
Senthil Kumaranc5c5a142012-01-14 19:09:04 +080021from base64 import b64encode
Georg Brandl2daf6ae2012-02-20 19:54:16 +010022import collections
Senthil Kumaranc5c5a142012-01-14 19:09:04 +080023
Senthil Kumaran8b081b72013-04-10 20:53:12 -070024
def hexescape(char):
    """Escape char as RFC 2396 specifies.

    Return "%" followed by the upper-case hexadecimal value of
    ``ord(char)``, zero-padded to at least two digits.
    """
    return "%%%02X" % ord(char)
Jeremy Hylton6102e292000-08-31 15:48:10 +000031
# Shortcut for testing FancyURLopener
_urlopener = None


def urlopen(url, data=None, proxies=None):
    """urlopen(url [, data]) -> open file-like object

    When *proxies* is given, a fresh urllib.request.FancyURLopener is built
    for that call only.  Otherwise the module-level ``_urlopener`` is created
    on first use (through the FancyURLopener() helper of this module) and
    reused by later calls.  *data* is forwarded to the opener only when it is
    not None.
    """
    global _urlopener
    if proxies is not None:
        opener = urllib.request.FancyURLopener(proxies=proxies)
    elif not _urlopener:
        opener = FancyURLopener()
        _urlopener = opener
    else:
        opener = _urlopener
    if data is None:
        return opener.open(url)
    return opener.open(url, data)
50
Senthil Kumarance260142011-11-01 01:35:17 +080051
def FancyURLopener():
    """Return a new urllib.request.FancyURLopener instance.

    The instance is created inside support.check_warnings() with a filter
    for the DeprecationWarning that the constructor is expected to emit.
    """
    with support.check_warnings(
            ('FancyURLopener style of invoking requests is deprecated.',
             DeprecationWarning)):
        return urllib.request.FancyURLopener()
57
58
def fakehttp(fakedata):
    """Return an HTTPConnection subclass that serves *fakedata* from memory.

    connect() on the returned class installs a FakeSocket preloaded with
    *fakedata* instead of opening a network connection.  The last payload
    passed to the socket's sendall() is stored on the class as ``buf`` and
    the most recently created socket as ``fakesock`` so tests can inspect
    them.
    """
    class FakeSocket(io.BytesIO):
        # Number of outstanding references: the socket itself plus one for
        # every makefile() call.  The buffer is really closed only when the
        # count drops to zero.
        io_refs = 1

        def sendall(self, data):
            FakeHTTPConnection.buf = data

        def makefile(self, *args, **kwds):
            self.io_refs += 1
            return self

        def read(self, amt=None):
            if self.closed:
                return b""
            return super().read(amt)

        def readline(self, length=None):
            if self.closed:
                return b""
            return super().readline(length)

        def close(self):
            self.io_refs -= 1
            if self.io_refs == 0:
                super().close()

    class FakeHTTPConnection(http.client.HTTPConnection):

        # buffer to store data for verification in urlopen tests.
        buf = None

        def connect(self):
            self.sock = FakeSocket(self.fakedata)
            type(self).fakesock = self.sock

    # Attached after the class statement: a class body cannot read the
    # enclosing function's variable under the same name it is assigning.
    FakeHTTPConnection.fakedata = fakedata

    return FakeHTTPConnection
96
97
class FakeHTTPMixin(object):
    """Mixin that temporarily replaces http.client.HTTPConnection.

    Every fakehttp() call must be paired with unfakehttp(), which restores
    the class that was saved by the most recent fakehttp() call.
    """

    def fakehttp(self, fakedata):
        # Remember the current class so unfakehttp() can put it back.
        self._connection_class = http.client.HTTPConnection
        http.client.HTTPConnection = fakehttp(fakedata)

    def unfakehttp(self):
        http.client.HTTPConnection = self._connection_class
105
106
class FakeFTPMixin(object):
    """Mixin that temporarily replaces urllib.request.ftpwrapper.

    fakeftp() installs a stand-in whose retrfile() returns an empty
    in-memory file; unfakeftp() restores the class saved by fakeftp().
    """

    def fakeftp(self):
        class FakeFtpWrapper(object):
            def __init__(self, user, passwd, host, port, dirs, timeout=None,
                         persistent=True):
                pass

            def retrfile(self, file, type):
                return io.BytesIO(), 0

            def close(self):
                pass

        # Remember the current class so unfakeftp() can put it back.
        self._ftpwrapper_class = urllib.request.ftpwrapper
        urllib.request.ftpwrapper = FakeFtpWrapper

    def unfakeftp(self):
        urllib.request.ftpwrapper = self._ftpwrapper_class
125
126
class urlopen_FileTests(unittest.TestCase):
    """Test urlopen() opening a temporary file.

    Try to test as much functionality as possible so as to cut down on reliance
    on connecting to the Net for testing.

    """

    def setUp(self):
        # Create a temp file to use for testing
        self.text = bytes("test_urllib: %s\n" % self.__class__.__name__,
                          "ascii")
        with open(support.TESTFN, 'wb') as f:
            # Registered as a cleanup (not done in tearDown) so the file is
            # removed even when the urlopen() call below raises and
            # tearDown() is therefore never run.
            self.addCleanup(os.remove, support.TESTFN)
            f.write(self.text)
        self.pathname = support.TESTFN
        self.returned_obj = urlopen("file:%s" % self.pathname)

    def tearDown(self):
        """Shut down the open object"""
        self.returned_obj.close()

    def test_interface(self):
        # Make sure object returned by urlopen() has the specified methods
        for attr in ("read", "readline", "readlines", "fileno",
                     "close", "info", "geturl", "getcode", "__iter__"):
            self.assertTrue(hasattr(self.returned_obj, attr),
                            "object returned by urlopen() lacks %s attribute" %
                            attr)

    def test_read(self):
        self.assertEqual(self.text, self.returned_obj.read())

    def test_readline(self):
        self.assertEqual(self.text, self.returned_obj.readline())
        self.assertEqual(b'', self.returned_obj.readline(),
                         "calling readline() after exhausting the file did not"
                         " return an empty string")

    def test_readlines(self):
        lines_list = self.returned_obj.readlines()
        self.assertEqual(len(lines_list), 1,
                         "readlines() returned the wrong number of lines")
        self.assertEqual(lines_list[0], self.text,
                         "readlines() returned improper text")

    def test_fileno(self):
        file_num = self.returned_obj.fileno()
        self.assertIsInstance(file_num, int, "fileno() did not return an int")
        self.assertEqual(os.read(file_num, len(self.text)), self.text,
                         "Reading on the file descriptor returned by fileno() "
                         "did not return the expected text")

    def test_close(self):
        # Test close() by calling it here and then having it be called again
        # by the tearDown() method for the test
        self.returned_obj.close()

    def test_info(self):
        self.assertIsInstance(self.returned_obj.info(), email.message.Message)

    def test_geturl(self):
        self.assertEqual(self.returned_obj.geturl(), self.pathname)

    def test_getcode(self):
        self.assertIsNone(self.returned_obj.getcode())

    def test_iter(self):
        # Test iterator
        # Don't need to count number of iterations since test would fail the
        # instant it returned anything beyond the first line from the
        # comparison.
        # Use the iterator in the usual implicit way to test for ticket #4608.
        for line in self.returned_obj:
            self.assertEqual(line, self.text)

    def test_relativelocalfile(self):
        self.assertRaises(ValueError, urllib.request.urlopen,
                          './' + self.pathname)
208
class ProxyTests(unittest.TestCase):
    """Test proxy discovery and bypass rules read from environment variables."""

    def setUp(self):
        # Records changes to env vars
        self.env = support.EnvironmentVarGuard()
        # Delete all proxy related env vars
        for k in list(os.environ):
            if 'proxy' in k.lower():
                self.env.unset(k)

    def tearDown(self):
        # Restore all proxy related env vars
        self.env.__exit__()
        del self.env

    def test_getproxies_environment_keep_no_proxies(self):
        self.env.set('NO_PROXY', 'localhost')
        proxies = urllib.request.getproxies_environment()
        # getproxies_environment() uses lowercased, truncated (no '_proxy')
        # keys
        self.assertEqual('localhost', proxies['no'])
        # List of no_proxies with space.
        self.env.set('NO_PROXY',
                     'localhost, anotherdomain.com, newdomain.com:1234')
        self.assertTrue(
            urllib.request.proxy_bypass_environment('anotherdomain.com'))
        self.assertTrue(
            urllib.request.proxy_bypass_environment('anotherdomain.com:8888'))
        self.assertTrue(
            urllib.request.proxy_bypass_environment('newdomain.com:1234'))

    def test_proxy_bypass_environment_host_match(self):
        bypass = urllib.request.proxy_bypass_environment
        self.env.set('NO_PROXY',
                     'localhost, anotherdomain.com, newdomain.com:1234')
        self.assertTrue(bypass('localhost'))
        self.assertTrue(bypass('LocalHost'))                 # MixedCase
        self.assertTrue(bypass('LOCALHOST'))                 # UPPERCASE
        self.assertTrue(bypass('newdomain.com:1234'))
        self.assertTrue(bypass('anotherdomain.com:8888'))
        self.assertTrue(bypass('www.newdomain.com:1234'))
        self.assertFalse(bypass('prelocalhost'))
        self.assertFalse(bypass('newdomain.com'))            # no port
        self.assertFalse(bypass('newdomain.com:1235'))       # wrong port
Senthil Kumarana7c0ff22016-04-25 08:16:23 -0700248
class ProxyTests_withOrderedEnv(unittest.TestCase):
    """Test proxy environment handling where variable order matters.

    os.environ is swapped for an empty OrderedDict in setUp() and put back
    in tearDown(), so each test fully controls the insertion order of the
    variables it sets.
    """

    def setUp(self):
        # We need to test conditions, where variable order _is_ significant
        self._saved_env = os.environ
        # Monkey patch os.environ, start with empty fake environment
        os.environ = collections.OrderedDict()

    def tearDown(self):
        os.environ = self._saved_env

    def test_getproxies_environment_prefer_lowercase(self):
        bypass = urllib.request.proxy_bypass_environment
        # Test lowercase preference with removal
        os.environ['no_proxy'] = ''
        os.environ['No_Proxy'] = 'localhost'
        self.assertFalse(bypass('localhost'))
        self.assertFalse(bypass('arbitrary'))
        os.environ['http_proxy'] = ''
        os.environ['HTTP_PROXY'] = 'http://somewhere:3128'
        proxies = urllib.request.getproxies_environment()
        self.assertEqual({}, proxies)
        # Test lowercase preference of proxy bypass and correct matching
        # including ports
        os.environ['no_proxy'] = 'localhost, noproxy.com, my.proxy:1234'
        os.environ['No_Proxy'] = 'xyz.com'
        self.assertTrue(bypass('localhost'))
        self.assertTrue(bypass('noproxy.com:5678'))
        self.assertTrue(bypass('my.proxy:1234'))
        self.assertFalse(bypass('my.proxy'))
        self.assertFalse(bypass('arbitrary'))
        # Test lowercase preference with replacement
        os.environ['http_proxy'] = 'http://somewhere:3128'
        os.environ['Http_Proxy'] = 'http://somewhereelse:3128'
        proxies = urllib.request.getproxies_environment()
        self.assertEqual('http://somewhere:3128', proxies['http'])
Benjamin Peterson9bc93512008-09-22 22:10:59 +0000283
class urlopen_HttpTests(unittest.TestCase, FakeHTTPMixin, FakeFTPMixin):
    """Test urlopen() opening a fake http connection."""

    def check_read(self, ver):
        # *ver* is the HTTP version as bytes, spliced into the status line.
        self.fakehttp(b"HTTP/" + ver + b" 200 OK\r\n\r\nHello!")
        try:
            fp = urlopen("http://python.org/")
            self.assertEqual(fp.readline(), b"Hello!")
            self.assertEqual(fp.readline(), b"")
            self.assertEqual(fp.geturl(), 'http://python.org/')
            self.assertEqual(fp.getcode(), 200)
        finally:
            self.unfakehttp()

    def test_url_fragment(self):
        # Issue #11703: geturl() omits fragments in the original URL.
        url = 'http://docs.python.org/library/urllib.html#OK'
        self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello!")
        try:
            fp = urllib.request.urlopen(url)
            self.assertEqual(fp.geturl(), url)
        finally:
            self.unfakehttp()

    def test_willclose(self):
        self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello!")
        try:
            resp = urlopen("http://www.python.org")
            self.assertTrue(resp.fp.will_close)
        finally:
            self.unfakehttp()

    def test_read_0_9(self):
        # "0.9" response accepted (but not "simple responses" without
        # a status line)
        self.check_read(b"0.9")

    def test_read_1_0(self):
        self.check_read(b"1.0")

    def test_read_1_1(self):
        self.check_read(b"1.1")

    def test_read_bogus(self):
        # urlopen() should raise OSError for many error codes.
        self.fakehttp(b'''HTTP/1.1 401 Authentication Required
Date: Wed, 02 Jan 2008 03:03:54 GMT
Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
Connection: close
Content-Type: text/html; charset=iso-8859-1
''')
        try:
            self.assertRaises(OSError, urlopen, "http://python.org/")
        finally:
            self.unfakehttp()

    def test_invalid_redirect(self):
        # A redirect to a file: URL must be rejected with an HTTPError.
        self.fakehttp(b'''HTTP/1.1 302 Found
Date: Wed, 02 Jan 2008 03:03:54 GMT
Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
Location: file://guidocomputer.athome.com:/python/license
Connection: close
Content-Type: text/html; charset=iso-8859-1
''')
        try:
            msg = "Redirection to url 'file:"
            with self.assertRaisesRegex(urllib.error.HTTPError, msg):
                urlopen("http://python.org/")
        finally:
            self.unfakehttp()

    def test_redirect_limit_independent(self):
        # Ticket #12923: make sure independent requests each use their
        # own retry limit.
        for _ in range(FancyURLopener().maxtries):
            self.fakehttp(b'''HTTP/1.1 302 Found
Location: file://guidocomputer.athome.com:/python/license
Connection: close
''')
            try:
                self.assertRaises(urllib.error.HTTPError, urlopen,
                                  "http://something")
            finally:
                self.unfakehttp()

    def test_empty_socket(self):
        # urlopen() raises OSError if the underlying socket does not send any
        # data. (#1680230)
        self.fakehttp(b'')
        try:
            self.assertRaises(OSError, urlopen, "http://something")
        finally:
            self.unfakehttp()

    def test_missing_localfile(self):
        # Test for #10836
        with self.assertRaises(urllib.error.URLError) as e:
            urlopen('file://localhost/a/file/which/doesnot/exists.py')
        self.assertTrue(e.exception.filename)
        self.assertTrue(e.exception.reason)

    def test_file_notexists(self):
        fd, tmp_file = tempfile.mkstemp()
        tmp_fileurl = 'file://localhost/' + tmp_file.replace(os.path.sep, '/')
        try:
            self.assertTrue(os.path.exists(tmp_file))
            with urlopen(tmp_fileurl) as fobj:
                self.assertTrue(fobj)
        finally:
            os.close(fd)
            os.unlink(tmp_file)
        # Once the file is gone the same URL must fail.
        self.assertFalse(os.path.exists(tmp_file))
        with self.assertRaises(urllib.error.URLError):
            urlopen(tmp_fileurl)

    def test_ftp_nohost(self):
        test_ftp_url = 'ftp:///path'
        with self.assertRaises(urllib.error.URLError) as e:
            urlopen(test_ftp_url)
        self.assertFalse(e.exception.filename)
        self.assertTrue(e.exception.reason)

    def test_ftp_nonexisting(self):
        with self.assertRaises(urllib.error.URLError) as e:
            urlopen('ftp://localhost/a/file/which/doesnot/exists.py')
        self.assertFalse(e.exception.filename)
        self.assertTrue(e.exception.reason)

    @patch.object(urllib.request, 'MAXFTPCACHE', 0)
    def test_ftp_cache_pruning(self):
        self.fakeftp()
        try:
            urllib.request.ftpcache['test'] = urllib.request.ftpwrapper(
                'user', 'pass', 'localhost', 21, [])
            urlopen('ftp://localhost')
        finally:
            self.unfakeftp()

    def test_userpass_inurl(self):
        self.fakehttp(b"HTTP/1.0 200 OK\r\n\r\nHello!")
        try:
            fp = urlopen("http://user:pass@python.org/")
            self.assertEqual(fp.readline(), b"Hello!")
            self.assertEqual(fp.readline(), b"")
            self.assertEqual(fp.geturl(), 'http://user:pass@python.org/')
            self.assertEqual(fp.getcode(), 200)
        finally:
            self.unfakehttp()

    def test_userpass_inurl_w_spaces(self):
        self.fakehttp(b"HTTP/1.0 200 OK\r\n\r\nHello!")
        try:
            userpass = "a b:c d"
            url = "http://{}@python.org/".format(userpass)
            # The fake connection class installed above; its ``buf``
            # attribute holds the bytes handed to the fake socket.
            fakehttp_wrapper = http.client.HTTPConnection
            authorization = ("Authorization: Basic %s\r\n" %
                             b64encode(userpass.encode("ASCII")).decode("ASCII"))
            fp = urlopen(url)
            # The authorization header must be in place
            self.assertIn(authorization, fakehttp_wrapper.buf.decode("UTF-8"))
            self.assertEqual(fp.readline(), b"Hello!")
            self.assertEqual(fp.readline(), b"")
            # the spaces are quoted in URL so no match
            self.assertNotEqual(fp.geturl(), url)
            self.assertEqual(fp.getcode(), 200)
        finally:
            self.unfakehttp()

    def test_URLopener_deprecation(self):
        with support.check_warnings(('', DeprecationWarning)):
            urllib.request.URLopener()

    @unittest.skipUnless(ssl, "ssl module required")
    def test_cafile_and_context(self):
        context = ssl.create_default_context()
        with self.assertRaises(ValueError):
            urllib.request.urlopen(
                "https://localhost", cafile="/nonexistent/path", context=context
            )
464
class urlopen_DataTests(unittest.TestCase):
    """Test urlopen() opening a data URL."""

    def setUp(self):
        # text containing URL special- and unicode-characters
        self.text = "test data URLs :;,%=& \u00f6 \u00c4 "
        # 2x1 pixel RGB PNG image with one black and one white pixel
        self.image = (
            b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\x02\x00\x00\x00'
            b'\x01\x08\x02\x00\x00\x00{@\xe8\xdd\x00\x00\x00\x01sRGB\x00\xae'
            b'\xce\x1c\xe9\x00\x00\x00\x0fIDAT\x08\xd7c```\xf8\xff\xff?\x00'
            b'\x06\x01\x02\xfe\no/\x1e\x00\x00\x00\x00IEND\xaeB`\x82')

        self.text_url = (
            "data:text/plain;charset=UTF-8,test%20data%20URLs%20%3A%3B%2C%25%3"
            "D%26%20%C3%B6%20%C3%84%20")
        self.text_url_base64 = (
            "data:text/plain;charset=ISO-8859-1;base64,dGVzdCBkYXRhIFVSTHMgOjs"
            "sJT0mIPYgxCA%3D")
        # base64 encoded data URL that contains ignorable spaces,
        # such as "\n", " ", "%0A", and "%20".
        self.image_url = (
            "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAIAAAABCAIAAAB7\n"
            "QOjdAAAAAXNSR0IArs4c6QAAAA9JREFUCNdj%0AYGBg%2BP//PwAGAQL%2BCm8 "
            "vHgAAAABJRU5ErkJggg%3D%3D%0A%20")

        # Each response is registered for closing as soon as it is opened,
        # so earlier ones are released even if a later urlopen() fails.
        self.text_url_resp = urllib.request.urlopen(self.text_url)
        self.addCleanup(self.text_url_resp.close)
        self.text_url_base64_resp = urllib.request.urlopen(
            self.text_url_base64)
        self.addCleanup(self.text_url_base64_resp.close)
        self.image_url_resp = urllib.request.urlopen(self.image_url)
        self.addCleanup(self.image_url_resp.close)

    def test_interface(self):
        # Make sure object returned by urlopen() has the specified methods
        for attr in ("read", "readline", "readlines",
                     "close", "info", "geturl", "getcode", "__iter__"):
            self.assertTrue(hasattr(self.text_url_resp, attr),
                            "object returned by urlopen() lacks %s attribute" %
                            attr)

    def test_info(self):
        self.assertIsInstance(self.text_url_resp.info(), email.message.Message)
        self.assertEqual(self.text_url_base64_resp.info().get_params(),
                         [('text/plain', ''), ('charset', 'ISO-8859-1')])
        self.assertEqual(self.image_url_resp.info()['content-length'],
                         str(len(self.image)))
        with urllib.request.urlopen("data:,") as empty_resp:
            self.assertEqual(empty_resp.info().get_params(),
                             [('text/plain', ''), ('charset', 'US-ASCII')])

    def test_geturl(self):
        self.assertEqual(self.text_url_resp.geturl(), self.text_url)
        self.assertEqual(self.text_url_base64_resp.geturl(),
                         self.text_url_base64)
        self.assertEqual(self.image_url_resp.geturl(), self.image_url)

    def test_read_text(self):
        charset = dict(self.text_url_resp.info().get_params())['charset']
        self.assertEqual(self.text_url_resp.read().decode(charset), self.text)

    def test_read_text_base64(self):
        charset = dict(
            self.text_url_base64_resp.info().get_params())['charset']
        self.assertEqual(self.text_url_base64_resp.read().decode(charset),
                         self.text)

    def test_read_image(self):
        self.assertEqual(self.image_url_resp.read(), self.image)

    def test_missing_comma(self):
        self.assertRaises(ValueError, urllib.request.urlopen,
                          'data:text/plain')

    def test_invalid_base64_data(self):
        # missing padding character
        self.assertRaises(ValueError, urllib.request.urlopen,
                          'data:;base64,Cg=')
537
Brett Cannon19691362003-04-29 05:08:06 +0000538class urlretrieve_FileTests(unittest.TestCase):
Brett Cannon74bfd702003-04-25 09:39:47 +0000539 """Test urllib.urlretrieve() on local files"""
Skip Montanaro080c9972001-01-28 21:12:22 +0000540
Brett Cannon19691362003-04-29 05:08:06 +0000541 def setUp(self):
Georg Brandl5a650a22005-08-26 08:51:34 +0000542 # Create a list of temporary files. Each item in the list is a file
543 # name (absolute path or relative to the current working directory).
544 # All files in this list will be deleted in the tearDown method. Note,
545 # this only helps to makes sure temporary files get deleted, but it
546 # does nothing about trying to close files that may still be open. It
547 # is the responsibility of the developer to properly close files even
548 # when exceptional conditions occur.
549 self.tempFiles = []
550
Brett Cannon19691362003-04-29 05:08:06 +0000551 # Create a temporary file.
Benjamin Petersonee8712c2008-05-20 21:35:26 +0000552 self.registerFileForCleanUp(support.TESTFN)
Guido van Rossuma0982942007-07-10 08:30:03 +0000553 self.text = b'testing urllib.urlretrieve'
Georg Brandl5a650a22005-08-26 08:51:34 +0000554 try:
Benjamin Petersonee8712c2008-05-20 21:35:26 +0000555 FILE = open(support.TESTFN, 'wb')
Georg Brandl5a650a22005-08-26 08:51:34 +0000556 FILE.write(self.text)
557 FILE.close()
558 finally:
559 try: FILE.close()
560 except: pass
Brett Cannon19691362003-04-29 05:08:06 +0000561
562 def tearDown(self):
Georg Brandl5a650a22005-08-26 08:51:34 +0000563 # Delete the temporary files.
564 for each in self.tempFiles:
565 try: os.remove(each)
566 except: pass
567
568 def constructLocalFileUrl(self, filePath):
Victor Stinner6c6f8512010-08-07 10:09:35 +0000569 filePath = os.path.abspath(filePath)
570 try:
Marc-André Lemburg8f36af72011-02-25 15:42:01 +0000571 filePath.encode("utf-8")
Victor Stinner6c6f8512010-08-07 10:09:35 +0000572 except UnicodeEncodeError:
573 raise unittest.SkipTest("filePath is not encodable to utf8")
574 return "file://%s" % urllib.request.pathname2url(filePath)
Georg Brandl5a650a22005-08-26 08:51:34 +0000575
Guido van Rossum70d0dda2007-08-29 01:53:26 +0000576 def createNewTempFile(self, data=b""):
Georg Brandl5a650a22005-08-26 08:51:34 +0000577 """Creates a new temporary file containing the specified data,
578 registers the file for deletion during the test fixture tear down, and
579 returns the absolute path of the file."""
580
581 newFd, newFilePath = tempfile.mkstemp()
582 try:
583 self.registerFileForCleanUp(newFilePath)
584 newFile = os.fdopen(newFd, "wb")
585 newFile.write(data)
586 newFile.close()
587 finally:
588 try: newFile.close()
589 except: pass
590 return newFilePath
591
592 def registerFileForCleanUp(self, fileName):
593 self.tempFiles.append(fileName)
Brett Cannon19691362003-04-29 05:08:06 +0000594
595 def test_basic(self):
596 # Make sure that a local file just gets its own location returned and
597 # a headers value is returned.
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000598 result = urllib.request.urlretrieve("file:%s" % support.TESTFN)
Benjamin Petersonee8712c2008-05-20 21:35:26 +0000599 self.assertEqual(result[0], support.TESTFN)
Ezio Melottie9615932010-01-24 19:26:24 +0000600 self.assertIsInstance(result[1], email.message.Message,
Martin Panter7462b6492015-11-02 03:37:02 +0000601 "did not get an email.message.Message instance "
Ezio Melottie9615932010-01-24 19:26:24 +0000602 "as second returned value")
Brett Cannon19691362003-04-29 05:08:06 +0000603
604 def test_copy(self):
605 # Test that setting the filename argument works.
Benjamin Petersonee8712c2008-05-20 21:35:26 +0000606 second_temp = "%s.2" % support.TESTFN
Georg Brandl5a650a22005-08-26 08:51:34 +0000607 self.registerFileForCleanUp(second_temp)
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000608 result = urllib.request.urlretrieve(self.constructLocalFileUrl(
Benjamin Petersonee8712c2008-05-20 21:35:26 +0000609 support.TESTFN), second_temp)
Brett Cannon19691362003-04-29 05:08:06 +0000610 self.assertEqual(second_temp, result[0])
Benjamin Petersonc9c0f202009-06-30 23:06:06 +0000611 self.assertTrue(os.path.exists(second_temp), "copy of the file was not "
Brett Cannon19691362003-04-29 05:08:06 +0000612 "made")
Alex Martelli01c77c62006-08-24 02:58:11 +0000613 FILE = open(second_temp, 'rb')
Brett Cannon19691362003-04-29 05:08:06 +0000614 try:
615 text = FILE.read()
Brett Cannon19691362003-04-29 05:08:06 +0000616 FILE.close()
Georg Brandl5a650a22005-08-26 08:51:34 +0000617 finally:
618 try: FILE.close()
619 except: pass
Brett Cannon19691362003-04-29 05:08:06 +0000620 self.assertEqual(self.text, text)
621
622 def test_reporthook(self):
623 # Make sure that the reporthook works.
Senthil Kumarane24f96a2012-03-13 19:29:33 -0700624 def hooktester(block_count, block_read_size, file_size, count_holder=[0]):
625 self.assertIsInstance(block_count, int)
626 self.assertIsInstance(block_read_size, int)
627 self.assertIsInstance(file_size, int)
628 self.assertEqual(block_count, count_holder[0])
Brett Cannon19691362003-04-29 05:08:06 +0000629 count_holder[0] = count_holder[0] + 1
Benjamin Petersonee8712c2008-05-20 21:35:26 +0000630 second_temp = "%s.2" % support.TESTFN
Georg Brandl5a650a22005-08-26 08:51:34 +0000631 self.registerFileForCleanUp(second_temp)
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000632 urllib.request.urlretrieve(
633 self.constructLocalFileUrl(support.TESTFN),
Georg Brandl5a650a22005-08-26 08:51:34 +0000634 second_temp, hooktester)
635
636 def test_reporthook_0_bytes(self):
637 # Test on zero length file. Should call reporthook only 1 time.
638 report = []
Senthil Kumarane24f96a2012-03-13 19:29:33 -0700639 def hooktester(block_count, block_read_size, file_size, _report=report):
640 _report.append((block_count, block_read_size, file_size))
Georg Brandl5a650a22005-08-26 08:51:34 +0000641 srcFileName = self.createNewTempFile()
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000642 urllib.request.urlretrieve(self.constructLocalFileUrl(srcFileName),
Benjamin Petersonee8712c2008-05-20 21:35:26 +0000643 support.TESTFN, hooktester)
Georg Brandl5a650a22005-08-26 08:51:34 +0000644 self.assertEqual(len(report), 1)
645 self.assertEqual(report[0][2], 0)
646
647 def test_reporthook_5_bytes(self):
648 # Test on 5 byte file. Should call reporthook only 2 times (once when
649 # the "network connection" is established and once when the block is
Senthil Kumarane24f96a2012-03-13 19:29:33 -0700650 # read).
Georg Brandl5a650a22005-08-26 08:51:34 +0000651 report = []
Senthil Kumarane24f96a2012-03-13 19:29:33 -0700652 def hooktester(block_count, block_read_size, file_size, _report=report):
653 _report.append((block_count, block_read_size, file_size))
Guido van Rossum70d0dda2007-08-29 01:53:26 +0000654 srcFileName = self.createNewTempFile(b"x" * 5)
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000655 urllib.request.urlretrieve(self.constructLocalFileUrl(srcFileName),
Benjamin Petersonee8712c2008-05-20 21:35:26 +0000656 support.TESTFN, hooktester)
Georg Brandl5a650a22005-08-26 08:51:34 +0000657 self.assertEqual(len(report), 2)
Gregory P. Smith6d9388f2012-11-10 15:12:55 -0800658 self.assertEqual(report[0][2], 5)
659 self.assertEqual(report[1][2], 5)
Georg Brandl5a650a22005-08-26 08:51:34 +0000660
661 def test_reporthook_8193_bytes(self):
662 # Test on 8193 byte file. Should call reporthook only 3 times (once
663 # when the "network connection" is established, once for the next 8192
664 # bytes, and once for the last byte).
665 report = []
Senthil Kumarane24f96a2012-03-13 19:29:33 -0700666 def hooktester(block_count, block_read_size, file_size, _report=report):
667 _report.append((block_count, block_read_size, file_size))
Guido van Rossum70d0dda2007-08-29 01:53:26 +0000668 srcFileName = self.createNewTempFile(b"x" * 8193)
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000669 urllib.request.urlretrieve(self.constructLocalFileUrl(srcFileName),
Benjamin Petersonee8712c2008-05-20 21:35:26 +0000670 support.TESTFN, hooktester)
Georg Brandl5a650a22005-08-26 08:51:34 +0000671 self.assertEqual(len(report), 3)
Gregory P. Smith6d9388f2012-11-10 15:12:55 -0800672 self.assertEqual(report[0][2], 8193)
673 self.assertEqual(report[0][1], 8192)
Senthil Kumarane24f96a2012-03-13 19:29:33 -0700674 self.assertEqual(report[1][1], 8192)
Gregory P. Smith6d9388f2012-11-10 15:12:55 -0800675 self.assertEqual(report[2][1], 8192)
Skip Montanaro080c9972001-01-28 21:12:22 +0000676
Senthil Kumarance260142011-11-01 01:35:17 +0800677
class urlretrieve_HttpTests(unittest.TestCase, FakeHTTPMixin):
    """Exercise urllib.request.urlretrieve() against a faked HTTP reply."""

    # Canned reply shared by both tests: the headers promise a 100 byte body
    # but only "FF\n" follows them.
    _TRUNCATED_REPLY = b'''HTTP/1.1 200 OK
Date: Wed, 02 Jan 2008 03:03:54 GMT
Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
Connection: close
Content-Length: 100
Content-Type: text/html; charset=iso-8859-1

FF
'''

    def test_short_content_raises_ContentTooShortError(self):
        def _reporthook(par1, par2, par3):
            pass

        self.fakehttp(self._TRUNCATED_REPLY)
        try:
            with self.assertRaises(urllib.error.ContentTooShortError):
                urllib.request.urlretrieve('http://example.com/',
                                           reporthook=_reporthook)
        finally:
            # Always restore the real HTTP machinery, pass or fail.
            self.unfakehttp()

    def test_short_content_raises_ContentTooShortError_without_reporthook(self):
        self.fakehttp(self._TRUNCATED_REPLY)
        try:
            with self.assertRaises(urllib.error.ContentTooShortError):
                urllib.request.urlretrieve('http://example.com/')
        finally:
            # Always restore the real HTTP machinery, pass or fail.
            self.unfakehttp()
717
718
Brett Cannon74bfd702003-04-25 09:39:47 +0000719class QuotingTests(unittest.TestCase):
720 """Tests for urllib.quote() and urllib.quote_plus()
Tim Petersc2659cf2003-05-12 20:19:37 +0000721
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000722 According to RFC 2396 (Uniform Resource Identifiers), to escape a
723 character you write it as '%' + <2 character US-ASCII hex value>.
724 The Python code of ``'%' + hex(ord(<character>))[2:]`` escapes a
725 character properly. Case does not matter on the hex letters.
Brett Cannon74bfd702003-04-25 09:39:47 +0000726
727 The various character sets specified are:
Tim Petersc2659cf2003-05-12 20:19:37 +0000728
Brett Cannon74bfd702003-04-25 09:39:47 +0000729 Reserved characters : ";/?:@&=+$,"
730 Have special meaning in URIs and must be escaped if not being used for
731 their special meaning
732 Data characters : letters, digits, and "-_.!~*'()"
733 Unreserved and do not need to be escaped; can be, though, if desired
734 Control characters : 0x00 - 0x1F, 0x7F
735 Have no use in URIs so must be escaped
736 space : 0x20
737 Must be escaped
738 Delimiters : '<>#%"'
739 Must be escaped
740 Unwise : "{}|\^[]`"
741 Must be escaped
Tim Petersc2659cf2003-05-12 20:19:37 +0000742
Brett Cannon74bfd702003-04-25 09:39:47 +0000743 """
744
745 def test_never_quote(self):
746 # Make sure quote() does not quote letters, digits, and "_,.-"
747 do_not_quote = '' .join(["ABCDEFGHIJKLMNOPQRSTUVWXYZ",
748 "abcdefghijklmnopqrstuvwxyz",
749 "0123456789",
750 "_.-"])
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000751 result = urllib.parse.quote(do_not_quote)
Brett Cannon74bfd702003-04-25 09:39:47 +0000752 self.assertEqual(do_not_quote, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000753 "using quote(): %r != %r" % (do_not_quote, result))
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000754 result = urllib.parse.quote_plus(do_not_quote)
Brett Cannon74bfd702003-04-25 09:39:47 +0000755 self.assertEqual(do_not_quote, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000756 "using quote_plus(): %r != %r" % (do_not_quote, result))
Brett Cannon74bfd702003-04-25 09:39:47 +0000757
758 def test_default_safe(self):
759 # Test '/' is default value for 'safe' parameter
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000760 self.assertEqual(urllib.parse.quote.__defaults__[0], '/')
Brett Cannon74bfd702003-04-25 09:39:47 +0000761
762 def test_safe(self):
763 # Test setting 'safe' parameter does what it should do
764 quote_by_default = "<>"
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000765 result = urllib.parse.quote(quote_by_default, safe=quote_by_default)
Brett Cannon74bfd702003-04-25 09:39:47 +0000766 self.assertEqual(quote_by_default, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000767 "using quote(): %r != %r" % (quote_by_default, result))
Jeremy Hylton1ef7c6b2009-03-26 16:57:30 +0000768 result = urllib.parse.quote_plus(quote_by_default,
769 safe=quote_by_default)
Brett Cannon74bfd702003-04-25 09:39:47 +0000770 self.assertEqual(quote_by_default, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000771 "using quote_plus(): %r != %r" %
Brett Cannon74bfd702003-04-25 09:39:47 +0000772 (quote_by_default, result))
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000773 # Safe expressed as bytes rather than str
774 result = urllib.parse.quote(quote_by_default, safe=b"<>")
775 self.assertEqual(quote_by_default, result,
776 "using quote(): %r != %r" % (quote_by_default, result))
777 # "Safe" non-ASCII characters should have no effect
778 # (Since URIs are not allowed to have non-ASCII characters)
779 result = urllib.parse.quote("a\xfcb", encoding="latin-1", safe="\xfc")
780 expect = urllib.parse.quote("a\xfcb", encoding="latin-1", safe="")
781 self.assertEqual(expect, result,
782 "using quote(): %r != %r" %
783 (expect, result))
784 # Same as above, but using a bytes rather than str
785 result = urllib.parse.quote("a\xfcb", encoding="latin-1", safe=b"\xfc")
786 expect = urllib.parse.quote("a\xfcb", encoding="latin-1", safe="")
787 self.assertEqual(expect, result,
788 "using quote(): %r != %r" %
789 (expect, result))
Brett Cannon74bfd702003-04-25 09:39:47 +0000790
791 def test_default_quoting(self):
792 # Make sure all characters that should be quoted are by default sans
793 # space (separate test for that).
794 should_quote = [chr(num) for num in range(32)] # For 0x00 - 0x1F
795 should_quote.append('<>#%"{}|\^[]`')
796 should_quote.append(chr(127)) # For 0x7F
797 should_quote = ''.join(should_quote)
798 for char in should_quote:
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000799 result = urllib.parse.quote(char)
Brett Cannon74bfd702003-04-25 09:39:47 +0000800 self.assertEqual(hexescape(char), result,
Jeremy Hylton1ef7c6b2009-03-26 16:57:30 +0000801 "using quote(): "
802 "%s should be escaped to %s, not %s" %
Brett Cannon74bfd702003-04-25 09:39:47 +0000803 (char, hexescape(char), result))
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000804 result = urllib.parse.quote_plus(char)
Brett Cannon74bfd702003-04-25 09:39:47 +0000805 self.assertEqual(hexescape(char), result,
806 "using quote_plus(): "
Tim Petersc2659cf2003-05-12 20:19:37 +0000807 "%s should be escapes to %s, not %s" %
Brett Cannon74bfd702003-04-25 09:39:47 +0000808 (char, hexescape(char), result))
809 del should_quote
810 partial_quote = "ab[]cd"
811 expected = "ab%5B%5Dcd"
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000812 result = urllib.parse.quote(partial_quote)
Brett Cannon74bfd702003-04-25 09:39:47 +0000813 self.assertEqual(expected, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000814 "using quote(): %r != %r" % (expected, result))
Senthil Kumaran305a68e2011-09-13 06:40:27 +0800815 result = urllib.parse.quote_plus(partial_quote)
Brett Cannon74bfd702003-04-25 09:39:47 +0000816 self.assertEqual(expected, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000817 "using quote_plus(): %r != %r" % (expected, result))
Brett Cannon74bfd702003-04-25 09:39:47 +0000818
819 def test_quoting_space(self):
820 # Make sure quote() and quote_plus() handle spaces as specified in
821 # their unique way
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000822 result = urllib.parse.quote(' ')
Brett Cannon74bfd702003-04-25 09:39:47 +0000823 self.assertEqual(result, hexescape(' '),
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000824 "using quote(): %r != %r" % (result, hexescape(' ')))
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000825 result = urllib.parse.quote_plus(' ')
Brett Cannon74bfd702003-04-25 09:39:47 +0000826 self.assertEqual(result, '+',
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000827 "using quote_plus(): %r != +" % result)
Brett Cannon74bfd702003-04-25 09:39:47 +0000828 given = "a b cd e f"
829 expect = given.replace(' ', hexescape(' '))
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000830 result = urllib.parse.quote(given)
Brett Cannon74bfd702003-04-25 09:39:47 +0000831 self.assertEqual(expect, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000832 "using quote(): %r != %r" % (expect, result))
Brett Cannon74bfd702003-04-25 09:39:47 +0000833 expect = given.replace(' ', '+')
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000834 result = urllib.parse.quote_plus(given)
Brett Cannon74bfd702003-04-25 09:39:47 +0000835 self.assertEqual(expect, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000836 "using quote_plus(): %r != %r" % (expect, result))
Brett Cannon74bfd702003-04-25 09:39:47 +0000837
Raymond Hettinger2bdec7b2005-09-10 14:30:09 +0000838 def test_quoting_plus(self):
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000839 self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma'),
Raymond Hettinger2bdec7b2005-09-10 14:30:09 +0000840 'alpha%2Bbeta+gamma')
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000841 self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma', '+'),
Raymond Hettinger2bdec7b2005-09-10 14:30:09 +0000842 'alpha+beta+gamma')
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000843 # Test with bytes
844 self.assertEqual(urllib.parse.quote_plus(b'alpha+beta gamma'),
845 'alpha%2Bbeta+gamma')
846 # Test with safe bytes
847 self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma', b'+'),
848 'alpha+beta+gamma')
849
850 def test_quote_bytes(self):
851 # Bytes should quote directly to percent-encoded values
852 given = b"\xa2\xd8ab\xff"
853 expect = "%A2%D8ab%FF"
854 result = urllib.parse.quote(given)
855 self.assertEqual(expect, result,
856 "using quote(): %r != %r" % (expect, result))
857 # Encoding argument should raise type error on bytes input
858 self.assertRaises(TypeError, urllib.parse.quote, given,
859 encoding="latin-1")
860 # quote_from_bytes should work the same
861 result = urllib.parse.quote_from_bytes(given)
862 self.assertEqual(expect, result,
863 "using quote_from_bytes(): %r != %r"
864 % (expect, result))
865
866 def test_quote_with_unicode(self):
867 # Characters in Latin-1 range, encoded by default in UTF-8
868 given = "\xa2\xd8ab\xff"
869 expect = "%C2%A2%C3%98ab%C3%BF"
870 result = urllib.parse.quote(given)
871 self.assertEqual(expect, result,
872 "using quote(): %r != %r" % (expect, result))
873 # Characters in Latin-1 range, encoded by with None (default)
874 result = urllib.parse.quote(given, encoding=None, errors=None)
875 self.assertEqual(expect, result,
876 "using quote(): %r != %r" % (expect, result))
877 # Characters in Latin-1 range, encoded with Latin-1
878 given = "\xa2\xd8ab\xff"
879 expect = "%A2%D8ab%FF"
880 result = urllib.parse.quote(given, encoding="latin-1")
881 self.assertEqual(expect, result,
882 "using quote(): %r != %r" % (expect, result))
883 # Characters in BMP, encoded by default in UTF-8
884 given = "\u6f22\u5b57" # "Kanji"
885 expect = "%E6%BC%A2%E5%AD%97"
886 result = urllib.parse.quote(given)
887 self.assertEqual(expect, result,
888 "using quote(): %r != %r" % (expect, result))
889 # Characters in BMP, encoded with Latin-1
890 given = "\u6f22\u5b57"
891 self.assertRaises(UnicodeEncodeError, urllib.parse.quote, given,
892 encoding="latin-1")
893 # Characters in BMP, encoded with Latin-1, with replace error handling
894 given = "\u6f22\u5b57"
895 expect = "%3F%3F" # "??"
896 result = urllib.parse.quote(given, encoding="latin-1",
897 errors="replace")
898 self.assertEqual(expect, result,
899 "using quote(): %r != %r" % (expect, result))
900 # Characters in BMP, Latin-1, with xmlcharref error handling
901 given = "\u6f22\u5b57"
902 expect = "%26%2328450%3B%26%2323383%3B" # "&#28450;&#23383;"
903 result = urllib.parse.quote(given, encoding="latin-1",
904 errors="xmlcharrefreplace")
905 self.assertEqual(expect, result,
906 "using quote(): %r != %r" % (expect, result))
Raymond Hettinger2bdec7b2005-09-10 14:30:09 +0000907
Georg Brandlfaf41492009-05-26 18:31:11 +0000908 def test_quote_plus_with_unicode(self):
909 # Encoding (latin-1) test for quote_plus
910 given = "\xa2\xd8 \xff"
911 expect = "%A2%D8+%FF"
912 result = urllib.parse.quote_plus(given, encoding="latin-1")
913 self.assertEqual(expect, result,
914 "using quote_plus(): %r != %r" % (expect, result))
915 # Errors test for quote_plus
916 given = "ab\u6f22\u5b57 cd"
917 expect = "ab%3F%3F+cd"
918 result = urllib.parse.quote_plus(given, encoding="latin-1",
919 errors="replace")
920 self.assertEqual(expect, result,
921 "using quote_plus(): %r != %r" % (expect, result))
922
Senthil Kumarand496c4c2010-07-30 19:34:36 +0000923
Brett Cannon74bfd702003-04-25 09:39:47 +0000924class UnquotingTests(unittest.TestCase):
925 """Tests for unquote() and unquote_plus()
Tim Petersc2659cf2003-05-12 20:19:37 +0000926
Brett Cannon74bfd702003-04-25 09:39:47 +0000927 See the doc string for quoting_Tests for details on quoting and such.
928
929 """
930
931 def test_unquoting(self):
932 # Make sure unquoting of all ASCII values works
933 escape_list = []
934 for num in range(128):
935 given = hexescape(chr(num))
936 expect = chr(num)
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000937 result = urllib.parse.unquote(given)
Brett Cannon74bfd702003-04-25 09:39:47 +0000938 self.assertEqual(expect, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000939 "using unquote(): %r != %r" % (expect, result))
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000940 result = urllib.parse.unquote_plus(given)
Brett Cannon74bfd702003-04-25 09:39:47 +0000941 self.assertEqual(expect, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000942 "using unquote_plus(): %r != %r" %
Brett Cannon74bfd702003-04-25 09:39:47 +0000943 (expect, result))
944 escape_list.append(given)
945 escape_string = ''.join(escape_list)
946 del escape_list
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000947 result = urllib.parse.unquote(escape_string)
Brett Cannon74bfd702003-04-25 09:39:47 +0000948 self.assertEqual(result.count('%'), 1,
Brett Cannon74bfd702003-04-25 09:39:47 +0000949 "using unquote(): not all characters escaped: "
950 "%s" % result)
Georg Brandl604ef372010-07-31 08:20:02 +0000951 self.assertRaises((TypeError, AttributeError), urllib.parse.unquote, None)
952 self.assertRaises((TypeError, AttributeError), urllib.parse.unquote, ())
Florent Xicluna62829dc2010-08-14 20:51:58 +0000953 with support.check_warnings(('', BytesWarning), quiet=True):
954 self.assertRaises((TypeError, AttributeError), urllib.parse.unquote, b'')
Brett Cannon74bfd702003-04-25 09:39:47 +0000955
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000956 def test_unquoting_badpercent(self):
957 # Test unquoting on bad percent-escapes
958 given = '%xab'
959 expect = given
960 result = urllib.parse.unquote(given)
961 self.assertEqual(expect, result, "using unquote(): %r != %r"
962 % (expect, result))
963 given = '%x'
964 expect = given
965 result = urllib.parse.unquote(given)
966 self.assertEqual(expect, result, "using unquote(): %r != %r"
967 % (expect, result))
968 given = '%'
969 expect = given
970 result = urllib.parse.unquote(given)
971 self.assertEqual(expect, result, "using unquote(): %r != %r"
972 % (expect, result))
973 # unquote_to_bytes
974 given = '%xab'
975 expect = bytes(given, 'ascii')
976 result = urllib.parse.unquote_to_bytes(given)
977 self.assertEqual(expect, result, "using unquote_to_bytes(): %r != %r"
978 % (expect, result))
979 given = '%x'
980 expect = bytes(given, 'ascii')
981 result = urllib.parse.unquote_to_bytes(given)
982 self.assertEqual(expect, result, "using unquote_to_bytes(): %r != %r"
983 % (expect, result))
984 given = '%'
985 expect = bytes(given, 'ascii')
986 result = urllib.parse.unquote_to_bytes(given)
987 self.assertEqual(expect, result, "using unquote_to_bytes(): %r != %r"
988 % (expect, result))
Georg Brandl604ef372010-07-31 08:20:02 +0000989 self.assertRaises((TypeError, AttributeError), urllib.parse.unquote_to_bytes, None)
990 self.assertRaises((TypeError, AttributeError), urllib.parse.unquote_to_bytes, ())
Senthil Kumaran79e17f62010-07-19 18:17:19 +0000991
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000992 def test_unquoting_mixed_case(self):
993 # Test unquoting on mixed-case hex digits in the percent-escapes
994 given = '%Ab%eA'
995 expect = b'\xab\xea'
996 result = urllib.parse.unquote_to_bytes(given)
997 self.assertEqual(expect, result,
998 "using unquote_to_bytes(): %r != %r"
999 % (expect, result))
1000
Brett Cannon74bfd702003-04-25 09:39:47 +00001001 def test_unquoting_parts(self):
1002 # Make sure unquoting works when have non-quoted characters
1003 # interspersed
1004 given = 'ab%sd' % hexescape('c')
1005 expect = "abcd"
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001006 result = urllib.parse.unquote(given)
Brett Cannon74bfd702003-04-25 09:39:47 +00001007 self.assertEqual(expect, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +00001008 "using quote(): %r != %r" % (expect, result))
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001009 result = urllib.parse.unquote_plus(given)
Brett Cannon74bfd702003-04-25 09:39:47 +00001010 self.assertEqual(expect, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +00001011 "using unquote_plus(): %r != %r" % (expect, result))
Tim Petersc2659cf2003-05-12 20:19:37 +00001012
Brett Cannon74bfd702003-04-25 09:39:47 +00001013 def test_unquoting_plus(self):
1014 # Test difference between unquote() and unquote_plus()
1015 given = "are+there+spaces..."
1016 expect = given
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001017 result = urllib.parse.unquote(given)
Brett Cannon74bfd702003-04-25 09:39:47 +00001018 self.assertEqual(expect, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +00001019 "using unquote(): %r != %r" % (expect, result))
Brett Cannon74bfd702003-04-25 09:39:47 +00001020 expect = given.replace('+', ' ')
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001021 result = urllib.parse.unquote_plus(given)
Brett Cannon74bfd702003-04-25 09:39:47 +00001022 self.assertEqual(expect, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +00001023 "using unquote_plus(): %r != %r" % (expect, result))
1024
1025 def test_unquote_to_bytes(self):
1026 given = 'br%C3%BCckner_sapporo_20050930.doc'
1027 expect = b'br\xc3\xbcckner_sapporo_20050930.doc'
1028 result = urllib.parse.unquote_to_bytes(given)
1029 self.assertEqual(expect, result,
1030 "using unquote_to_bytes(): %r != %r"
1031 % (expect, result))
1032 # Test on a string with unescaped non-ASCII characters
1033 # (Technically an invalid URI; expect those characters to be UTF-8
1034 # encoded).
1035 result = urllib.parse.unquote_to_bytes("\u6f22%C3%BC")
1036 expect = b'\xe6\xbc\xa2\xc3\xbc' # UTF-8 for "\u6f22\u00fc"
1037 self.assertEqual(expect, result,
1038 "using unquote_to_bytes(): %r != %r"
1039 % (expect, result))
1040 # Test with a bytes as input
1041 given = b'%A2%D8ab%FF'
1042 expect = b'\xa2\xd8ab\xff'
1043 result = urllib.parse.unquote_to_bytes(given)
1044 self.assertEqual(expect, result,
1045 "using unquote_to_bytes(): %r != %r"
1046 % (expect, result))
1047 # Test with a bytes as input, with unescaped non-ASCII bytes
1048 # (Technically an invalid URI; expect those bytes to be preserved)
1049 given = b'%A2\xd8ab%FF'
1050 expect = b'\xa2\xd8ab\xff'
1051 result = urllib.parse.unquote_to_bytes(given)
1052 self.assertEqual(expect, result,
1053 "using unquote_to_bytes(): %r != %r"
1054 % (expect, result))
Brett Cannon74bfd702003-04-25 09:39:47 +00001055
Raymond Hettinger4b0f20d2005-10-15 16:41:53 +00001056 def test_unquote_with_unicode(self):
Guido van Rossum52dbbb92008-08-18 21:44:30 +00001057 # Characters in the Latin-1 range, encoded with UTF-8
1058 given = 'br%C3%BCckner_sapporo_20050930.doc'
1059 expect = 'br\u00fcckner_sapporo_20050930.doc'
1060 result = urllib.parse.unquote(given)
1061 self.assertEqual(expect, result,
1062 "using unquote(): %r != %r" % (expect, result))
1063 # Characters in the Latin-1 range, encoded with None (default)
1064 result = urllib.parse.unquote(given, encoding=None, errors=None)
1065 self.assertEqual(expect, result,
1066 "using unquote(): %r != %r" % (expect, result))
1067
1068 # Characters in the Latin-1 range, encoded with Latin-1
1069 result = urllib.parse.unquote('br%FCckner_sapporo_20050930.doc',
1070 encoding="latin-1")
1071 expect = 'br\u00fcckner_sapporo_20050930.doc'
1072 self.assertEqual(expect, result,
1073 "using unquote(): %r != %r" % (expect, result))
1074
1075 # Characters in BMP, encoded with UTF-8
1076 given = "%E6%BC%A2%E5%AD%97"
1077 expect = "\u6f22\u5b57" # "Kanji"
1078 result = urllib.parse.unquote(given)
1079 self.assertEqual(expect, result,
1080 "using unquote(): %r != %r" % (expect, result))
1081
1082 # Decode with UTF-8, invalid sequence
1083 given = "%F3%B1"
1084 expect = "\ufffd" # Replacement character
1085 result = urllib.parse.unquote(given)
1086 self.assertEqual(expect, result,
1087 "using unquote(): %r != %r" % (expect, result))
1088
1089 # Decode with UTF-8, invalid sequence, replace errors
1090 result = urllib.parse.unquote(given, errors="replace")
1091 self.assertEqual(expect, result,
1092 "using unquote(): %r != %r" % (expect, result))
1093
1094 # Decode with UTF-8, invalid sequence, ignoring errors
1095 given = "%F3%B1"
1096 expect = ""
1097 result = urllib.parse.unquote(given, errors="ignore")
1098 self.assertEqual(expect, result,
1099 "using unquote(): %r != %r" % (expect, result))
1100
1101 # A mix of non-ASCII and percent-encoded characters, UTF-8
1102 result = urllib.parse.unquote("\u6f22%C3%BC")
1103 expect = '\u6f22\u00fc'
1104 self.assertEqual(expect, result,
1105 "using unquote(): %r != %r" % (expect, result))
1106
1107 # A mix of non-ASCII and percent-encoded characters, Latin-1
1108 # (Note, the string contains non-Latin-1-representable characters)
1109 result = urllib.parse.unquote("\u6f22%FC", encoding="latin-1")
1110 expect = '\u6f22\u00fc'
1111 self.assertEqual(expect, result,
1112 "using unquote(): %r != %r" % (expect, result))
Raymond Hettinger4b0f20d2005-10-15 16:41:53 +00001113
Brett Cannon74bfd702003-04-25 09:39:47 +00001114class urlencode_Tests(unittest.TestCase):
1115 """Tests for urlencode()"""
1116
1117 def help_inputtype(self, given, test_type):
1118 """Helper method for testing different input types.
Tim Petersc2659cf2003-05-12 20:19:37 +00001119
Brett Cannon74bfd702003-04-25 09:39:47 +00001120 'given' must lead to only the pairs:
1121 * 1st, 1
1122 * 2nd, 2
1123 * 3rd, 3
Tim Petersc2659cf2003-05-12 20:19:37 +00001124
Brett Cannon74bfd702003-04-25 09:39:47 +00001125 Test cannot assume anything about order. Docs make no guarantee and
1126 have possible dictionary input.
Tim Petersc2659cf2003-05-12 20:19:37 +00001127
Brett Cannon74bfd702003-04-25 09:39:47 +00001128 """
1129 expect_somewhere = ["1st=1", "2nd=2", "3rd=3"]
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001130 result = urllib.parse.urlencode(given)
Brett Cannon74bfd702003-04-25 09:39:47 +00001131 for expected in expect_somewhere:
Ezio Melottib58e0bd2010-01-23 15:40:09 +00001132 self.assertIn(expected, result,
Brett Cannon74bfd702003-04-25 09:39:47 +00001133 "testing %s: %s not found in %s" %
1134 (test_type, expected, result))
1135 self.assertEqual(result.count('&'), 2,
1136 "testing %s: expected 2 '&'s; got %s" %
1137 (test_type, result.count('&')))
1138 amp_location = result.index('&')
1139 on_amp_left = result[amp_location - 1]
1140 on_amp_right = result[amp_location + 1]
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00001141 self.assertTrue(on_amp_left.isdigit() and on_amp_right.isdigit(),
Brett Cannon74bfd702003-04-25 09:39:47 +00001142 "testing %s: '&' not located in proper place in %s" %
1143 (test_type, result))
1144 self.assertEqual(len(result), (5 * 3) + 2, #5 chars per thing and amps
1145 "testing %s: "
1146 "unexpected number of characters: %s != %s" %
1147 (test_type, len(result), (5 * 3) + 2))
1148
1149 def test_using_mapping(self):
1150 # Test passing in a mapping object as an argument.
1151 self.help_inputtype({"1st":'1', "2nd":'2', "3rd":'3'},
1152 "using dict as input type")
1153
1154 def test_using_sequence(self):
1155 # Test passing in a sequence of two-item sequences as an argument.
1156 self.help_inputtype([('1st', '1'), ('2nd', '2'), ('3rd', '3')],
1157 "using sequence of two-item tuples as input")
1158
1159 def test_quoting(self):
1160 # Make sure keys and values are quoted using quote_plus()
1161 given = {"&":"="}
1162 expect = "%s=%s" % (hexescape('&'), hexescape('='))
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001163 result = urllib.parse.urlencode(given)
Brett Cannon74bfd702003-04-25 09:39:47 +00001164 self.assertEqual(expect, result)
1165 given = {"key name":"A bunch of pluses"}
1166 expect = "key+name=A+bunch+of+pluses"
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001167 result = urllib.parse.urlencode(given)
Brett Cannon74bfd702003-04-25 09:39:47 +00001168 self.assertEqual(expect, result)
1169
1170 def test_doseq(self):
1171 # Test that passing True for 'doseq' parameter works correctly
1172 given = {'sequence':['1', '2', '3']}
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001173 expect = "sequence=%s" % urllib.parse.quote_plus(str(['1', '2', '3']))
1174 result = urllib.parse.urlencode(given)
Brett Cannon74bfd702003-04-25 09:39:47 +00001175 self.assertEqual(expect, result)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001176 result = urllib.parse.urlencode(given, True)
Brett Cannon74bfd702003-04-25 09:39:47 +00001177 for value in given["sequence"]:
1178 expect = "sequence=%s" % value
Ezio Melottib58e0bd2010-01-23 15:40:09 +00001179 self.assertIn(expect, result)
Brett Cannon74bfd702003-04-25 09:39:47 +00001180 self.assertEqual(result.count('&'), 2,
1181 "Expected 2 '&'s, got %s" % result.count('&'))
1182
Jeremy Hylton1ef7c6b2009-03-26 16:57:30 +00001183 def test_empty_sequence(self):
1184 self.assertEqual("", urllib.parse.urlencode({}))
1185 self.assertEqual("", urllib.parse.urlencode([]))
1186
1187 def test_nonstring_values(self):
1188 self.assertEqual("a=1", urllib.parse.urlencode({"a": 1}))
1189 self.assertEqual("a=None", urllib.parse.urlencode({"a": None}))
1190
1191 def test_nonstring_seq_values(self):
1192 self.assertEqual("a=1&a=2", urllib.parse.urlencode({"a": [1, 2]}, True))
1193 self.assertEqual("a=None&a=a",
1194 urllib.parse.urlencode({"a": [None, "a"]}, True))
Georg Brandl2daf6ae2012-02-20 19:54:16 +01001195 data = collections.OrderedDict([("a", 1), ("b", 1)])
Jeremy Hylton1ef7c6b2009-03-26 16:57:30 +00001196 self.assertEqual("a=a&a=b",
Georg Brandl2daf6ae2012-02-20 19:54:16 +01001197 urllib.parse.urlencode({"a": data}, True))
Jeremy Hylton1ef7c6b2009-03-26 16:57:30 +00001198
Senthil Kumarandf022da2010-07-03 17:48:22 +00001199 def test_urlencode_encoding(self):
1200 # ASCII encoding. Expect %3F with errors="replace'
1201 given = (('\u00a0', '\u00c1'),)
1202 expect = '%3F=%3F'
1203 result = urllib.parse.urlencode(given, encoding="ASCII", errors="replace")
1204 self.assertEqual(expect, result)
1205
1206 # Default is UTF-8 encoding.
1207 given = (('\u00a0', '\u00c1'),)
1208 expect = '%C2%A0=%C3%81'
1209 result = urllib.parse.urlencode(given)
1210 self.assertEqual(expect, result)
1211
1212 # Latin-1 encoding.
1213 given = (('\u00a0', '\u00c1'),)
1214 expect = '%A0=%C1'
1215 result = urllib.parse.urlencode(given, encoding="latin-1")
1216 self.assertEqual(expect, result)
1217
1218 def test_urlencode_encoding_doseq(self):
1219 # ASCII Encoding. Expect %3F with errors="replace'
1220 given = (('\u00a0', '\u00c1'),)
1221 expect = '%3F=%3F'
1222 result = urllib.parse.urlencode(given, doseq=True,
1223 encoding="ASCII", errors="replace")
1224 self.assertEqual(expect, result)
1225
1226 # ASCII Encoding. On a sequence of values.
1227 given = (("\u00a0", (1, "\u00c1")),)
1228 expect = '%3F=1&%3F=%3F'
1229 result = urllib.parse.urlencode(given, True,
1230 encoding="ASCII", errors="replace")
1231 self.assertEqual(expect, result)
1232
1233 # Utf-8
1234 given = (("\u00a0", "\u00c1"),)
1235 expect = '%C2%A0=%C3%81'
1236 result = urllib.parse.urlencode(given, True)
1237 self.assertEqual(expect, result)
1238
1239 given = (("\u00a0", (42, "\u00c1")),)
1240 expect = '%C2%A0=42&%C2%A0=%C3%81'
1241 result = urllib.parse.urlencode(given, True)
1242 self.assertEqual(expect, result)
1243
1244 # latin-1
1245 given = (("\u00a0", "\u00c1"),)
1246 expect = '%A0=%C1'
1247 result = urllib.parse.urlencode(given, True, encoding="latin-1")
1248 self.assertEqual(expect, result)
1249
1250 given = (("\u00a0", (42, "\u00c1")),)
1251 expect = '%A0=42&%A0=%C1'
1252 result = urllib.parse.urlencode(given, True, encoding="latin-1")
1253 self.assertEqual(expect, result)
1254
1255 def test_urlencode_bytes(self):
1256 given = ((b'\xa0\x24', b'\xc1\x24'),)
1257 expect = '%A0%24=%C1%24'
1258 result = urllib.parse.urlencode(given)
1259 self.assertEqual(expect, result)
1260 result = urllib.parse.urlencode(given, True)
1261 self.assertEqual(expect, result)
1262
1263 # Sequence of values
1264 given = ((b'\xa0\x24', (42, b'\xc1\x24')),)
1265 expect = '%A0%24=42&%A0%24=%C1%24'
1266 result = urllib.parse.urlencode(given, True)
1267 self.assertEqual(expect, result)
1268
1269 def test_urlencode_encoding_safe_parameter(self):
1270
1271 # Send '$' (\x24) as safe character
1272 # Default utf-8 encoding
1273
1274 given = ((b'\xa0\x24', b'\xc1\x24'),)
1275 result = urllib.parse.urlencode(given, safe=":$")
1276 expect = '%A0$=%C1$'
1277 self.assertEqual(expect, result)
1278
1279 given = ((b'\xa0\x24', b'\xc1\x24'),)
1280 result = urllib.parse.urlencode(given, doseq=True, safe=":$")
1281 expect = '%A0$=%C1$'
1282 self.assertEqual(expect, result)
1283
1284 # Safe parameter in sequence
1285 given = ((b'\xa0\x24', (b'\xc1\x24', 0xd, 42)),)
1286 expect = '%A0$=%C1$&%A0$=13&%A0$=42'
1287 result = urllib.parse.urlencode(given, True, safe=":$")
1288 self.assertEqual(expect, result)
1289
1290 # Test all above in latin-1 encoding
1291
1292 given = ((b'\xa0\x24', b'\xc1\x24'),)
1293 result = urllib.parse.urlencode(given, safe=":$",
1294 encoding="latin-1")
1295 expect = '%A0$=%C1$'
1296 self.assertEqual(expect, result)
1297
1298 given = ((b'\xa0\x24', b'\xc1\x24'),)
1299 expect = '%A0$=%C1$'
1300 result = urllib.parse.urlencode(given, doseq=True, safe=":$",
1301 encoding="latin-1")
1302
1303 given = ((b'\xa0\x24', (b'\xc1\x24', 0xd, 42)),)
1304 expect = '%A0$=%C1$&%A0$=13&%A0$=42'
1305 result = urllib.parse.urlencode(given, True, safe=":$",
1306 encoding="latin-1")
1307 self.assertEqual(expect, result)
1308
class Pathname_Tests(unittest.TestCase):
    """Test pathname2url() and url2pathname()"""

    def test_basic(self):
        """A plain relative path converts to a URL and back unchanged."""
        expected_path = os.path.join("parts", "of", "a", "path")
        expected_url = "parts/of/a/path"
        result = urllib.request.pathname2url(expected_path)
        self.assertEqual(expected_url, result,
                         "pathname2url() failed; %s != %s" %
                         (expected_url, result))
        result = urllib.request.url2pathname(expected_url)
        self.assertEqual(expected_path, result,
                         "url2pathname() failed; %s != %s" %
                         (expected_path, result))

    def test_quoting(self):
        """pathname2url() quotes and url2pathname() unquotes components."""
        given = os.path.join("needs", "quot=ing", "here")
        expect = "needs/%s/here" % urllib.parse.quote("quot=ing")
        result = urllib.request.pathname2url(given)
        self.assertEqual(expect, result,
                         "pathname2url() failed; %s != %s" %
                         (expect, result))
        # Feed the quoted URL back in: it must decode to the original path.
        expect = given
        result = urllib.request.url2pathname(result)
        self.assertEqual(expect, result,
                         "url2pathname() failed; %s != %s" %
                         (expect, result))
        given = os.path.join("make sure", "using_quote")
        expect = "%s/using_quote" % urllib.parse.quote("make sure")
        result = urllib.request.pathname2url(given)
        self.assertEqual(expect, result,
                         "pathname2url() failed; %s != %s" %
                         (expect, result))
        # The expected path keeps the '+' as-is (it is not turned into a
        # space).
        given = "make+sure/using_unquote"
        expect = os.path.join("make+sure", "using_unquote")
        result = urllib.request.url2pathname(given)
        self.assertEqual(expect, result,
                         "url2pathname() failed; %s != %s" %
                         (expect, result))

    @unittest.skipUnless(sys.platform == 'win32',
                         'test specific to the Windows implementation of '
                         'urllib.request.url2pathname()')
    def test_ntpath(self):
        """Drive-letter URLs map to Windows paths (only run on win32)."""
        expect = 'C:\\'
        for url in ('/C:/', '///C:/', '/C|//'):
            # subTest reports every failing spelling, not just the first.
            with self.subTest(url=url):
                result = urllib.request.url2pathname(url)
                self.assertEqual(expect, result,
                                 'urllib.request.url2pathname() failed; '
                                 '%s != %s' % (expect, result))
        given = '///C|/path'
        expect = 'C:\\path'
        result = urllib.request.url2pathname(given)
        self.assertEqual(expect, result,
                         'urllib.request.url2pathname() failed; %s != %s' %
                         (expect, result))
1368
class Utility_Tests(unittest.TestCase):
    """Checks for the miscellaneous utility functions offered by urllib."""

    def test_thishost(self):
        """urllib.request.thishost() must hand back a tuple."""
        host_info = urllib.request.thishost()
        self.assertIsInstance(host_info, tuple)
1375
Senthil Kumaran690ce9b2009-05-05 18:41:13 +00001376
class URLopener_Tests(unittest.TestCase):
    """Exercise URLopener.open() through a minimal subclass."""

    def test_quoted_open(self):
        """open() quotes unsafe characters before dispatching the URL."""
        class DummyURLopener(urllib.request.URLopener):
            # Handler for the made-up "spam" scheme: echo what open()
            # passes in so the test can inspect the quoting.
            def open_spam(self, url):
                return url

        deprecation = (
            'DummyURLopener style of invoking requests is deprecated.',
            DeprecationWarning)
        # NOTE(review): both assertions are kept under the warnings check
        # because each one instantiates DummyURLopener -- confirm against
        # the upstream layout.
        with support.check_warnings(deprecation):
            opened = DummyURLopener().open('spam://example/ /')
            self.assertEqual(opened, '//example/%20/')

            # test the safe characters are not quoted by urlopen
            opened = DummyURLopener().open(
                "spam://c:|windows%/:=&?~#+!$,;'@()*[]|/path/")
            self.assertEqual(
                opened, "//c:|windows%/:=&?~#+!$,;'@()*[]|/path/")
Senthil Kumaran734f0592010-02-20 22:19:04 +00001394
# The tests below are commented out.
# It is unclear why they keep failing on Windows and SPARC.
# Everywhere else they work fine, but on those machines they sometimes
# fail in one test and sometimes in another.  They pass on my Linux box.
# If anybody has one of the problematic environments, please help!
# . Facundo
1402#
1403# def server(evt):
Georg Brandlf78e02b2008-06-10 17:40:04 +00001404# import socket, time
Guido van Rossume7ba4952007-06-06 23:52:48 +00001405# serv = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
1406# serv.settimeout(3)
1407# serv.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
1408# serv.bind(("", 9093))
Charles-François Natali6e204602014-07-23 19:28:13 +01001409# serv.listen()
Guido van Rossume7ba4952007-06-06 23:52:48 +00001410# try:
1411# conn, addr = serv.accept()
1412# conn.send("1 Hola mundo\n")
1413# cantdata = 0
1414# while cantdata < 13:
1415# data = conn.recv(13-cantdata)
1416# cantdata += len(data)
1417# time.sleep(.3)
1418# conn.send("2 No more lines\n")
1419# conn.close()
1420# except socket.timeout:
1421# pass
1422# finally:
1423# serv.close()
1424# evt.set()
1425#
1426# class FTPWrapperTests(unittest.TestCase):
1427#
1428# def setUp(self):
Georg Brandlf78e02b2008-06-10 17:40:04 +00001429# import ftplib, time, threading
Guido van Rossume7ba4952007-06-06 23:52:48 +00001430# ftplib.FTP.port = 9093
1431# self.evt = threading.Event()
1432# threading.Thread(target=server, args=(self.evt,)).start()
1433# time.sleep(.1)
1434#
1435# def tearDown(self):
1436# self.evt.wait()
1437#
1438# def testBasic(self):
1439# # connects
1440# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [])
Georg Brandlf78e02b2008-06-10 17:40:04 +00001441# ftp.close()
Guido van Rossume7ba4952007-06-06 23:52:48 +00001442#
1443# def testTimeoutNone(self):
Georg Brandlf78e02b2008-06-10 17:40:04 +00001444# # global default timeout is ignored
1445# import socket
Serhiy Storchaka25d8aea2014-02-08 14:50:08 +02001446# self.assertIsNone(socket.getdefaulttimeout())
Guido van Rossume7ba4952007-06-06 23:52:48 +00001447# socket.setdefaulttimeout(30)
1448# try:
1449# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [])
1450# finally:
Georg Brandlf78e02b2008-06-10 17:40:04 +00001451# socket.setdefaulttimeout(None)
Guido van Rossume7ba4952007-06-06 23:52:48 +00001452# self.assertEqual(ftp.ftp.sock.gettimeout(), 30)
Georg Brandlf78e02b2008-06-10 17:40:04 +00001453# ftp.close()
Guido van Rossume7ba4952007-06-06 23:52:48 +00001454#
Georg Brandlf78e02b2008-06-10 17:40:04 +00001455# def testTimeoutDefault(self):
1456# # global default timeout is used
1457# import socket
Serhiy Storchaka25d8aea2014-02-08 14:50:08 +02001458# self.assertIsNone(socket.getdefaulttimeout())
Georg Brandlf78e02b2008-06-10 17:40:04 +00001459# socket.setdefaulttimeout(30)
1460# try:
1461# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [])
1462# finally:
1463# socket.setdefaulttimeout(None)
1464# self.assertEqual(ftp.ftp.sock.gettimeout(), 30)
1465# ftp.close()
1466#
1467# def testTimeoutValue(self):
1468# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [],
1469# timeout=30)
1470# self.assertEqual(ftp.ftp.sock.gettimeout(), 30)
1471# ftp.close()
Guido van Rossume7ba4952007-06-06 23:52:48 +00001472
Senthil Kumaran8b081b72013-04-10 20:53:12 -07001473
class RequestTests(unittest.TestCase):
    """Unit tests for urllib.request.Request."""

    def test_default_values(self):
        """With no explicit method: GET without data, POST with data."""
        make_request = urllib.request.Request
        url = "http://www.python.org"
        self.assertEqual(make_request(url).get_method(), 'GET')
        self.assertEqual(make_request(url, {}).get_method(), 'POST')

    def test_with_method_arg(self):
        """An explicit ``method`` wins, whether passed in or assigned."""
        make_request = urllib.request.Request
        url = "http://www.python.org"

        # method= given without data
        without_data = make_request(url, method='HEAD')
        self.assertEqual(without_data.method, 'HEAD')
        self.assertEqual(without_data.get_method(), 'HEAD')

        # method= given together with data
        with_data = make_request(url, {}, method='HEAD')
        self.assertEqual(with_data.method, 'HEAD')
        self.assertEqual(with_data.get_method(), 'HEAD')

        # Re-assigning the attribute afterwards changes get_method() too.
        reassigned = make_request(url, method='GET')
        self.assertEqual(reassigned.get_method(), 'GET')
        reassigned.method = 'HEAD'
        self.assertEqual(reassigned.get_method(), 'HEAD')
Skip Montanaro080c9972001-01-28 21:12:22 +00001496
1497
class URL2PathNameTests(unittest.TestCase):
    """Tests for url2pathname() as imported from nturl2path."""

    def test_converting_drive_letter(self):
        """Both 'C|' and 'C:' spellings yield a 'C:' drive."""
        conversions = (
            ("///C|", 'C:'),
            ("///C:", 'C:'),
            ("///C|/", 'C:\\'),
        )
        for url, path in conversions:
            self.assertEqual(url2pathname(url), path)

    def test_converting_when_no_drive_letter(self):
        """Without a drive marker, slashes simply become backslashes."""
        # cannot end a raw string in \
        conversions = (
            ("///C/test/", r'\\\C\test' '\\'),
            ("////C/test/", r'\\C\test' '\\'),
        )
        for url, path in conversions:
            self.assertEqual(url2pathname(url), path)

    def test_simple_compare(self):
        converted = url2pathname("///C|/foo/bar/spam.foo")
        self.assertEqual(converted, r'C:\foo\bar\spam.foo')

    def test_non_ascii_drive_letter(self):
        with self.assertRaises(IOError):
            url2pathname("///\u00e8|/")

    def test_roundtrip_url2pathname(self):
        """path -> URL -> path must give back the starting path."""
        list_of_paths = (
            'C:',
            r'\\\C\test\\',
            r'C:\foo\bar\spam.foo',
        )
        for path in list_of_paths:
            as_url = pathname2url(path)
            self.assertEqual(url2pathname(as_url), path)
Senthil Kumaran277e9092013-04-10 20:51:19 -07001524
class PathName2URLTests(unittest.TestCase):
    """Tests for pathname2url() as imported from nturl2path."""

    def test_converting_drive_letter(self):
        """A bare drive, with or without a trailing backslash, gives ///C:"""
        for path in ("C:", "C:\\"):
            self.assertEqual(pathname2url(path), '///C:')

    def test_converting_when_no_drive_letter(self):
        """Without a drive, backslashes simply become slashes."""
        conversions = (
            (r"\\\folder\test" "\\", '/////folder/test/'),
            (r"\\folder\test" "\\", '////folder/test/'),
            (r"\folder\test" "\\", '/folder/test/'),
        )
        for path, url in conversions:
            self.assertEqual(pathname2url(path), url)

    def test_simple_compare(self):
        converted = pathname2url(r'C:\foo\bar\spam.foo')
        self.assertEqual(converted, "///C:/foo/bar/spam.foo")

    def test_long_drive_letter(self):
        with self.assertRaises(IOError):
            pathname2url("XX:\\")

    def test_roundtrip_pathname2url(self):
        """URL -> path -> URL must give back the starting URL."""
        list_of_paths = (
            '///C:',
            '/////folder/test/',
            '///C:/foo/bar/spam.foo',
        )
        for path in list_of_paths:
            as_pathname = url2pathname(path)
            self.assertEqual(pathname2url(as_pathname), path)
Brett Cannon74bfd702003-04-25 09:39:47 +00001552
# Run every test case in this module when the file is executed as a script.
if __name__ == '__main__':
    unittest.main()