blob: 49e2a2cd6198dd4513fd7dae70d7c8adc478356b [file] [log] [blame]
"""Regression tests for urllib"""
2
Jeremy Hylton1afc1692008-06-18 20:49:58 +00003import urllib.parse
4import urllib.request
guido@google.coma119df92011-03-29 11:41:02 -07005import urllib.error
Georg Brandl24420152008-05-26 16:32:26 +00006import http.client
Barry Warsaw820c1202008-06-12 04:06:45 +00007import email.message
Jeremy Hylton66dc8c52007-08-04 03:42:26 +00008import io
Brett Cannon74bfd702003-04-25 09:39:47 +00009import unittest
Benjamin Peterson3c2dca62014-06-07 15:08:04 -070010from unittest.mock import patch
Benjamin Petersonee8712c2008-05-20 21:35:26 +000011from test import support
Brett Cannon74bfd702003-04-25 09:39:47 +000012import os
Antoine Pitrou07df6552014-11-02 17:23:14 +010013try:
14 import ssl
15except ImportError:
16 ssl = None
Senthil Kumaran2d2ea1b2011-04-14 13:16:30 +080017import sys
Georg Brandl5a650a22005-08-26 08:51:34 +000018import tempfile
Senthil Kumaran277e9092013-04-10 20:51:19 -070019from nturl2path import url2pathname, pathname2url
Jeremy Hylton6102e292000-08-31 15:48:10 +000020
Senthil Kumaranc5c5a142012-01-14 19:09:04 +080021from base64 import b64encode
Georg Brandl2daf6ae2012-02-20 19:54:16 +010022import collections
Senthil Kumaranc5c5a142012-01-14 19:09:04 +080023
Senthil Kumaran8b081b72013-04-10 20:53:12 -070024
def hexescape(char):
    """Escape char as RFC 2396 specifies.

    Return a percent sign followed by the upper-case hexadecimal code point
    of the single character *char*, zero-padded to at least two digits.
    """
    return "%%%02X" % ord(char)

# Shortcut for testing FancyURLopener
_urlopener = None


def urlopen(url, data=None, proxies=None):
    """urlopen(url [, data]) -> open file-like object

    When *proxies* is given, a fresh urllib.request.FancyURLopener is built
    for this call only.  Otherwise a single module-level opener is created
    on first use (via the FancyURLopener() helper in this file) and reused
    by later calls.  *data*, when not None, is passed through to
    opener.open().
    """
    global _urlopener
    if proxies is not None:
        opener = urllib.request.FancyURLopener(proxies=proxies)
    elif not _urlopener:
        opener = FancyURLopener()
        # Cache the opener so later calls share it.
        _urlopener = opener
    else:
        opener = _urlopener
    if data is None:
        return opener.open(url)
    else:
        return opener.open(url, data)


def FancyURLopener():
    """Return a new urllib.request.FancyURLopener instance.

    The instance is created inside support.check_warnings() set up for the
    DeprecationWarning whose message starts with 'FancyURLopener style of
    invoking requests is deprecated.'.
    """
    with support.check_warnings(
            ('FancyURLopener style of invoking requests is deprecated.',
            DeprecationWarning)):
        return urllib.request.FancyURLopener()


def fakehttp(fakedata):
    """Return an HTTPConnection subclass that replays *fakedata*.

    The returned class never opens a real socket: connect() installs a
    BytesIO-backed fake socket preloaded with *fakedata* (bytes), and the
    most recent payload passed to sendall() is stored on the class
    attribute ``buf`` so tests can inspect what was sent.
    """
    class FakeSocket(io.BytesIO):
        # Number of outstanding references (the socket itself plus every
        # makefile() result); the buffer is really closed only at zero.
        io_refs = 1

        def sendall(self, data):
            FakeHTTPConnection.buf = data

        def makefile(self, *args, **kwds):
            self.io_refs += 1
            return self

        def read(self, amt=None):
            # Reading a closed BytesIO raises ValueError; report EOF instead.
            if self.closed:
                return b""
            return io.BytesIO.read(self, amt)

        def readline(self, length=None):
            if self.closed:
                return b""
            return io.BytesIO.readline(self, length)

        def close(self):
            self.io_refs -= 1
            if self.io_refs == 0:
                io.BytesIO.close(self)

    class FakeHTTPConnection(http.client.HTTPConnection):

        # buffer to store data for verification in urlopen tests.
        buf = None
        # One fake socket per generated class, shared by its instances.
        fakesock = FakeSocket(fakedata)

        def connect(self):
            self.sock = self.fakesock

    return FakeHTTPConnection


class FakeHTTPMixin(object):
    """Mixin that swaps http.client.HTTPConnection for a canned-data fake."""

    def fakehttp(self, fakedata):
        """Replace http.client.HTTPConnection with a fake serving *fakedata*.

        The real class is remembered on the instance so that unfakehttp()
        can put it back.
        """
        self._connection_class = http.client.HTTPConnection
        # Inside the method body this name is the module-level fakehttp()
        # factory, not this method.
        http.client.HTTPConnection = fakehttp(fakedata)

    def unfakehttp(self):
        """Restore the HTTPConnection class saved by fakehttp()."""
        http.client.HTTPConnection = self._connection_class


class FakeFTPMixin(object):
    """Mixin that swaps urllib.request.ftpwrapper for a do-nothing fake."""

    def fakeftp(self):
        """Replace urllib.request.ftpwrapper with a stub class.

        The stub accepts the constructor arguments listed below and ignores
        them, returns an empty BytesIO and a length of 0 from retrfile(),
        and has a no-op close().  The real class is saved on the instance
        for unfakeftp().
        """
        class FakeFtpWrapper(object):
            def __init__(self,  user, passwd, host, port, dirs, timeout=None,
                     persistent=True):
                pass

            def retrfile(self, file, type):
                return io.BytesIO(), 0

            def close(self):
                pass

        self._ftpwrapper_class = urllib.request.ftpwrapper
        urllib.request.ftpwrapper = FakeFtpWrapper

    def unfakeftp(self):
        """Restore the ftpwrapper class saved by fakeftp()."""
        urllib.request.ftpwrapper = self._ftpwrapper_class


class urlopen_FileTests(unittest.TestCase):
    """Test urlopen() opening a temporary file.

    Try to test as much functionality as possible so as to cut down on reliance
    on connecting to the Net for testing.

    """

    def setUp(self):
        # Create a temp file to use for testing
        self.text = bytes("test_urllib: %s\n" % self.__class__.__name__,
                          "ascii")
        with open(support.TESTFN, 'wb') as f:
            f.write(self.text)
        # Cleanups run even when the rest of setUp() fails (tearDown() does
        # not), and they run after tearDown(), i.e. after the object below
        # has been closed.
        self.addCleanup(os.remove, support.TESTFN)
        self.pathname = support.TESTFN
        self.returned_obj = urlopen("file:%s" % self.pathname)

    def tearDown(self):
        """Shut down the open object"""
        self.returned_obj.close()

    def test_interface(self):
        # Make sure object returned by urlopen() has the specified methods
        for attr in ("read", "readline", "readlines", "fileno",
                     "close", "info", "geturl", "getcode", "__iter__"):
            self.assertTrue(hasattr(self.returned_obj, attr),
                         "object returned by urlopen() lacks %s attribute" %
                         attr)

    def test_read(self):
        self.assertEqual(self.text, self.returned_obj.read())

    def test_readline(self):
        self.assertEqual(self.text, self.returned_obj.readline())
        self.assertEqual(b'', self.returned_obj.readline(),
                         "calling readline() after exhausting the file did not"
                         " return an empty string")

    def test_readlines(self):
        lines_list = self.returned_obj.readlines()
        self.assertEqual(len(lines_list), 1,
                         "readlines() returned the wrong number of lines")
        self.assertEqual(lines_list[0], self.text,
                         "readlines() returned improper text")

    def test_fileno(self):
        file_num = self.returned_obj.fileno()
        self.assertIsInstance(file_num, int, "fileno() did not return an int")
        self.assertEqual(os.read(file_num, len(self.text)), self.text,
                         "Reading on the file descriptor returned by fileno() "
                         "did not return the expected text")

    def test_close(self):
        # Test close() by calling it here and then having it be called again
        # by the tearDown() method for the test
        self.returned_obj.close()

    def test_info(self):
        self.assertIsInstance(self.returned_obj.info(), email.message.Message)

    def test_geturl(self):
        self.assertEqual(self.returned_obj.geturl(), self.pathname)

    def test_getcode(self):
        self.assertIsNone(self.returned_obj.getcode())

    def test_iter(self):
        # Test iterator
        # Don't need to count number of iterations since test would fail the
        # instant it returned anything beyond the first line from the
        # comparison.
        # Use the iterator in the usual implicit way to test for ticket #4608.
        for line in self.returned_obj:
            self.assertEqual(line, self.text)

    def test_relativelocalfile(self):
        # A relative path has no URL scheme, so urlopen() must reject it.
        self.assertRaises(ValueError, urllib.request.urlopen,
                          './' + self.pathname)

class ProxyTests(unittest.TestCase):
    """Test proxy settings read from environment variables."""

    def setUp(self):
        # Records changes to env vars
        self.env = support.EnvironmentVarGuard()
        # Delete all proxy related env vars
        for k in list(os.environ):
            if 'proxy' in k.lower():
                self.env.unset(k)

    def tearDown(self):
        # Restore all proxy related env vars
        self.env.__exit__()
        del self.env

    def test_getproxies_environment_keep_no_proxies(self):
        self.env.set('NO_PROXY', 'localhost')
        proxies = urllib.request.getproxies_environment()
        # getproxies_environment use lowered case truncated (no '_proxy') keys
        self.assertEqual('localhost', proxies['no'])
        # List of no_proxies with space.
        self.env.set('NO_PROXY',
                     'localhost, anotherdomain.com, newdomain.com:1234')
        self.assertTrue(
            urllib.request.proxy_bypass_environment('anotherdomain.com'))
        self.assertTrue(
            urllib.request.proxy_bypass_environment('anotherdomain.com:8888'))
        self.assertTrue(
            urllib.request.proxy_bypass_environment('newdomain.com:1234'))

    def test_proxy_bypass_environment_host_match(self):
        bypass = urllib.request.proxy_bypass_environment
        self.env.set('NO_PROXY',
                     'localhost, anotherdomain.com, newdomain.com:1234')
        self.assertTrue(bypass('localhost'))
        self.assertTrue(bypass('LocalHost'))                 # MixedCase
        self.assertTrue(bypass('LOCALHOST'))                 # UPPERCASE
        self.assertTrue(bypass('newdomain.com:1234'))
        self.assertTrue(bypass('anotherdomain.com:8888'))
        self.assertTrue(bypass('www.newdomain.com:1234'))
        self.assertFalse(bypass('prelocalhost'))
        self.assertFalse(bypass('newdomain.com'))            # no port
        self.assertFalse(bypass('newdomain.com:1235'))       # wrong port

class ProxyTests_withOrderedEnv(unittest.TestCase):
    """Test proxy environment handling where variable order matters.

    os.environ is replaced by an empty OrderedDict for each test so that
    differently-cased spellings of one variable can coexist in a known
    insertion order.
    """

    def setUp(self):
        # We need to test conditions, where variable order _is_ significant
        self._saved_env = os.environ
        # Monkey patch os.environ, start with empty fake environment
        os.environ = collections.OrderedDict()

    def tearDown(self):
        os.environ = self._saved_env

    def test_getproxies_environment_prefer_lowercase(self):
        # Test lowercase preference with removal
        os.environ['no_proxy'] = ''
        os.environ['No_Proxy'] = 'localhost'
        self.assertFalse(urllib.request.proxy_bypass_environment('localhost'))
        self.assertFalse(urllib.request.proxy_bypass_environment('arbitrary'))
        os.environ['http_proxy'] = ''
        os.environ['HTTP_PROXY'] = 'http://somewhere:3128'
        proxies = urllib.request.getproxies_environment()
        self.assertEqual({}, proxies)
        # Test lowercase preference of proxy bypass and correct matching
        # including ports
        os.environ['no_proxy'] = 'localhost, noproxy.com, my.proxy:1234'
        os.environ['No_Proxy'] = 'xyz.com'
        self.assertTrue(urllib.request.proxy_bypass_environment('localhost'))
        self.assertTrue(
            urllib.request.proxy_bypass_environment('noproxy.com:5678'))
        self.assertTrue(
            urllib.request.proxy_bypass_environment('my.proxy:1234'))
        self.assertFalse(urllib.request.proxy_bypass_environment('my.proxy'))
        self.assertFalse(urllib.request.proxy_bypass_environment('arbitrary'))
        # Test lowercase preference with replacement
        os.environ['http_proxy'] = 'http://somewhere:3128'
        os.environ['Http_Proxy'] = 'http://somewhereelse:3128'
        proxies = urllib.request.getproxies_environment()
        self.assertEqual('http://somewhere:3128', proxies['http'])

class urlopen_HttpTests(unittest.TestCase, FakeHTTPMixin, FakeFTPMixin):
    """Test urlopen() opening a fake http connection."""

    def check_read(self, ver):
        """Open a canned 200 response declaring HTTP version *ver* (bytes)
        and check the body, URL and status code that come back."""
        self.fakehttp(b"HTTP/" + ver + b" 200 OK\r\n\r\nHello!")
        try:
            fp = urlopen("http://python.org/")
            self.assertEqual(fp.readline(), b"Hello!")
            self.assertEqual(fp.readline(), b"")
            self.assertEqual(fp.geturl(), 'http://python.org/')
            self.assertEqual(fp.getcode(), 200)
        finally:
            self.unfakehttp()

    def test_url_fragment(self):
        # Issue #11703: geturl() omits fragments in the original URL.
        url = 'http://docs.python.org/library/urllib.html#OK'
        self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello!")
        try:
            fp = urllib.request.urlopen(url)
            self.assertEqual(fp.geturl(), url)
        finally:
            self.unfakehttp()

    def test_willclose(self):
        self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello!")
        try:
            resp = urlopen("http://www.python.org")
            self.assertTrue(resp.fp.will_close)
        finally:
            self.unfakehttp()

    def test_read_0_9(self):
        # "0.9" response accepted (but not "simple responses" without
        # a status line)
        self.check_read(b"0.9")

    def test_read_1_0(self):
        self.check_read(b"1.0")

    def test_read_1_1(self):
        self.check_read(b"1.1")

    def test_read_bogus(self):
        # urlopen() should raise OSError for many error codes.
        self.fakehttp(b'''HTTP/1.1 401 Authentication Required
Date: Wed, 02 Jan 2008 03:03:54 GMT
Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
Connection: close
Content-Type: text/html; charset=iso-8859-1
''')
        try:
            self.assertRaises(OSError, urlopen, "http://python.org/")
        finally:
            self.unfakehttp()

    def test_invalid_redirect(self):
        # urlopen() should raise OSError for many error codes.
        self.fakehttp(b'''HTTP/1.1 302 Found
Date: Wed, 02 Jan 2008 03:03:54 GMT
Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
Location: file://guidocomputer.athome.com:/python/license
Connection: close
Content-Type: text/html; charset=iso-8859-1
''')
        try:
            msg = "Redirection to url 'file:"
            with self.assertRaisesRegex(urllib.error.HTTPError, msg):
                urlopen("http://python.org/")
        finally:
            self.unfakehttp()

    def test_redirect_limit_independent(self):
        # Ticket #12923: make sure independent requests each use their
        # own retry limit.
        for i in range(FancyURLopener().maxtries):
            self.fakehttp(b'''HTTP/1.1 302 Found
Location: file://guidocomputer.athome.com:/python/license
Connection: close
''')
            try:
                self.assertRaises(urllib.error.HTTPError, urlopen,
                    "http://something")
            finally:
                self.unfakehttp()

    def test_empty_socket(self):
        # urlopen() raises OSError if the underlying socket does not send any
        # data. (#1680230)
        self.fakehttp(b'')
        try:
            self.assertRaises(OSError, urlopen, "http://something")
        finally:
            self.unfakehttp()

    def test_missing_localfile(self):
        # Test for #10836
        with self.assertRaises(urllib.error.URLError) as e:
            urlopen('file://localhost/a/file/which/doesnot/exists.py')
        self.assertTrue(e.exception.filename)
        self.assertTrue(e.exception.reason)

    def test_file_notexists(self):
        # The same file URL must open while the file exists and raise
        # URLError once the file has been removed.
        fd, tmp_file = tempfile.mkstemp()
        tmp_fileurl = 'file://localhost/' + tmp_file.replace(os.path.sep, '/')
        try:
            self.assertTrue(os.path.exists(tmp_file))
            with urlopen(tmp_fileurl) as fobj:
                self.assertTrue(fobj)
        finally:
            os.close(fd)
            os.unlink(tmp_file)
        self.assertFalse(os.path.exists(tmp_file))
        with self.assertRaises(urllib.error.URLError):
            urlopen(tmp_fileurl)

    def test_ftp_nohost(self):
        test_ftp_url = 'ftp:///path'
        with self.assertRaises(urllib.error.URLError) as e:
            urlopen(test_ftp_url)
        self.assertFalse(e.exception.filename)
        self.assertTrue(e.exception.reason)

    def test_ftp_nonexisting(self):
        with self.assertRaises(urllib.error.URLError) as e:
            urlopen('ftp://localhost/a/file/which/doesnot/exists.py')
        self.assertFalse(e.exception.filename)
        self.assertTrue(e.exception.reason)

    @patch.object(urllib.request, 'MAXFTPCACHE', 0)
    def test_ftp_cache_pruning(self):
        # With MAXFTPCACHE patched to 0, opening an FTP URL while the cache
        # already holds an entry must not fail.
        self.fakeftp()
        try:
            urllib.request.ftpcache['test'] = urllib.request.ftpwrapper(
                'user', 'pass', 'localhost', 21, [])
            urlopen('ftp://localhost')
        finally:
            self.unfakeftp()

    def test_userpass_inurl(self):
        self.fakehttp(b"HTTP/1.0 200 OK\r\n\r\nHello!")
        try:
            fp = urlopen("http://user:pass@python.org/")
            self.assertEqual(fp.readline(), b"Hello!")
            self.assertEqual(fp.readline(), b"")
            self.assertEqual(fp.geturl(), 'http://user:pass@python.org/')
            self.assertEqual(fp.getcode(), 200)
        finally:
            self.unfakehttp()

    def test_userpass_inurl_w_spaces(self):
        self.fakehttp(b"HTTP/1.0 200 OK\r\n\r\nHello!")
        try:
            userpass = "a b:c d"
            url = "http://{}@python.org/".format(userpass)
            # fakehttp() above installed the fake class; keep a handle on it
            # to read back the request bytes it recorded.
            fakehttp_wrapper = http.client.HTTPConnection
            authorization = ("Authorization: Basic %s\r\n" %
                             b64encode(userpass.encode("ASCII")).decode("ASCII"))
            fp = urlopen(url)
            # The authorization header must be in place
            self.assertIn(authorization, fakehttp_wrapper.buf.decode("UTF-8"))
            self.assertEqual(fp.readline(), b"Hello!")
            self.assertEqual(fp.readline(), b"")
            # the spaces are quoted in URL so no match
            self.assertNotEqual(fp.geturl(), url)
            self.assertEqual(fp.getcode(), 200)
        finally:
            self.unfakehttp()

    def test_URLopener_deprecation(self):
        with support.check_warnings(('', DeprecationWarning)):
            urllib.request.URLopener()

    @unittest.skipUnless(ssl, "ssl module required")
    def test_cafile_and_context(self):
        # Passing both cafile and context is rejected with ValueError.
        context = ssl.create_default_context()
        with self.assertRaises(ValueError):
            urllib.request.urlopen(
                "https://localhost", cafile="/nonexistent/path", context=context
            )

class urlopen_DataTests(unittest.TestCase):
    """Test urlopen() opening a data URL."""

    def setUp(self):
        # text containing URL special- and unicode-characters
        self.text = "test data URLs :;,%=& \u00f6 \u00c4 "
        # 2x1 pixel RGB PNG image with one black and one white pixel
        self.image = (
            b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\x02\x00\x00\x00'
            b'\x01\x08\x02\x00\x00\x00{@\xe8\xdd\x00\x00\x00\x01sRGB\x00\xae'
            b'\xce\x1c\xe9\x00\x00\x00\x0fIDAT\x08\xd7c```\xf8\xff\xff?\x00'
            b'\x06\x01\x02\xfe\no/\x1e\x00\x00\x00\x00IEND\xaeB`\x82')

        self.text_url = (
            "data:text/plain;charset=UTF-8,test%20data%20URLs%20%3A%3B%2C%25%3"
            "D%26%20%C3%B6%20%C3%84%20")
        self.text_url_base64 = (
            "data:text/plain;charset=ISO-8859-1;base64,dGVzdCBkYXRhIFVSTHMgOjs"
            "sJT0mIPYgxCA%3D")
        # base64 encoded data URL that contains ignorable spaces,
        # such as "\n", " ", "%0A", and "%20".
        # The first line carries the scheme, the media type and the base64
        # encoding of the first 30 bytes of self.image; without it the value
        # is not a URL at all.
        self.image_url = (
            "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAIAAAABCAIAAAB7\n"
            "QOjdAAAAAXNSR0IArs4c6QAAAA9JREFUCNdj%0AYGBg%2BP//PwAGAQL%2BCm8 "
            "vHgAAAABJRU5ErkJggg%3D%3D%0A%20")

        self.text_url_resp = urllib.request.urlopen(self.text_url)
        self.addCleanup(self.text_url_resp.close)
        self.text_url_base64_resp = urllib.request.urlopen(
            self.text_url_base64)
        self.addCleanup(self.text_url_base64_resp.close)
        self.image_url_resp = urllib.request.urlopen(self.image_url)
        self.addCleanup(self.image_url_resp.close)

    def test_interface(self):
        # Make sure object returned by urlopen() has the specified methods
        for attr in ("read", "readline", "readlines",
                     "close", "info", "geturl", "getcode", "__iter__"):
            self.assertTrue(hasattr(self.text_url_resp, attr),
                         "object returned by urlopen() lacks %s attribute" %
                         attr)

    def test_info(self):
        self.assertIsInstance(self.text_url_resp.info(), email.message.Message)
        self.assertEqual(self.text_url_base64_resp.info().get_params(),
            [('text/plain', ''), ('charset', 'ISO-8859-1')])
        self.assertEqual(self.image_url_resp.info()['content-length'],
            str(len(self.image)))
        # A data URL with no media type gets the default one.
        with urllib.request.urlopen("data:,") as resp:
            default_params = resp.info().get_params()
        self.assertEqual(default_params,
            [('text/plain', ''), ('charset', 'US-ASCII')])

    def test_geturl(self):
        self.assertEqual(self.text_url_resp.geturl(), self.text_url)
        self.assertEqual(self.text_url_base64_resp.geturl(),
            self.text_url_base64)
        self.assertEqual(self.image_url_resp.geturl(), self.image_url)

    def test_read_text(self):
        self.assertEqual(self.text_url_resp.read().decode(
            dict(self.text_url_resp.info().get_params())['charset']), self.text)

    def test_read_text_base64(self):
        self.assertEqual(self.text_url_base64_resp.read().decode(
            dict(self.text_url_base64_resp.info().get_params())['charset']),
            self.text)

    def test_read_image(self):
        self.assertEqual(self.image_url_resp.read(), self.image)

    def test_missing_comma(self):
        self.assertRaises(ValueError, urllib.request.urlopen,
                          'data:text/plain')

    def test_invalid_base64_data(self):
        # missing padding character
        self.assertRaises(ValueError, urllib.request.urlopen,
                          'data:;base64,Cg=')

Brett Cannon19691362003-04-29 05:08:06 +0000537class urlretrieve_FileTests(unittest.TestCase):
Brett Cannon74bfd702003-04-25 09:39:47 +0000538 """Test urllib.urlretrieve() on local files"""
Skip Montanaro080c9972001-01-28 21:12:22 +0000539
Brett Cannon19691362003-04-29 05:08:06 +0000540 def setUp(self):
Georg Brandl5a650a22005-08-26 08:51:34 +0000541 # Create a list of temporary files. Each item in the list is a file
542 # name (absolute path or relative to the current working directory).
543 # All files in this list will be deleted in the tearDown method. Note,
544 # this only helps to makes sure temporary files get deleted, but it
545 # does nothing about trying to close files that may still be open. It
546 # is the responsibility of the developer to properly close files even
547 # when exceptional conditions occur.
548 self.tempFiles = []
549
Brett Cannon19691362003-04-29 05:08:06 +0000550 # Create a temporary file.
Benjamin Petersonee8712c2008-05-20 21:35:26 +0000551 self.registerFileForCleanUp(support.TESTFN)
Guido van Rossuma0982942007-07-10 08:30:03 +0000552 self.text = b'testing urllib.urlretrieve'
Georg Brandl5a650a22005-08-26 08:51:34 +0000553 try:
Benjamin Petersonee8712c2008-05-20 21:35:26 +0000554 FILE = open(support.TESTFN, 'wb')
Georg Brandl5a650a22005-08-26 08:51:34 +0000555 FILE.write(self.text)
556 FILE.close()
557 finally:
558 try: FILE.close()
559 except: pass
Brett Cannon19691362003-04-29 05:08:06 +0000560
561 def tearDown(self):
Georg Brandl5a650a22005-08-26 08:51:34 +0000562 # Delete the temporary files.
563 for each in self.tempFiles:
564 try: os.remove(each)
565 except: pass
566
567 def constructLocalFileUrl(self, filePath):
Victor Stinner6c6f8512010-08-07 10:09:35 +0000568 filePath = os.path.abspath(filePath)
569 try:
Marc-André Lemburg8f36af72011-02-25 15:42:01 +0000570 filePath.encode("utf-8")
Victor Stinner6c6f8512010-08-07 10:09:35 +0000571 except UnicodeEncodeError:
572 raise unittest.SkipTest("filePath is not encodable to utf8")
573 return "file://%s" % urllib.request.pathname2url(filePath)
Georg Brandl5a650a22005-08-26 08:51:34 +0000574
Guido van Rossum70d0dda2007-08-29 01:53:26 +0000575 def createNewTempFile(self, data=b""):
Georg Brandl5a650a22005-08-26 08:51:34 +0000576 """Creates a new temporary file containing the specified data,
577 registers the file for deletion during the test fixture tear down, and
578 returns the absolute path of the file."""
579
580 newFd, newFilePath = tempfile.mkstemp()
581 try:
582 self.registerFileForCleanUp(newFilePath)
583 newFile = os.fdopen(newFd, "wb")
584 newFile.write(data)
585 newFile.close()
586 finally:
587 try: newFile.close()
588 except: pass
589 return newFilePath
590
591 def registerFileForCleanUp(self, fileName):
592 self.tempFiles.append(fileName)
Brett Cannon19691362003-04-29 05:08:06 +0000593
594 def test_basic(self):
595 # Make sure that a local file just gets its own location returned and
596 # a headers value is returned.
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000597 result = urllib.request.urlretrieve("file:%s" % support.TESTFN)
Benjamin Petersonee8712c2008-05-20 21:35:26 +0000598 self.assertEqual(result[0], support.TESTFN)
Ezio Melottie9615932010-01-24 19:26:24 +0000599 self.assertIsInstance(result[1], email.message.Message,
Martin Panter7462b6492015-11-02 03:37:02 +0000600 "did not get an email.message.Message instance "
Ezio Melottie9615932010-01-24 19:26:24 +0000601 "as second returned value")
Brett Cannon19691362003-04-29 05:08:06 +0000602
603 def test_copy(self):
604 # Test that setting the filename argument works.
Benjamin Petersonee8712c2008-05-20 21:35:26 +0000605 second_temp = "%s.2" % support.TESTFN
Georg Brandl5a650a22005-08-26 08:51:34 +0000606 self.registerFileForCleanUp(second_temp)
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000607 result = urllib.request.urlretrieve(self.constructLocalFileUrl(
Benjamin Petersonee8712c2008-05-20 21:35:26 +0000608 support.TESTFN), second_temp)
Brett Cannon19691362003-04-29 05:08:06 +0000609 self.assertEqual(second_temp, result[0])
Benjamin Petersonc9c0f202009-06-30 23:06:06 +0000610 self.assertTrue(os.path.exists(second_temp), "copy of the file was not "
Brett Cannon19691362003-04-29 05:08:06 +0000611 "made")
Alex Martelli01c77c62006-08-24 02:58:11 +0000612 FILE = open(second_temp, 'rb')
Brett Cannon19691362003-04-29 05:08:06 +0000613 try:
614 text = FILE.read()
Brett Cannon19691362003-04-29 05:08:06 +0000615 FILE.close()
Georg Brandl5a650a22005-08-26 08:51:34 +0000616 finally:
617 try: FILE.close()
618 except: pass
Brett Cannon19691362003-04-29 05:08:06 +0000619 self.assertEqual(self.text, text)
620
621 def test_reporthook(self):
622 # Make sure that the reporthook works.
Senthil Kumarane24f96a2012-03-13 19:29:33 -0700623 def hooktester(block_count, block_read_size, file_size, count_holder=[0]):
624 self.assertIsInstance(block_count, int)
625 self.assertIsInstance(block_read_size, int)
626 self.assertIsInstance(file_size, int)
627 self.assertEqual(block_count, count_holder[0])
Brett Cannon19691362003-04-29 05:08:06 +0000628 count_holder[0] = count_holder[0] + 1
Benjamin Petersonee8712c2008-05-20 21:35:26 +0000629 second_temp = "%s.2" % support.TESTFN
Georg Brandl5a650a22005-08-26 08:51:34 +0000630 self.registerFileForCleanUp(second_temp)
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000631 urllib.request.urlretrieve(
632 self.constructLocalFileUrl(support.TESTFN),
Georg Brandl5a650a22005-08-26 08:51:34 +0000633 second_temp, hooktester)
634
635 def test_reporthook_0_bytes(self):
636 # Test on zero length file. Should call reporthook only 1 time.
637 report = []
Senthil Kumarane24f96a2012-03-13 19:29:33 -0700638 def hooktester(block_count, block_read_size, file_size, _report=report):
639 _report.append((block_count, block_read_size, file_size))
Georg Brandl5a650a22005-08-26 08:51:34 +0000640 srcFileName = self.createNewTempFile()
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000641 urllib.request.urlretrieve(self.constructLocalFileUrl(srcFileName),
Benjamin Petersonee8712c2008-05-20 21:35:26 +0000642 support.TESTFN, hooktester)
Georg Brandl5a650a22005-08-26 08:51:34 +0000643 self.assertEqual(len(report), 1)
644 self.assertEqual(report[0][2], 0)
645
def test_reporthook_5_bytes(self):
    # A 5 byte file fits in one block, so the reporthook fires twice:
    # once when the "network connection" is established and once after
    # the single block has been read.  Both calls report a size of 5.
    calls = []

    def record_call(block_count, block_read_size, file_size):
        calls.append((block_count, block_read_size, file_size))

    small_source = self.createNewTempFile(b"x" * 5)
    source_url = self.constructLocalFileUrl(small_source)
    urllib.request.urlretrieve(source_url, support.TESTFN, record_call)
    self.assertEqual(len(calls), 2)
    first_call, second_call = calls[0], calls[1]
    self.assertEqual(first_call[2], 5)
    self.assertEqual(second_call[2], 5)
Georg Brandl5a650a22005-08-26 08:51:34 +0000659
def test_reporthook_8193_bytes(self):
    # An 8193 byte file needs two blocks, so the reporthook fires three
    # times: once when the "network connection" is established, once for
    # the first 8192 bytes, and once for the trailing byte.  The block
    # size argument stays at 8192 on every call.
    calls = []

    def record_call(block_count, block_read_size, file_size):
        calls.append((block_count, block_read_size, file_size))

    large_source = self.createNewTempFile(b"x" * 8193)
    source_url = self.constructLocalFileUrl(large_source)
    urllib.request.urlretrieve(source_url, support.TESTFN, record_call)
    self.assertEqual(len(calls), 3)
    self.assertEqual(calls[0][2], 8193)
    for call_index in range(3):
        self.assertEqual(calls[call_index][1], 8192)
Skip Montanaro080c9972001-01-28 21:12:22 +0000675
Senthil Kumarance260142011-11-01 01:35:17 +0800676
class urlretrieve_HttpTests(unittest.TestCase, FakeHTTPMixin):
    """Test urllib.urlretrieve() using fake http connections"""

    # Canned reply that advertises a 100 byte body but only delivers
    # "FF\n", so every download of it comes up short.
    _truncated_reply = b'''HTTP/1.1 200 OK
Date: Wed, 02 Jan 2008 03:03:54 GMT
Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
Connection: close
Content-Length: 100
Content-Type: text/html; charset=iso-8859-1

FF
'''

    def test_short_content_raises_ContentTooShortError(self):
        # The short read must be detected when a reporthook is supplied.
        def ignore_progress(par1, par2, par3):
            pass

        self.fakehttp(self._truncated_reply)
        try:
            with self.assertRaises(urllib.error.ContentTooShortError):
                urllib.request.urlretrieve('http://example.com/',
                                           reporthook=ignore_progress)
        finally:
            # Always restore the real HTTP connection class.
            self.unfakehttp()

    def test_short_content_raises_ContentTooShortError_without_reporthook(self):
        # The short read must also be detected without any reporthook.
        self.fakehttp(self._truncated_reply)
        try:
            with self.assertRaises(urllib.error.ContentTooShortError):
                urllib.request.urlretrieve('http://example.com/')
        finally:
            # Always restore the real HTTP connection class.
            self.unfakehttp()
716
717
class QuotingTests(unittest.TestCase):
    r"""Tests for urllib.parse.quote() and urllib.parse.quote_plus()

    According to RFC 2396 (Uniform Resource Identifiers), to escape a
    character you write it as '%' + <2 character US-ASCII hex value>.
    The Python code of ``'%' + hex(ord(<character>))[2:]`` escapes a
    character properly. Case does not matter on the hex letters.

    The various character sets specified are:

    Reserved characters : ";/?:@&=+$,"
        Have special meaning in URIs and must be escaped if not being used for
        their special meaning
    Data characters : letters, digits, and "-_.!~*'()"
        Unreserved and do not need to be escaped; can be, though, if desired
    Control characters : 0x00 - 0x1F, 0x7F
        Have no use in URIs so must be escaped
    space : 0x20
        Must be escaped
    Delimiters : '<>#%"'
        Must be escaped
    Unwise : "{}|\^[]`"
        Must be escaped

    """

    def test_never_quote(self):
        # Make sure quote() does not quote letters, digits, and "_.-"
        do_not_quote = ''.join(["ABCDEFGHIJKLMNOPQRSTUVWXYZ",
                                "abcdefghijklmnopqrstuvwxyz",
                                "0123456789",
                                "_.-"])
        result = urllib.parse.quote(do_not_quote)
        self.assertEqual(do_not_quote, result,
                         "using quote(): %r != %r" % (do_not_quote, result))
        result = urllib.parse.quote_plus(do_not_quote)
        self.assertEqual(do_not_quote, result,
                         "using quote_plus(): %r != %r" % (do_not_quote, result))

    def test_default_safe(self):
        # Test '/' is default value for 'safe' parameter
        self.assertEqual(urllib.parse.quote.__defaults__[0], '/')

    def test_safe(self):
        # Test setting 'safe' parameter does what it should do
        quote_by_default = "<>"
        result = urllib.parse.quote(quote_by_default, safe=quote_by_default)
        self.assertEqual(quote_by_default, result,
                         "using quote(): %r != %r" % (quote_by_default, result))
        result = urllib.parse.quote_plus(quote_by_default,
                                         safe=quote_by_default)
        self.assertEqual(quote_by_default, result,
                         "using quote_plus(): %r != %r" %
                         (quote_by_default, result))
        # Safe expressed as bytes rather than str
        result = urllib.parse.quote(quote_by_default, safe=b"<>")
        self.assertEqual(quote_by_default, result,
                         "using quote(): %r != %r" % (quote_by_default, result))
        # "Safe" non-ASCII characters should have no effect
        # (Since URIs are not allowed to have non-ASCII characters)
        result = urllib.parse.quote("a\xfcb", encoding="latin-1", safe="\xfc")
        expect = urllib.parse.quote("a\xfcb", encoding="latin-1", safe="")
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" %
                         (expect, result))
        # Same as above, but using a bytes rather than str
        result = urllib.parse.quote("a\xfcb", encoding="latin-1", safe=b"\xfc")
        expect = urllib.parse.quote("a\xfcb", encoding="latin-1", safe="")
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" %
                         (expect, result))

    def test_default_quoting(self):
        # Make sure all characters that should be quoted are by default sans
        # space (separate test for that).
        should_quote = [chr(num) for num in range(32)]  # For 0x00 - 0x1F
        # The backslash is written doubled so the literal contains no
        # invalid escape sequence; the resulting characters are unchanged.
        should_quote.append('<>#%"{}|\\^[]`')
        should_quote.append(chr(127))  # For 0x7F
        should_quote = ''.join(should_quote)
        for char in should_quote:
            result = urllib.parse.quote(char)
            self.assertEqual(hexescape(char), result,
                             "using quote(): "
                             "%s should be escaped to %s, not %s" %
                             (char, hexescape(char), result))
            result = urllib.parse.quote_plus(char)
            self.assertEqual(hexescape(char), result,
                             "using quote_plus(): "
                             "%s should be escaped to %s, not %s" %
                             (char, hexescape(char), result))
        del should_quote
        # Only the characters that need it are escaped inside a longer string
        partial_quote = "ab[]cd"
        expected = "ab%5B%5Dcd"
        result = urllib.parse.quote(partial_quote)
        self.assertEqual(expected, result,
                         "using quote(): %r != %r" % (expected, result))
        result = urllib.parse.quote_plus(partial_quote)
        self.assertEqual(expected, result,
                         "using quote_plus(): %r != %r" % (expected, result))

    def test_quoting_space(self):
        # Make sure quote() and quote_plus() handle spaces as specified in
        # their unique way
        result = urllib.parse.quote(' ')
        self.assertEqual(result, hexescape(' '),
                         "using quote(): %r != %r" % (result, hexescape(' ')))
        result = urllib.parse.quote_plus(' ')
        self.assertEqual(result, '+',
                         "using quote_plus(): %r != +" % result)
        given = "a b cd e f"
        expect = given.replace(' ', hexescape(' '))
        result = urllib.parse.quote(given)
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        expect = given.replace(' ', '+')
        result = urllib.parse.quote_plus(given)
        self.assertEqual(expect, result,
                         "using quote_plus(): %r != %r" % (expect, result))

    def test_quoting_plus(self):
        # A literal '+' is escaped unless it is listed as safe; spaces
        # always become '+'.
        self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma'),
                         'alpha%2Bbeta+gamma')
        self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma', '+'),
                         'alpha+beta+gamma')
        # Test with bytes
        self.assertEqual(urllib.parse.quote_plus(b'alpha+beta gamma'),
                         'alpha%2Bbeta+gamma')
        # Test with safe bytes
        self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma', b'+'),
                         'alpha+beta+gamma')

    def test_quote_bytes(self):
        # Bytes should quote directly to percent-encoded values
        given = b"\xa2\xd8ab\xff"
        expect = "%A2%D8ab%FF"
        result = urllib.parse.quote(given)
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        # Encoding argument should raise type error on bytes input
        self.assertRaises(TypeError, urllib.parse.quote, given,
                          encoding="latin-1")
        # quote_from_bytes should work the same
        result = urllib.parse.quote_from_bytes(given)
        self.assertEqual(expect, result,
                         "using quote_from_bytes(): %r != %r"
                         % (expect, result))

    def test_quote_with_unicode(self):
        # Characters in Latin-1 range, encoded by default in UTF-8
        given = "\xa2\xd8ab\xff"
        expect = "%C2%A2%C3%98ab%C3%BF"
        result = urllib.parse.quote(given)
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        # Characters in Latin-1 range, encoded with None (default)
        result = urllib.parse.quote(given, encoding=None, errors=None)
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        # Characters in Latin-1 range, encoded with Latin-1
        given = "\xa2\xd8ab\xff"
        expect = "%A2%D8ab%FF"
        result = urllib.parse.quote(given, encoding="latin-1")
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        # Characters in BMP, encoded by default in UTF-8
        given = "\u6f22\u5b57"              # "Kanji"
        expect = "%E6%BC%A2%E5%AD%97"
        result = urllib.parse.quote(given)
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        # Characters in BMP, encoded with Latin-1
        given = "\u6f22\u5b57"
        self.assertRaises(UnicodeEncodeError, urllib.parse.quote, given,
                          encoding="latin-1")
        # Characters in BMP, encoded with Latin-1, with replace error handling
        given = "\u6f22\u5b57"
        expect = "%3F%3F"                   # "??"
        result = urllib.parse.quote(given, encoding="latin-1",
                                    errors="replace")
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        # Characters in BMP, Latin-1, with xmlcharref error handling
        given = "\u6f22\u5b57"
        expect = "%26%2328450%3B%26%2323383%3B"     # "&#28450;&#23383;"
        result = urllib.parse.quote(given, encoding="latin-1",
                                    errors="xmlcharrefreplace")
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))

    def test_quote_plus_with_unicode(self):
        # Encoding (latin-1) test for quote_plus
        given = "\xa2\xd8 \xff"
        expect = "%A2%D8+%FF"
        result = urllib.parse.quote_plus(given, encoding="latin-1")
        self.assertEqual(expect, result,
                         "using quote_plus(): %r != %r" % (expect, result))
        # Errors test for quote_plus
        given = "ab\u6f22\u5b57 cd"
        expect = "ab%3F%3F+cd"
        result = urllib.parse.quote_plus(given, encoding="latin-1",
                                         errors="replace")
        self.assertEqual(expect, result,
                         "using quote_plus(): %r != %r" % (expect, result))
921
Senthil Kumarand496c4c2010-07-30 19:34:36 +0000922
class UnquotingTests(unittest.TestCase):
    """Tests for unquote() and unquote_plus()

    See the doc string for QuotingTests for details on quoting and such.

    """

    def test_unquoting(self):
        # Make sure unquoting of all ASCII values works
        escape_list = []
        for num in range(128):
            given = hexescape(chr(num))
            expect = chr(num)
            result = urllib.parse.unquote(given)
            self.assertEqual(expect, result,
                             "using unquote(): %r != %r" % (expect, result))
            result = urllib.parse.unquote_plus(given)
            self.assertEqual(expect, result,
                             "using unquote_plus(): %r != %r" %
                             (expect, result))
            escape_list.append(given)
        escape_string = ''.join(escape_list)
        del escape_list
        result = urllib.parse.unquote(escape_string)
        # Exactly one '%' may survive: the one decoded from "%25" itself.
        self.assertEqual(result.count('%'), 1,
                         "using unquote(): not all characters escaped: "
                         "%s" % result)
        # Non-str arguments must be rejected
        self.assertRaises((TypeError, AttributeError), urllib.parse.unquote, None)
        self.assertRaises((TypeError, AttributeError), urllib.parse.unquote, ())
        with support.check_warnings(('', BytesWarning), quiet=True):
            self.assertRaises((TypeError, AttributeError), urllib.parse.unquote, b'')

    def test_unquoting_badpercent(self):
        # Test unquoting on bad percent-escapes
        given = '%xab'
        expect = given
        result = urllib.parse.unquote(given)
        self.assertEqual(expect, result, "using unquote(): %r != %r"
                         % (expect, result))
        given = '%x'
        expect = given
        result = urllib.parse.unquote(given)
        self.assertEqual(expect, result, "using unquote(): %r != %r"
                         % (expect, result))
        given = '%'
        expect = given
        result = urllib.parse.unquote(given)
        self.assertEqual(expect, result, "using unquote(): %r != %r"
                         % (expect, result))
        # unquote_to_bytes
        given = '%xab'
        expect = bytes(given, 'ascii')
        result = urllib.parse.unquote_to_bytes(given)
        self.assertEqual(expect, result, "using unquote_to_bytes(): %r != %r"
                         % (expect, result))
        given = '%x'
        expect = bytes(given, 'ascii')
        result = urllib.parse.unquote_to_bytes(given)
        self.assertEqual(expect, result, "using unquote_to_bytes(): %r != %r"
                         % (expect, result))
        given = '%'
        expect = bytes(given, 'ascii')
        result = urllib.parse.unquote_to_bytes(given)
        self.assertEqual(expect, result, "using unquote_to_bytes(): %r != %r"
                         % (expect, result))
        # Non-string arguments must be rejected
        self.assertRaises((TypeError, AttributeError), urllib.parse.unquote_to_bytes, None)
        self.assertRaises((TypeError, AttributeError), urllib.parse.unquote_to_bytes, ())

    def test_unquoting_mixed_case(self):
        # Test unquoting on mixed-case hex digits in the percent-escapes
        given = '%Ab%eA'
        expect = b'\xab\xea'
        result = urllib.parse.unquote_to_bytes(given)
        self.assertEqual(expect, result,
                         "using unquote_to_bytes(): %r != %r"
                         % (expect, result))

    def test_unquoting_parts(self):
        # Make sure unquoting works when have non-quoted characters
        # interspersed
        given = 'ab%sd' % hexescape('c')
        expect = "abcd"
        result = urllib.parse.unquote(given)
        # The failure message names the function actually under test.
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))
        result = urllib.parse.unquote_plus(given)
        self.assertEqual(expect, result,
                         "using unquote_plus(): %r != %r" % (expect, result))

    def test_unquoting_plus(self):
        # Test difference between unquote() and unquote_plus()
        given = "are+there+spaces..."
        expect = given
        result = urllib.parse.unquote(given)
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))
        expect = given.replace('+', ' ')
        result = urllib.parse.unquote_plus(given)
        self.assertEqual(expect, result,
                         "using unquote_plus(): %r != %r" % (expect, result))

    def test_unquote_to_bytes(self):
        # Percent-escapes in a str decode to the raw bytes they name
        given = 'br%C3%BCckner_sapporo_20050930.doc'
        expect = b'br\xc3\xbcckner_sapporo_20050930.doc'
        result = urllib.parse.unquote_to_bytes(given)
        self.assertEqual(expect, result,
                         "using unquote_to_bytes(): %r != %r"
                         % (expect, result))
        # Test on a string with unescaped non-ASCII characters
        # (Technically an invalid URI; expect those characters to be UTF-8
        # encoded).
        result = urllib.parse.unquote_to_bytes("\u6f22%C3%BC")
        expect = b'\xe6\xbc\xa2\xc3\xbc'    # UTF-8 for "\u6f22\u00fc"
        self.assertEqual(expect, result,
                         "using unquote_to_bytes(): %r != %r"
                         % (expect, result))
        # Test with a bytes as input
        given = b'%A2%D8ab%FF'
        expect = b'\xa2\xd8ab\xff'
        result = urllib.parse.unquote_to_bytes(given)
        self.assertEqual(expect, result,
                         "using unquote_to_bytes(): %r != %r"
                         % (expect, result))
        # Test with a bytes as input, with unescaped non-ASCII bytes
        # (Technically an invalid URI; expect those bytes to be preserved)
        given = b'%A2\xd8ab%FF'
        expect = b'\xa2\xd8ab\xff'
        result = urllib.parse.unquote_to_bytes(given)
        self.assertEqual(expect, result,
                         "using unquote_to_bytes(): %r != %r"
                         % (expect, result))

    def test_unquote_with_unicode(self):
        # Characters in the Latin-1 range, encoded with UTF-8
        given = 'br%C3%BCckner_sapporo_20050930.doc'
        expect = 'br\u00fcckner_sapporo_20050930.doc'
        result = urllib.parse.unquote(given)
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))
        # Characters in the Latin-1 range, encoded with None (default)
        result = urllib.parse.unquote(given, encoding=None, errors=None)
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))

        # Characters in the Latin-1 range, encoded with Latin-1
        result = urllib.parse.unquote('br%FCckner_sapporo_20050930.doc',
                                      encoding="latin-1")
        expect = 'br\u00fcckner_sapporo_20050930.doc'
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))

        # Characters in BMP, encoded with UTF-8
        given = "%E6%BC%A2%E5%AD%97"
        expect = "\u6f22\u5b57"             # "Kanji"
        result = urllib.parse.unquote(given)
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))

        # Decode with UTF-8, invalid sequence
        given = "%F3%B1"
        expect = "\ufffd"                   # Replacement character
        result = urllib.parse.unquote(given)
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))

        # Decode with UTF-8, invalid sequence, replace errors
        result = urllib.parse.unquote(given, errors="replace")
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))

        # Decode with UTF-8, invalid sequence, ignoring errors
        given = "%F3%B1"
        expect = ""
        result = urllib.parse.unquote(given, errors="ignore")
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))

        # A mix of non-ASCII and percent-encoded characters, UTF-8
        result = urllib.parse.unquote("\u6f22%C3%BC")
        expect = '\u6f22\u00fc'
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))

        # A mix of non-ASCII and percent-encoded characters, Latin-1
        # (Note, the string contains non-Latin-1-representable characters)
        result = urllib.parse.unquote("\u6f22%FC", encoding="latin-1")
        expect = '\u6f22\u00fc'
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))
Raymond Hettinger4b0f20d2005-10-15 16:41:53 +00001112
class urlencode_Tests(unittest.TestCase):
    """Tests for urlencode()"""

    def help_inputtype(self, given, test_type):
        """Helper method for testing different input types.

        'given' must lead to only the pairs:
            * 1st, 1
            * 2nd, 2
            * 3rd, 3

        Test cannot assume anything about order.  Docs make no guarantee and
        have possible dictionary input.

        'test_type' is a short description of the input kind; it is only
        used to label assertion failure messages.

        """
        expect_somewhere = ["1st=1", "2nd=2", "3rd=3"]
        result = urllib.parse.urlencode(given)
        for expected in expect_somewhere:
            self.assertIn(expected, result,
                          "testing %s: %s not found in %s" %
                          (test_type, expected, result))
        self.assertEqual(result.count('&'), 2,
                         "testing %s: expected 2 '&'s; got %s" %
                         (test_type, result.count('&')))
        # Every value is a single digit and every key starts with a digit,
        # so the first '&' must have a digit on both sides.
        amp_location = result.index('&')
        on_amp_left = result[amp_location - 1]
        on_amp_right = result[amp_location + 1]
        self.assertTrue(on_amp_left.isdigit() and on_amp_right.isdigit(),
                        "testing %s: '&' not located in proper place in %s" %
                        (test_type, result))
        self.assertEqual(len(result), (5 * 3) + 2,  # 5 chars per thing and amps
                         "testing %s: "
                         "unexpected number of characters: %s != %s" %
                         (test_type, len(result), (5 * 3) + 2))

    def test_using_mapping(self):
        # Test passing in a mapping object as an argument.
        self.help_inputtype({"1st": '1', "2nd": '2', "3rd": '3'},
                            "using dict as input type")

    def test_using_sequence(self):
        # Test passing in a sequence of two-item sequences as an argument.
        self.help_inputtype([('1st', '1'), ('2nd', '2'), ('3rd', '3')],
                            "using sequence of two-item tuples as input")

    def test_quoting(self):
        # Make sure keys and values are quoted using quote_plus()
        given = {"&": "="}
        expect = "%s=%s" % (hexescape('&'), hexescape('='))
        result = urllib.parse.urlencode(given)
        self.assertEqual(expect, result)
        given = {"key name": "A bunch of pluses"}
        expect = "key+name=A+bunch+of+pluses"
        result = urllib.parse.urlencode(given)
        self.assertEqual(expect, result)

    def test_doseq(self):
        # Test that passing True for 'doseq' parameter works correctly
        given = {'sequence': ['1', '2', '3']}
        # Without doseq the list is stringified and quoted as one value
        expect = "sequence=%s" % urllib.parse.quote_plus(str(['1', '2', '3']))
        result = urllib.parse.urlencode(given)
        self.assertEqual(expect, result)
        # With doseq each element gets its own key=value pair
        result = urllib.parse.urlencode(given, True)
        for value in given["sequence"]:
            expect = "sequence=%s" % value
            self.assertIn(expect, result)
        self.assertEqual(result.count('&'), 2,
                         "Expected 2 '&'s, got %s" % result.count('&'))

    def test_empty_sequence(self):
        # Empty mappings and sequences encode to the empty string
        self.assertEqual("", urllib.parse.urlencode({}))
        self.assertEqual("", urllib.parse.urlencode([]))

    def test_nonstring_values(self):
        # Non-string values are converted with str() before quoting
        self.assertEqual("a=1", urllib.parse.urlencode({"a": 1}))
        self.assertEqual("a=None", urllib.parse.urlencode({"a": None}))

    def test_nonstring_seq_values(self):
        # With doseq, each element of a sequence value is str()-converted
        self.assertEqual("a=1&a=2", urllib.parse.urlencode({"a": [1, 2]}, True))
        self.assertEqual("a=None&a=a",
                         urllib.parse.urlencode({"a": [None, "a"]}, True))
        # The expected string shows a mapping value contributing its keys
        data = collections.OrderedDict([("a", 1), ("b", 1)])
        self.assertEqual("a=a&a=b",
                         urllib.parse.urlencode({"a": data}, True))

    def test_urlencode_encoding(self):
        # ASCII encoding. Expect %3F with errors="replace"
        given = (('\u00a0', '\u00c1'),)
        expect = '%3F=%3F'
        result = urllib.parse.urlencode(given, encoding="ASCII", errors="replace")
        self.assertEqual(expect, result)

        # Default is UTF-8 encoding.
        given = (('\u00a0', '\u00c1'),)
        expect = '%C2%A0=%C3%81'
        result = urllib.parse.urlencode(given)
        self.assertEqual(expect, result)

        # Latin-1 encoding.
        given = (('\u00a0', '\u00c1'),)
        expect = '%A0=%C1'
        result = urllib.parse.urlencode(given, encoding="latin-1")
        self.assertEqual(expect, result)

    def test_urlencode_encoding_doseq(self):
        # ASCII Encoding. Expect %3F with errors="replace"
        given = (('\u00a0', '\u00c1'),)
        expect = '%3F=%3F'
        result = urllib.parse.urlencode(given, doseq=True,
                                        encoding="ASCII", errors="replace")
        self.assertEqual(expect, result)

        # ASCII Encoding. On a sequence of values.
        given = (("\u00a0", (1, "\u00c1")),)
        expect = '%3F=1&%3F=%3F'
        result = urllib.parse.urlencode(given, True,
                                        encoding="ASCII", errors="replace")
        self.assertEqual(expect, result)

        # Utf-8
        given = (("\u00a0", "\u00c1"),)
        expect = '%C2%A0=%C3%81'
        result = urllib.parse.urlencode(given, True)
        self.assertEqual(expect, result)

        given = (("\u00a0", (42, "\u00c1")),)
        expect = '%C2%A0=42&%C2%A0=%C3%81'
        result = urllib.parse.urlencode(given, True)
        self.assertEqual(expect, result)

        # latin-1
        given = (("\u00a0", "\u00c1"),)
        expect = '%A0=%C1'
        result = urllib.parse.urlencode(given, True, encoding="latin-1")
        self.assertEqual(expect, result)

        given = (("\u00a0", (42, "\u00c1")),)
        expect = '%A0=42&%A0=%C1'
        result = urllib.parse.urlencode(given, True, encoding="latin-1")
        self.assertEqual(expect, result)

    def test_urlencode_bytes(self):
        # Bytes keys and values are percent-encoded byte by byte
        given = ((b'\xa0\x24', b'\xc1\x24'),)
        expect = '%A0%24=%C1%24'
        result = urllib.parse.urlencode(given)
        self.assertEqual(expect, result)
        result = urllib.parse.urlencode(given, True)
        self.assertEqual(expect, result)

        # Sequence of values
        given = ((b'\xa0\x24', (42, b'\xc1\x24')),)
        expect = '%A0%24=42&%A0%24=%C1%24'
        result = urllib.parse.urlencode(given, True)
        self.assertEqual(expect, result)

    def test_urlencode_encoding_safe_parameter(self):

        # Send '$' (\x24) as safe character
        # Default utf-8 encoding

        given = ((b'\xa0\x24', b'\xc1\x24'),)
        result = urllib.parse.urlencode(given, safe=":$")
        expect = '%A0$=%C1$'
        self.assertEqual(expect, result)

        given = ((b'\xa0\x24', b'\xc1\x24'),)
        result = urllib.parse.urlencode(given, doseq=True, safe=":$")
        expect = '%A0$=%C1$'
        self.assertEqual(expect, result)

        # Safe parameter in sequence
        given = ((b'\xa0\x24', (b'\xc1\x24', 0xd, 42)),)
        expect = '%A0$=%C1$&%A0$=13&%A0$=42'
        result = urllib.parse.urlencode(given, True, safe=":$")
        self.assertEqual(expect, result)

        # Test all above in latin-1 encoding

        given = ((b'\xa0\x24', b'\xc1\x24'),)
        result = urllib.parse.urlencode(given, safe=":$",
                                        encoding="latin-1")
        expect = '%A0$=%C1$'
        self.assertEqual(expect, result)

        given = ((b'\xa0\x24', b'\xc1\x24'),)
        expect = '%A0$=%C1$'
        result = urllib.parse.urlencode(given, doseq=True, safe=":$",
                                        encoding="latin-1")
        # This result was previously computed but never checked.
        self.assertEqual(expect, result)

        given = ((b'\xa0\x24', (b'\xc1\x24', 0xd, 42)),)
        expect = '%A0$=%C1$&%A0$=13&%A0$=42'
        result = urllib.parse.urlencode(given, True, safe=":$",
                                        encoding="latin-1")
        self.assertEqual(expect, result)
1307
class Pathname_Tests(unittest.TestCase):
    """Test pathname2url() and url2pathname()"""

    def test_basic(self):
        # Make sure simple tests pass: a plain relative path converts to a
        # '/'-separated URL and back again.
        expected_path = os.path.join("parts", "of", "a", "path")
        expected_url = "parts/of/a/path"
        result = urllib.request.pathname2url(expected_path)
        self.assertEqual(expected_url, result,
                         "pathname2url() failed; %s != %s" %
                         (expected_url, result))
        result = urllib.request.url2pathname(expected_url)
        self.assertEqual(expected_path, result,
                         "url2pathname() failed; %s != %s" %
                         (expected_path, result))

    def test_quoting(self):
        # Test automatic quoting and unquoting works for pathname2url() and
        # url2pathname() respectively
        given = os.path.join("needs", "quot=ing", "here")
        expect = "needs/%s/here" % urllib.parse.quote("quot=ing")
        result = urllib.request.pathname2url(given)
        self.assertEqual(expect, result,
                         "pathname2url() failed; %s != %s" %
                         (expect, result))
        # Feed the quoted URL back in: it must unquote to the original path.
        expect = given
        result = urllib.request.url2pathname(result)
        self.assertEqual(expect, result,
                         "url2pathname() failed; %s != %s" %
                         (expect, result))
        given = os.path.join("make sure", "using_quote")
        expect = "%s/using_quote" % urllib.parse.quote("make sure")
        result = urllib.request.pathname2url(given)
        self.assertEqual(expect, result,
                         "pathname2url() failed; %s != %s" %
                         (expect, result))
        # A literal '+' in the URL is expected to survive unchanged.
        given = "make+sure/using_unquote"
        expect = os.path.join("make+sure", "using_unquote")
        result = urllib.request.url2pathname(given)
        self.assertEqual(expect, result,
                         "url2pathname() failed; %s != %s" %
                         (expect, result))

    @unittest.skipUnless(sys.platform == 'win32',
                         'test specific to the Windows implementation of '
                         'urllib.request.url2pathname().')
    def test_ntpath(self):
        # Several spellings of a drive-root URL must all map to 'C:\'.
        given = ('/C:/', '///C:/', '/C|//')
        expect = 'C:\\'
        for url in given:
            # subTest reports every failing spelling, not just the first.
            with self.subTest(url=url):
                result = urllib.request.url2pathname(url)
                self.assertEqual(expect, result,
                                 'urllib.request.url2pathname() failed; '
                                 '%s != %s' % (expect, result))
        given = '///C|/path'
        expect = 'C:\\path'
        result = urllib.request.url2pathname(given)
        self.assertEqual(expect, result,
                         'urllib.request.url2pathname() failed; %s != %s' %
                         (expect, result))
1367
class Utility_Tests(unittest.TestCase):
    """Tests for the assorted utility functions exposed by urllib."""

    def test_thishost(self):
        """urllib.request.thishost() must hand back a tuple."""
        host_info = urllib.request.thishost()
        self.assertIsInstance(host_info, tuple)
1374
Senthil Kumaran690ce9b2009-05-05 18:41:13 +00001375
1376class URLopener_Tests(unittest.TestCase):
1377 """Testcase to test the open method of URLopener class."""
1378
1379 def test_quoted_open(self):
1380 class DummyURLopener(urllib.request.URLopener):
1381 def open_spam(self, url):
1382 return url
Ezio Melotti79b99db2013-02-21 02:41:42 +02001383 with support.check_warnings(
1384 ('DummyURLopener style of invoking requests is deprecated.',
1385 DeprecationWarning)):
1386 self.assertEqual(DummyURLopener().open(
1387 'spam://example/ /'),'//example/%20/')
Senthil Kumaran690ce9b2009-05-05 18:41:13 +00001388
Ezio Melotti79b99db2013-02-21 02:41:42 +02001389 # test the safe characters are not quoted by urlopen
1390 self.assertEqual(DummyURLopener().open(
1391 "spam://c:|windows%/:=&?~#+!$,;'@()*[]|/path/"),
1392 "//c:|windows%/:=&?~#+!$,;'@()*[]|/path/")
Senthil Kumaran734f0592010-02-20 22:19:04 +00001393
# The FTP wrapper tests below have simply been commented out.
# I can't really tell why they keep failing on Windows and SPARC.
# Everywhere else they work fine, but on those machines they sometimes
# fail in one of the tests and sometimes in another.  I have a Linux
# machine, and there the tests pass.
# If anybody has one of the problematic environments, please help!
# .   Facundo
1401#
1402# def server(evt):
Georg Brandlf78e02b2008-06-10 17:40:04 +00001403# import socket, time
Guido van Rossume7ba4952007-06-06 23:52:48 +00001404# serv = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
1405# serv.settimeout(3)
1406# serv.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
1407# serv.bind(("", 9093))
Charles-François Natali6e204602014-07-23 19:28:13 +01001408# serv.listen()
Guido van Rossume7ba4952007-06-06 23:52:48 +00001409# try:
1410# conn, addr = serv.accept()
1411# conn.send("1 Hola mundo\n")
1412# cantdata = 0
1413# while cantdata < 13:
1414# data = conn.recv(13-cantdata)
1415# cantdata += len(data)
1416# time.sleep(.3)
1417# conn.send("2 No more lines\n")
1418# conn.close()
1419# except socket.timeout:
1420# pass
1421# finally:
1422# serv.close()
1423# evt.set()
1424#
1425# class FTPWrapperTests(unittest.TestCase):
1426#
1427# def setUp(self):
Georg Brandlf78e02b2008-06-10 17:40:04 +00001428# import ftplib, time, threading
Guido van Rossume7ba4952007-06-06 23:52:48 +00001429# ftplib.FTP.port = 9093
1430# self.evt = threading.Event()
1431# threading.Thread(target=server, args=(self.evt,)).start()
1432# time.sleep(.1)
1433#
1434# def tearDown(self):
1435# self.evt.wait()
1436#
1437# def testBasic(self):
1438# # connects
1439# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [])
Georg Brandlf78e02b2008-06-10 17:40:04 +00001440# ftp.close()
Guido van Rossume7ba4952007-06-06 23:52:48 +00001441#
1442# def testTimeoutNone(self):
Georg Brandlf78e02b2008-06-10 17:40:04 +00001443# # global default timeout is ignored
1444# import socket
Serhiy Storchaka25d8aea2014-02-08 14:50:08 +02001445# self.assertIsNone(socket.getdefaulttimeout())
Guido van Rossume7ba4952007-06-06 23:52:48 +00001446# socket.setdefaulttimeout(30)
1447# try:
1448# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [])
1449# finally:
Georg Brandlf78e02b2008-06-10 17:40:04 +00001450# socket.setdefaulttimeout(None)
Guido van Rossume7ba4952007-06-06 23:52:48 +00001451# self.assertEqual(ftp.ftp.sock.gettimeout(), 30)
Georg Brandlf78e02b2008-06-10 17:40:04 +00001452# ftp.close()
Guido van Rossume7ba4952007-06-06 23:52:48 +00001453#
Georg Brandlf78e02b2008-06-10 17:40:04 +00001454# def testTimeoutDefault(self):
1455# # global default timeout is used
1456# import socket
Serhiy Storchaka25d8aea2014-02-08 14:50:08 +02001457# self.assertIsNone(socket.getdefaulttimeout())
Georg Brandlf78e02b2008-06-10 17:40:04 +00001458# socket.setdefaulttimeout(30)
1459# try:
1460# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [])
1461# finally:
1462# socket.setdefaulttimeout(None)
1463# self.assertEqual(ftp.ftp.sock.gettimeout(), 30)
1464# ftp.close()
1465#
1466# def testTimeoutValue(self):
1467# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [],
1468# timeout=30)
1469# self.assertEqual(ftp.ftp.sock.gettimeout(), 30)
1470# ftp.close()
Guido van Rossume7ba4952007-06-06 23:52:48 +00001471
Senthil Kumaran8b081b72013-04-10 20:53:12 -07001472
class RequestTests(unittest.TestCase):
    """Unit tests for urllib.request.Request."""

    def test_default_values(self):
        # With no explicit method, get_method() is GET without data and
        # POST once a data argument (here an empty dict) is supplied.
        url = "http://www.python.org"
        without_data = urllib.request.Request(url)
        self.assertEqual(without_data.get_method(), 'GET')
        with_data = urllib.request.Request(url, {})
        self.assertEqual(with_data.get_method(), 'POST')

    def test_with_method_arg(self):
        make_request = urllib.request.Request
        url = "http://www.python.org"

        # An explicit method is stored and reported as given ...
        head_only = make_request(url, method='HEAD')
        self.assertEqual(head_only.method, 'HEAD')
        self.assertEqual(head_only.get_method(), 'HEAD')

        # ... even when data is supplied as well.
        head_with_data = make_request(url, {}, method='HEAD')
        self.assertEqual(head_with_data.method, 'HEAD')
        self.assertEqual(head_with_data.get_method(), 'HEAD')

        # Reassigning the attribute afterwards changes what get_method()
        # reports.
        reassigned = make_request(url, method='GET')
        self.assertEqual(reassigned.get_method(), 'GET')
        reassigned.method = 'HEAD'
        self.assertEqual(reassigned.get_method(), 'HEAD')
Skip Montanaro080c9972001-01-28 21:12:22 +00001495
1496
class URL2PathNameTests(unittest.TestCase):
    """Tests for url2pathname() as imported from nturl2path."""

    def test_converting_drive_letter(self):
        # Both '|' and ':' are accepted after the drive letter.
        cases = (("///C|", 'C:'),
                 ("///C:", 'C:'),
                 ("///C|/", 'C:\\'))
        for url, path in cases:
            self.assertEqual(url2pathname(url), path)

    def test_converting_when_no_drive_letter(self):
        # Expected paths are spelled with escaped backslashes because a raw
        # string cannot end in a single backslash.
        cases = (("///C/test/", '\\\\\\C\\test\\'),
                 ("////C/test/", '\\\\C\\test\\'))
        for url, path in cases:
            self.assertEqual(url2pathname(url), path)

    def test_simple_compare(self):
        converted = url2pathname("///C|/foo/bar/spam.foo")
        self.assertEqual(converted, r'C:\foo\bar\spam.foo')

    def test_non_ascii_drive_letter(self):
        with self.assertRaises(IOError):
            url2pathname("///\u00e8|/")

    def test_roundtrip_url2pathname(self):
        # path -> URL -> path must give back the starting path.
        paths = ('C:',
                 r'\\\C\test\\',
                 r'C:\foo\bar\spam.foo')
        for original in paths:
            as_url = pathname2url(original)
            self.assertEqual(url2pathname(as_url), original)
Senthil Kumaran277e9092013-04-10 20:51:19 -07001523
class PathName2URLTests(unittest.TestCase):
    """Tests for pathname2url() as imported from nturl2path."""

    def test_converting_drive_letter(self):
        # A bare drive and a drive root are expected to give the same URL.
        for path in ("C:", "C:\\"):
            self.assertEqual(pathname2url(path), '///C:')

    def test_converting_when_no_drive_letter(self):
        # The trailing backslash is a separate literal because a raw string
        # cannot end in a single backslash.
        cases = ((r"\\\folder\test" "\\", '/////folder/test/'),
                 (r"\\folder\test" "\\", '////folder/test/'),
                 (r"\folder\test" "\\", '/folder/test/'))
        for path, url in cases:
            self.assertEqual(pathname2url(path), url)

    def test_simple_compare(self):
        converted = pathname2url(r'C:\foo\bar\spam.foo')
        self.assertEqual(converted, "///C:/foo/bar/spam.foo")

    def test_long_drive_letter(self):
        with self.assertRaises(IOError):
            pathname2url("XX:\\")

    def test_roundtrip_pathname2url(self):
        # URL -> path -> URL must give back the starting URL.
        urls = ('///C:',
                '/////folder/test/',
                '///C:/foo/bar/spam.foo')
        for original in urls:
            as_path = url2pathname(original)
            self.assertEqual(pathname2url(as_path), original)
Brett Cannon74bfd702003-04-25 09:39:47 +00001551
# Run every test case in this module when the file is executed as a script.
if __name__ == '__main__':
    unittest.main()