blob: 58ca2a5cd84f1b236922fbd9a78852e6206e2436 [file] [log] [blame]
"""Regression tests for urllib"""
2
Jeremy Hylton1afc1692008-06-18 20:49:58 +00003import urllib.parse
4import urllib.request
guido@google.coma119df92011-03-29 11:41:02 -07005import urllib.error
Georg Brandl24420152008-05-26 16:32:26 +00006import http.client
Barry Warsaw820c1202008-06-12 04:06:45 +00007import email.message
Jeremy Hylton66dc8c52007-08-04 03:42:26 +00008import io
Brett Cannon74bfd702003-04-25 09:39:47 +00009import unittest
Benjamin Peterson3c2dca62014-06-07 15:08:04 -070010from unittest.mock import patch
Benjamin Petersonee8712c2008-05-20 21:35:26 +000011from test import support
Brett Cannon74bfd702003-04-25 09:39:47 +000012import os
Antoine Pitrou07df6552014-11-02 17:23:14 +010013try:
14 import ssl
15except ImportError:
16 ssl = None
Senthil Kumaran2d2ea1b2011-04-14 13:16:30 +080017import sys
Georg Brandl5a650a22005-08-26 08:51:34 +000018import tempfile
Senthil Kumaran277e9092013-04-10 20:51:19 -070019from nturl2path import url2pathname, pathname2url
Jeremy Hylton6102e292000-08-31 15:48:10 +000020
Senthil Kumaranc5c5a142012-01-14 19:09:04 +080021from base64 import b64encode
Georg Brandl2daf6ae2012-02-20 19:54:16 +010022import collections
Senthil Kumaranc5c5a142012-01-14 19:09:04 +080023
Senthil Kumaran8b081b72013-04-10 20:53:12 -070024
def hexescape(char):
    """Escape char as RFC 2396 specifies.

    Return ``'%'`` followed by the upper-case hexadecimal value of
    ``ord(char)``, zero-padded to at least two digits.
    """
    # "%02X" pads single-digit values with a leading zero and leaves wider
    # values untouched, matching the previous hand-rolled padding.
    return "%%%02X" % ord(char)
Jeremy Hylton6102e292000-08-31 15:48:10 +000031
# Shortcut for testing FancyURLopener: the opener built for calls without
# explicit proxies is cached here and reused by later calls.
_urlopener = None


def urlopen(url, data=None, proxies=None):
    """urlopen(url [, data]) -> open file-like object

    Open *url* through a ``urllib.request.FancyURLopener``.

    When *proxies* is given, a fresh opener is created for this call only.
    Otherwise the module-level ``_urlopener`` is created on first use and
    reused afterwards.  *data*, when not None, is forwarded to the opener's
    ``open()`` method.
    """
    global _urlopener
    if proxies is not None:
        opener = urllib.request.FancyURLopener(proxies=proxies)
    elif not _urlopener:
        # Creating the opener is expected to emit a DeprecationWarning;
        # check_warnings() captures it so it does not leak into test output.
        with support.check_warnings(
                ('FancyURLopener style of invoking requests is deprecated.',
                 DeprecationWarning)):
            opener = urllib.request.FancyURLopener()
        _urlopener = opener
    else:
        opener = _urlopener
    if data is None:
        return opener.open(url)
    return opener.open(url, data)
53
Senthil Kumarance260142011-11-01 01:35:17 +080054
def fakehttp(fakedata):
    """Return an ``HTTPConnection`` subclass that replays *fakedata*.

    *fakedata* is the raw bytes of the HTTP response to serve.  The returned
    class never touches the network: ``connect()`` installs an in-memory
    fake socket preloaded with *fakedata*, and whatever is sent through that
    socket is stored in the class attribute ``buf`` for later inspection.
    """
    class FakeSocket(io.BytesIO):
        # Reference count shared by the "socket" and the file objects handed
        # out by makefile(); the buffer is only closed when it drops to zero.
        io_refs = 1

        def sendall(self, data):
            # Keep the last outgoing payload for verification by the tests.
            FakeHTTPConnection.buf = data

        def makefile(self, *args, **kwds):
            self.io_refs += 1
            return self

        def read(self, amt=None):
            if self.closed:
                return b""
            return super().read(amt)

        def readline(self, length=None):
            if self.closed:
                return b""
            return super().readline(length)

        def close(self):
            self.io_refs -= 1
            if self.io_refs == 0:
                super().close()

    class FakeHTTPConnection(http.client.HTTPConnection):

        # buffer to store data for verification in urlopen tests.
        buf = None
        # One fake socket per generated class, shared by all its instances.
        fakesock = FakeSocket(fakedata)

        def connect(self):
            self.sock = self.fakesock

    return FakeHTTPConnection
91
92
class FakeHTTPMixin(object):
    """Mixin that swaps ``http.client.HTTPConnection`` for a fake one."""

    def fakehttp(self, fakedata):
        """Replace ``http.client.HTTPConnection`` with a fake serving *fakedata*.

        The real class is remembered on the instance so that unfakehttp()
        can restore it.
        """
        self._connection_class = http.client.HTTPConnection
        http.client.HTTPConnection = fakehttp(fakedata)

    def unfakehttp(self):
        """Restore the connection class saved by fakehttp()."""
        http.client.HTTPConnection = self._connection_class
100
101
class FakeFTPMixin(object):
    """Mixin that swaps ``urllib.request.ftpwrapper`` for a no-op stand-in."""

    def fakeftp(self):
        """Install a fake ``ftpwrapper`` and remember the real one."""

        class FakeFtpWrapper(object):
            # Accepts the same arguments as the call sites in this file pass
            # to ftpwrapper, but does nothing with them.
            def __init__(self, user, passwd, host, port, dirs, timeout=None,
                         persistent=True):
                pass

            def retrfile(self, file, type):
                # An empty stream paired with 0.
                return io.BytesIO(), 0

            def close(self):
                pass

        self._ftpwrapper_class = urllib.request.ftpwrapper
        urllib.request.ftpwrapper = FakeFtpWrapper

    def unfakeftp(self):
        """Restore the ``ftpwrapper`` saved by fakeftp()."""
        urllib.request.ftpwrapper = self._ftpwrapper_class
120
121
class urlopen_FileTests(unittest.TestCase):
    """Test urlopen() opening a temporary file.

    Try to test as much functionality as possible so as to cut down on reliance
    on connecting to the Net for testing.

    """

    def setUp(self):
        # Create a temp file to use for testing
        self.text = bytes("test_urllib: %s\n" % self.__class__.__name__,
                          "ascii")
        with open(support.TESTFN, 'wb') as f:
            f.write(self.text)
        self.pathname = support.TESTFN
        self.returned_obj = urlopen("file:%s" % self.pathname)

    def tearDown(self):
        """Shut down the open object"""
        self.returned_obj.close()
        os.remove(support.TESTFN)

    def test_interface(self):
        # Make sure object returned by urlopen() has the specified methods
        for attr in ("read", "readline", "readlines", "fileno",
                     "close", "info", "geturl", "getcode", "__iter__"):
            self.assertTrue(hasattr(self.returned_obj, attr),
                            "object returned by urlopen() lacks %s attribute" %
                            attr)

    def test_read(self):
        self.assertEqual(self.text, self.returned_obj.read())

    def test_readline(self):
        self.assertEqual(self.text, self.returned_obj.readline())
        self.assertEqual(b'', self.returned_obj.readline(),
                         "calling readline() after exhausting the file did not"
                         " return an empty string")

    def test_readlines(self):
        lines_list = self.returned_obj.readlines()
        self.assertEqual(len(lines_list), 1,
                         "readlines() returned the wrong number of lines")
        self.assertEqual(lines_list[0], self.text,
                         "readlines() returned improper text")

    def test_fileno(self):
        file_num = self.returned_obj.fileno()
        self.assertIsInstance(file_num, int, "fileno() did not return an int")
        self.assertEqual(os.read(file_num, len(self.text)), self.text,
                         "Reading on the file descriptor returned by fileno() "
                         "did not return the expected text")

    def test_close(self):
        # Test close() by calling it here and then having it be called again
        # by the tearDown() method for the test
        self.returned_obj.close()

    def test_info(self):
        self.assertIsInstance(self.returned_obj.info(), email.message.Message)

    def test_geturl(self):
        self.assertEqual(self.returned_obj.geturl(), self.pathname)

    def test_getcode(self):
        self.assertIsNone(self.returned_obj.getcode())

    def test_iter(self):
        # Test iterator
        # Don't need to count number of iterations since test would fail the
        # instant it returned anything beyond the first line from the
        # comparison.
        # Use the iterator in the usual implicit way to test for ticket #4608.
        for line in self.returned_obj:
            self.assertEqual(line, self.text)

    def test_relativelocalfile(self):
        # A relative path without a URL scheme must be rejected.
        self.assertRaises(ValueError, urllib.request.urlopen,
                          './' + self.pathname)
203
class ProxyTests(unittest.TestCase):
    """Test proxy discovery from environment variables."""

    def setUp(self):
        # Records changes to env vars
        self.env = support.EnvironmentVarGuard()
        # Delete all proxy related env vars
        for k in list(os.environ):
            if 'proxy' in k.lower():
                self.env.unset(k)

    def tearDown(self):
        # Restore all proxy related env vars
        self.env.__exit__()
        del self.env

    def test_getproxies_environment_keep_no_proxies(self):
        self.env.set('NO_PROXY', 'localhost')
        proxies = urllib.request.getproxies_environment()
        # getproxies_environment use lowered case truncated (no '_proxy') keys
        self.assertEqual('localhost', proxies['no'])
        # List of no_proxies with space.
        self.env.set('NO_PROXY', 'localhost, anotherdomain.com, newdomain.com')
        self.assertTrue(
            urllib.request.proxy_bypass_environment('anotherdomain.com'))
Benjamin Peterson9bc93512008-09-22 22:10:59 +0000227
class urlopen_HttpTests(unittest.TestCase, FakeHTTPMixin, FakeFTPMixin):
    """Test urlopen() opening a fake http connection."""

    def check_read(self, ver):
        # *ver* is the HTTP version as bytes, e.g. b"1.1"; it is spliced into
        # the status line of the canned response.
        self.fakehttp(b"HTTP/" + ver + b" 200 OK\r\n\r\nHello!")
        try:
            fp = urlopen("http://python.org/")
            self.assertEqual(fp.readline(), b"Hello!")
            self.assertEqual(fp.readline(), b"")
            self.assertEqual(fp.geturl(), 'http://python.org/')
            self.assertEqual(fp.getcode(), 200)
        finally:
            self.unfakehttp()

    def test_url_fragment(self):
        # Issue #11703: geturl() omits fragments in the original URL.
        url = 'http://docs.python.org/library/urllib.html#OK'
        self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello!")
        try:
            fp = urllib.request.urlopen(url)
            self.assertEqual(fp.geturl(), url)
        finally:
            self.unfakehttp()

    def test_willclose(self):
        self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello!")
        try:
            resp = urlopen("http://www.python.org")
            self.assertTrue(resp.fp.will_close)
        finally:
            self.unfakehttp()

    def test_read_0_9(self):
        # "0.9" response accepted (but not "simple responses" without
        # a status line)
        self.check_read(b"0.9")

    def test_read_1_0(self):
        self.check_read(b"1.0")

    def test_read_1_1(self):
        self.check_read(b"1.1")

    def test_read_bogus(self):
        # urlopen() should raise OSError for many error codes.
        self.fakehttp(b'''HTTP/1.1 401 Authentication Required
Date: Wed, 02 Jan 2008 03:03:54 GMT
Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
Connection: close
Content-Type: text/html; charset=iso-8859-1
''')
        try:
            self.assertRaises(OSError, urlopen, "http://python.org/")
        finally:
            self.unfakehttp()

    def test_invalid_redirect(self):
        # A 302 whose Location header points at a file: URL is expected to
        # be refused with HTTPError.
        self.fakehttp(b'''HTTP/1.1 302 Found
Date: Wed, 02 Jan 2008 03:03:54 GMT
Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
Location: file://guidocomputer.athome.com:/python/license
Connection: close
Content-Type: text/html; charset=iso-8859-1
''')
        try:
            self.assertRaises(urllib.error.HTTPError, urlopen,
                              "http://python.org/")
        finally:
            self.unfakehttp()

    def test_empty_socket(self):
        # urlopen() raises OSError if the underlying socket does not send any
        # data. (#1680230)
        self.fakehttp(b'')
        try:
            self.assertRaises(OSError, urlopen, "http://something")
        finally:
            self.unfakehttp()

    def test_missing_localfile(self):
        # Test for #10836
        with self.assertRaises(urllib.error.URLError) as e:
            urlopen('file://localhost/a/file/which/doesnot/exists.py')
        self.assertTrue(e.exception.filename)
        self.assertTrue(e.exception.reason)

    def test_file_notexists(self):
        fd, tmp_file = tempfile.mkstemp()
        tmp_fileurl = 'file://localhost/' + tmp_file.replace(os.path.sep, '/')
        try:
            self.assertTrue(os.path.exists(tmp_file))
            with urlopen(tmp_fileurl) as fobj:
                self.assertTrue(fobj)
        finally:
            os.close(fd)
            os.unlink(tmp_file)
        # Once the file is gone, the same URL must fail.
        self.assertFalse(os.path.exists(tmp_file))
        with self.assertRaises(urllib.error.URLError):
            urlopen(tmp_fileurl)

    def test_ftp_nohost(self):
        test_ftp_url = 'ftp:///path'
        with self.assertRaises(urllib.error.URLError) as e:
            urlopen(test_ftp_url)
        self.assertFalse(e.exception.filename)
        self.assertTrue(e.exception.reason)

    def test_ftp_nonexisting(self):
        with self.assertRaises(urllib.error.URLError) as e:
            urlopen('ftp://localhost/a/file/which/doesnot/exists.py')
        self.assertFalse(e.exception.filename)
        self.assertTrue(e.exception.reason)

    @patch.object(urllib.request, 'MAXFTPCACHE', 0)
    def test_ftp_cache_pruning(self):
        # With MAXFTPCACHE patched to 0, opening an ftp URL while the cache
        # already holds an entry exercises the cache-pruning code path.
        self.fakeftp()
        try:
            urllib.request.ftpcache['test'] = urllib.request.ftpwrapper(
                'user', 'pass', 'localhost', 21, [])
            urlopen('ftp://localhost')
        finally:
            self.unfakeftp()

    def test_userpass_inurl(self):
        self.fakehttp(b"HTTP/1.0 200 OK\r\n\r\nHello!")
        try:
            fp = urlopen("http://user:pass@python.org/")
            self.assertEqual(fp.readline(), b"Hello!")
            self.assertEqual(fp.readline(), b"")
            self.assertEqual(fp.geturl(), 'http://user:pass@python.org/')
            self.assertEqual(fp.getcode(), 200)
        finally:
            self.unfakehttp()

    def test_userpass_inurl_w_spaces(self):
        self.fakehttp(b"HTTP/1.0 200 OK\r\n\r\nHello!")
        try:
            userpass = "a b:c d"
            url = "http://{}@python.org/".format(userpass)
            # Grab the fake class now so its ``buf`` can be inspected after
            # the request has been sent.
            fakehttp_wrapper = http.client.HTTPConnection
            authorization = ("Authorization: Basic %s\r\n" %
                             b64encode(userpass.encode("ASCII")).decode("ASCII"))
            fp = urlopen(url)
            # The authorization header must be in place
            self.assertIn(authorization, fakehttp_wrapper.buf.decode("UTF-8"))
            self.assertEqual(fp.readline(), b"Hello!")
            self.assertEqual(fp.readline(), b"")
            # the spaces are quoted in URL so no match
            self.assertNotEqual(fp.geturl(), url)
            self.assertEqual(fp.getcode(), 200)
        finally:
            self.unfakehttp()

    def test_URLopener_deprecation(self):
        with support.check_warnings(('', DeprecationWarning)):
            urllib.request.URLopener()

    @unittest.skipUnless(ssl, "ssl module required")
    def test_cafile_and_context(self):
        # Passing both cafile and context is expected to be rejected.
        context = ssl.create_default_context()
        with self.assertRaises(ValueError):
            urllib.request.urlopen(
                "https://localhost", cafile="/nonexistent/path", context=context
            )
393
class urlopen_DataTests(unittest.TestCase):
    """Test urlopen() opening a data URL."""

    def setUp(self):
        # text containing URL special- and unicode-characters
        self.text = "test data URLs :;,%=& \u00f6 \u00c4 "
        # 2x1 pixel RGB PNG image with one black and one white pixel
        self.image = (
            b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\x02\x00\x00\x00'
            b'\x01\x08\x02\x00\x00\x00{@\xe8\xdd\x00\x00\x00\x01sRGB\x00\xae'
            b'\xce\x1c\xe9\x00\x00\x00\x0fIDAT\x08\xd7c```\xf8\xff\xff?\x00'
            b'\x06\x01\x02\xfe\no/\x1e\x00\x00\x00\x00IEND\xaeB`\x82')

        self.text_url = (
            "data:text/plain;charset=UTF-8,test%20data%20URLs%20%3A%3B%2C%25%3"
            "D%26%20%C3%B6%20%C3%84%20")
        self.text_url_base64 = (
            "data:text/plain;charset=ISO-8859-1;base64,dGVzdCBkYXRhIFVSTHMgOjs"
            "sJT0mIPYgxCA%3D")
        # base64 encoded data URL that contains ignorable spaces,
        # such as "\n", " ", "%0A", and "%20".
        # The first fragment carries the "data:" scheme, the media type and
        # the base64 encoding of the first 30 bytes of self.image.
        self.image_url = (
            "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAIAAAABCAIAAAB7\n"
            "QOjdAAAAAXNSR0IArs4c6QAAAA9JREFUCNdj%0AYGBg%2BP//PwAGAQL%2BCm8 "
            "vHgAAAABJRU5ErkJggg%3D%3D%0A%20")

        self.text_url_resp = urllib.request.urlopen(self.text_url)
        self.text_url_base64_resp = urllib.request.urlopen(
            self.text_url_base64)
        self.image_url_resp = urllib.request.urlopen(self.image_url)

    def test_interface(self):
        # Make sure object returned by urlopen() has the specified methods
        for attr in ("read", "readline", "readlines",
                     "close", "info", "geturl", "getcode", "__iter__"):
            self.assertTrue(hasattr(self.text_url_resp, attr),
                            "object returned by urlopen() lacks %s attribute" %
                            attr)

    def test_info(self):
        self.assertIsInstance(self.text_url_resp.info(), email.message.Message)
        self.assertEqual(self.text_url_base64_resp.info().get_params(),
                         [('text/plain', ''), ('charset', 'ISO-8859-1')])
        self.assertEqual(self.image_url_resp.info()['content-length'],
                         str(len(self.image)))
        # A data URL with no media type gets the default parameters.
        self.assertEqual(urllib.request.urlopen("data:,").info().get_params(),
                         [('text/plain', ''), ('charset', 'US-ASCII')])

    def test_geturl(self):
        self.assertEqual(self.text_url_resp.geturl(), self.text_url)
        self.assertEqual(self.text_url_base64_resp.geturl(),
                         self.text_url_base64)
        self.assertEqual(self.image_url_resp.geturl(), self.image_url)

    def test_read_text(self):
        # Decode using the charset announced in the response headers.
        self.assertEqual(self.text_url_resp.read().decode(
            dict(self.text_url_resp.info().get_params())['charset']), self.text)

    def test_read_text_base64(self):
        self.assertEqual(self.text_url_base64_resp.read().decode(
            dict(self.text_url_base64_resp.info().get_params())['charset']),
            self.text)

    def test_read_image(self):
        self.assertEqual(self.image_url_resp.read(), self.image)

    def test_missing_comma(self):
        self.assertRaises(ValueError, urllib.request.urlopen, 'data:text/plain')

    def test_invalid_base64_data(self):
        # missing padding character
        self.assertRaises(ValueError, urllib.request.urlopen, 'data:;base64,Cg=')
465 self.assertRaises(ValueError,urllib.request.urlopen,'data:;base64,Cg=')
466
class urlretrieve_FileTests(unittest.TestCase):
    """Test urllib.urlretrieve() on local files"""

    def setUp(self):
        # Create a list of temporary files. Each item in the list is a file
        # name (absolute path or relative to the current working directory).
        # All files in this list will be deleted in the tearDown method. Note,
        # this only helps to makes sure temporary files get deleted, but it
        # does nothing about trying to close files that may still be open. It
        # is the responsibility of the developer to properly close files even
        # when exceptional conditions occur.
        self.tempFiles = []

        # Create a temporary file.
        self.registerFileForCleanUp(support.TESTFN)
        self.text = b'testing urllib.urlretrieve'
        with open(support.TESTFN, 'wb') as FILE:
            FILE.write(self.text)

    def tearDown(self):
        # Delete the temporary files.  Removal is best-effort: a file that
        # was never created or cannot be removed must not fail the test.
        for each in self.tempFiles:
            try:
                os.remove(each)
            except OSError:
                pass

    def constructLocalFileUrl(self, filePath):
        """Return a ``file://`` URL for *filePath*.

        The path is made absolute first.  The test is skipped when the
        absolute path cannot be encoded as UTF-8.
        """
        filePath = os.path.abspath(filePath)
        try:
            filePath.encode("utf-8")
        except UnicodeEncodeError:
            raise unittest.SkipTest("filePath is not encodable to utf8")
        return "file://%s" % urllib.request.pathname2url(filePath)

    def createNewTempFile(self, data=b""):
        """Creates a new temporary file containing the specified data,
        registers the file for deletion during the test fixture tear down, and
        returns the absolute path of the file."""

        newFd, newFilePath = tempfile.mkstemp()
        # Register before writing so the file is removed even if the write
        # below fails.
        self.registerFileForCleanUp(newFilePath)
        with os.fdopen(newFd, "wb") as newFile:
            newFile.write(data)
        return newFilePath

    def registerFileForCleanUp(self, fileName):
        self.tempFiles.append(fileName)

    def test_basic(self):
        # Make sure that a local file just gets its own location returned and
        # a headers value is returned.
        result = urllib.request.urlretrieve("file:%s" % support.TESTFN)
        self.assertEqual(result[0], support.TESTFN)
        self.assertIsInstance(result[1], email.message.Message,
                              "did not get a email.message.Message instance "
                              "as second returned value")

    def test_copy(self):
        # Test that setting the filename argument works.
        second_temp = "%s.2" % support.TESTFN
        self.registerFileForCleanUp(second_temp)
        result = urllib.request.urlretrieve(self.constructLocalFileUrl(
            support.TESTFN), second_temp)
        self.assertEqual(second_temp, result[0])
        self.assertTrue(os.path.exists(second_temp), "copy of the file was not "
                                                     "made")
        with open(second_temp, 'rb') as FILE:
            text = FILE.read()
        self.assertEqual(self.text, text)

    def test_reporthook(self):
        # Make sure that the reporthook works.
        # One-element list so the nested hook can update the expected block
        # number between calls.
        count_holder = [0]

        def hooktester(block_count, block_read_size, file_size):
            self.assertIsInstance(block_count, int)
            self.assertIsInstance(block_read_size, int)
            self.assertIsInstance(file_size, int)
            self.assertEqual(block_count, count_holder[0])
            count_holder[0] = count_holder[0] + 1

        second_temp = "%s.2" % support.TESTFN
        self.registerFileForCleanUp(second_temp)
        urllib.request.urlretrieve(
            self.constructLocalFileUrl(support.TESTFN),
            second_temp, hooktester)

    def test_reporthook_0_bytes(self):
        # Test on zero length file. Should call reporthook only 1 time.
        report = []

        def hooktester(block_count, block_read_size, file_size):
            report.append((block_count, block_read_size, file_size))

        srcFileName = self.createNewTempFile()
        urllib.request.urlretrieve(self.constructLocalFileUrl(srcFileName),
                                   support.TESTFN, hooktester)
        self.assertEqual(len(report), 1)
        self.assertEqual(report[0][2], 0)

    def test_reporthook_5_bytes(self):
        # Test on 5 byte file. Should call reporthook only 2 times (once when
        # the "network connection" is established and once when the block is
        # read).
        report = []

        def hooktester(block_count, block_read_size, file_size):
            report.append((block_count, block_read_size, file_size))

        srcFileName = self.createNewTempFile(b"x" * 5)
        urllib.request.urlretrieve(self.constructLocalFileUrl(srcFileName),
                                   support.TESTFN, hooktester)
        self.assertEqual(len(report), 2)
        self.assertEqual(report[0][2], 5)
        self.assertEqual(report[1][2], 5)

    def test_reporthook_8193_bytes(self):
        # Test on 8193 byte file. Should call reporthook only 3 times (once
        # when the "network connection" is established, once for the next 8192
        # bytes, and once for the last byte).
        report = []

        def hooktester(block_count, block_read_size, file_size):
            report.append((block_count, block_read_size, file_size))

        srcFileName = self.createNewTempFile(b"x" * 8193)
        urllib.request.urlretrieve(self.constructLocalFileUrl(srcFileName),
                                   support.TESTFN, hooktester)
        self.assertEqual(len(report), 3)
        self.assertEqual(report[0][2], 8193)
        self.assertEqual(report[0][1], 8192)
        self.assertEqual(report[1][1], 8192)
        self.assertEqual(report[2][1], 8192)
Skip Montanaro080c9972001-01-28 21:12:22 +0000605
Senthil Kumarance260142011-11-01 01:35:17 +0800606
class urlretrieve_HttpTests(unittest.TestCase, FakeHTTPMixin):
    """Test urllib.urlretrieve() using fake http connections"""

    def test_short_content_raises_ContentTooShortError(self):
        # urlretrieve() is called without a target filename, so it downloads
        # into a temporary file; urlcleanup() removes it afterwards.
        self.addCleanup(urllib.request.urlcleanup)

        # The response announces 100 bytes but the body is far shorter.
        self.fakehttp(b'''HTTP/1.1 200 OK
Date: Wed, 02 Jan 2008 03:03:54 GMT
Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
Connection: close
Content-Length: 100
Content-Type: text/html; charset=iso-8859-1

FF
''')

        def _reporthook(par1, par2, par3):
            pass

        try:
            with self.assertRaises(urllib.error.ContentTooShortError):
                urllib.request.urlretrieve('http://example.com/',
                                           reporthook=_reporthook)
        finally:
            self.unfakehttp()

    def test_short_content_raises_ContentTooShortError_without_reporthook(self):
        # See the comment in the test above: clean up the temporary download.
        self.addCleanup(urllib.request.urlcleanup)

        self.fakehttp(b'''HTTP/1.1 200 OK
Date: Wed, 02 Jan 2008 03:03:54 GMT
Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
Connection: close
Content-Length: 100
Content-Type: text/html; charset=iso-8859-1

FF
''')
        try:
            with self.assertRaises(urllib.error.ContentTooShortError):
                urllib.request.urlretrieve('http://example.com/')
        finally:
            self.unfakehttp()
645 self.unfakehttp()
646
647
class QuotingTests(unittest.TestCase):
    r"""Tests for urllib.parse.quote() and urllib.parse.quote_plus()

    According to RFC 2396 (Uniform Resource Identifiers), to escape a
    character you write it as '%' + <2 character US-ASCII hex value>.
    The Python code of ``'%' + hex(ord(<character>))[2:]`` escapes a
    character properly. Case does not matter on the hex letters.

    The various character sets specified are:

    Reserved characters : ";/?:@&=+$,"
        Have special meaning in URIs and must be escaped if not being used for
        their special meaning
    Data characters : letters, digits, and "-_.!~*'()"
        Unreserved and do not need to be escaped; can be, though, if desired
    Control characters : 0x00 - 0x1F, 0x7F
        Have no use in URIs so must be escaped
    space : 0x20
        Must be escaped
    Delimiters : '<>#%"'
        Must be escaped
    Unwise : "{}|\^[]`"
        Must be escaped

    """

    def test_never_quote(self):
        # Make sure quote() does not quote letters, digits, and "_.-"
        do_not_quote = ''.join(["ABCDEFGHIJKLMNOPQRSTUVWXYZ",
                                "abcdefghijklmnopqrstuvwxyz",
                                "0123456789",
                                "_.-"])
        result = urllib.parse.quote(do_not_quote)
        self.assertEqual(do_not_quote, result,
                         "using quote(): %r != %r" % (do_not_quote, result))
        result = urllib.parse.quote_plus(do_not_quote)
        self.assertEqual(do_not_quote, result,
                         "using quote_plus(): %r != %r" %
                         (do_not_quote, result))

    def test_default_safe(self):
        # Test '/' is default value for 'safe' parameter
        self.assertEqual(urllib.parse.quote.__defaults__[0], '/')

    def test_safe(self):
        # Test setting 'safe' parameter does what it should do
        quote_by_default = "<>"
        result = urllib.parse.quote(quote_by_default, safe=quote_by_default)
        self.assertEqual(quote_by_default, result,
                         "using quote(): %r != %r" %
                         (quote_by_default, result))
        result = urllib.parse.quote_plus(quote_by_default,
                                         safe=quote_by_default)
        self.assertEqual(quote_by_default, result,
                         "using quote_plus(): %r != %r" %
                         (quote_by_default, result))
        # Safe expressed as bytes rather than str
        result = urllib.parse.quote(quote_by_default, safe=b"<>")
        self.assertEqual(quote_by_default, result,
                         "using quote(): %r != %r" %
                         (quote_by_default, result))
        # "Safe" non-ASCII characters should have no effect
        # (Since URIs are not allowed to have non-ASCII characters)
        result = urllib.parse.quote("a\xfcb", encoding="latin-1",
                                    safe="\xfc")
        expect = urllib.parse.quote("a\xfcb", encoding="latin-1", safe="")
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" %
                         (expect, result))
        # Same as above, but using a bytes rather than str
        result = urllib.parse.quote("a\xfcb", encoding="latin-1",
                                    safe=b"\xfc")
        expect = urllib.parse.quote("a\xfcb", encoding="latin-1", safe="")
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" %
                         (expect, result))

    def test_default_quoting(self):
        # Make sure all characters that should be quoted are by default sans
        # space (separate test for that).
        should_quote = [chr(num) for num in range(32)]  # For 0x00 - 0x1F
        # Raw string: the backslash is a literal character to be quoted,
        # not the start of an escape sequence.
        should_quote.append(r'<>#%"{}|\^[]`')
        should_quote.append(chr(127))  # For 0x7F
        should_quote = ''.join(should_quote)
        for char in should_quote:
            result = urllib.parse.quote(char)
            self.assertEqual(hexescape(char), result,
                             "using quote(): "
                             "%s should be escaped to %s, not %s" %
                             (char, hexescape(char), result))
            result = urllib.parse.quote_plus(char)
            self.assertEqual(hexescape(char), result,
                             "using quote_plus(): "
                             "%s should be escaped to %s, not %s" %
                             (char, hexescape(char), result))
        # Only the characters that need it are escaped inside a longer string
        partial_quote = "ab[]cd"
        expected = "ab%5B%5Dcd"
        result = urllib.parse.quote(partial_quote)
        self.assertEqual(expected, result,
                         "using quote(): %r != %r" % (expected, result))
        result = urllib.parse.quote_plus(partial_quote)
        self.assertEqual(expected, result,
                         "using quote_plus(): %r != %r" % (expected, result))

    def test_quoting_space(self):
        # Make sure quote() and quote_plus() handle spaces as specified in
        # their unique way
        result = urllib.parse.quote(' ')
        self.assertEqual(result, hexescape(' '),
                         "using quote(): %r != %r" %
                         (result, hexescape(' ')))
        result = urllib.parse.quote_plus(' ')
        self.assertEqual(result, '+',
                         "using quote_plus(): %r != +" % result)
        given = "a b cd e f"
        expect = given.replace(' ', hexescape(' '))
        result = urllib.parse.quote(given)
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        expect = given.replace(' ', '+')
        result = urllib.parse.quote_plus(given)
        self.assertEqual(expect, result,
                         "using quote_plus(): %r != %r" % (expect, result))

    def test_quoting_plus(self):
        # A literal '+' is escaped unless it is listed as safe
        self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma'),
                         'alpha%2Bbeta+gamma')
        self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma', '+'),
                         'alpha+beta+gamma')
        # Test with bytes
        self.assertEqual(urllib.parse.quote_plus(b'alpha+beta gamma'),
                         'alpha%2Bbeta+gamma')
        # Test with safe bytes
        self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma', b'+'),
                         'alpha+beta+gamma')

    def test_quote_bytes(self):
        # Bytes should quote directly to percent-encoded values
        given = b"\xa2\xd8ab\xff"
        expect = "%A2%D8ab%FF"
        result = urllib.parse.quote(given)
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        # Encoding argument should raise type error on bytes input
        self.assertRaises(TypeError, urllib.parse.quote, given,
                          encoding="latin-1")
        # quote_from_bytes should work the same
        result = urllib.parse.quote_from_bytes(given)
        self.assertEqual(expect, result,
                         "using quote_from_bytes(): %r != %r"
                         % (expect, result))

    def test_quote_with_unicode(self):
        # Characters in Latin-1 range, encoded by default in UTF-8
        given = "\xa2\xd8ab\xff"
        expect = "%C2%A2%C3%98ab%C3%BF"
        result = urllib.parse.quote(given)
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        # Characters in Latin-1 range, encoded with None (default)
        result = urllib.parse.quote(given, encoding=None, errors=None)
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        # Characters in Latin-1 range, encoded with Latin-1
        given = "\xa2\xd8ab\xff"
        expect = "%A2%D8ab%FF"
        result = urllib.parse.quote(given, encoding="latin-1")
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        # Characters in BMP, encoded by default in UTF-8
        given = "\u6f22\u5b57"              # "Kanji"
        expect = "%E6%BC%A2%E5%AD%97"
        result = urllib.parse.quote(given)
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        # Characters in BMP, encoded with Latin-1
        given = "\u6f22\u5b57"
        self.assertRaises(UnicodeEncodeError, urllib.parse.quote, given,
                          encoding="latin-1")
        # Characters in BMP, encoded with Latin-1, with replace error handling
        given = "\u6f22\u5b57"
        expect = "%3F%3F"                   # "??"
        result = urllib.parse.quote(given, encoding="latin-1",
                                    errors="replace")
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        # Characters in BMP, Latin-1, with xmlcharref error handling
        given = "\u6f22\u5b57"
        expect = "%26%2328450%3B%26%2323383%3B"     # "&#28450;&#23383;"
        result = urllib.parse.quote(given, encoding="latin-1",
                                    errors="xmlcharrefreplace")
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))

    def test_quote_plus_with_unicode(self):
        # Encoding (latin-1) test for quote_plus
        given = "\xa2\xd8 \xff"
        expect = "%A2%D8+%FF"
        result = urllib.parse.quote_plus(given, encoding="latin-1")
        self.assertEqual(expect, result,
                         "using quote_plus(): %r != %r" % (expect, result))
        # Errors test for quote_plus
        given = "ab\u6f22\u5b57 cd"
        expect = "ab%3F%3F+cd"
        result = urllib.parse.quote_plus(given, encoding="latin-1",
                                         errors="replace")
        self.assertEqual(expect, result,
                         "using quote_plus(): %r != %r" % (expect, result))
851
Senthil Kumarand496c4c2010-07-30 19:34:36 +0000852
class UnquotingTests(unittest.TestCase):
    """Tests for unquote() and unquote_plus()

    See the doc string for QuotingTests for details on quoting and such.

    """

    def test_unquoting(self):
        # Make sure unquoting of all ASCII values works
        escape_list = []
        for num in range(128):
            given = hexescape(chr(num))
            expect = chr(num)
            result = urllib.parse.unquote(given)
            self.assertEqual(expect, result,
                             "using unquote(): %r != %r" % (expect, result))
            result = urllib.parse.unquote_plus(given)
            self.assertEqual(expect, result,
                             "using unquote_plus(): %r != %r" %
                             (expect, result))
            escape_list.append(given)
        escape_string = ''.join(escape_list)
        del escape_list
        result = urllib.parse.unquote(escape_string)
        # Exactly one '%' may remain: the one decoded from the escape of
        # '%' itself.
        self.assertEqual(result.count('%'), 1,
                         "using unquote(): not all characters escaped: "
                         "%s" % result)
        self.assertRaises((TypeError, AttributeError),
                          urllib.parse.unquote, None)
        self.assertRaises((TypeError, AttributeError),
                          urllib.parse.unquote, ())
        with support.check_warnings(('', BytesWarning), quiet=True):
            self.assertRaises((TypeError, AttributeError),
                              urllib.parse.unquote, b'')

    def test_unquoting_badpercent(self):
        # Test unquoting on bad percent-escapes
        given = '%xab'
        expect = given
        result = urllib.parse.unquote(given)
        self.assertEqual(expect, result, "using unquote(): %r != %r"
                         % (expect, result))
        given = '%x'
        expect = given
        result = urllib.parse.unquote(given)
        self.assertEqual(expect, result, "using unquote(): %r != %r"
                         % (expect, result))
        given = '%'
        expect = given
        result = urllib.parse.unquote(given)
        self.assertEqual(expect, result, "using unquote(): %r != %r"
                         % (expect, result))
        # unquote_to_bytes
        given = '%xab'
        expect = bytes(given, 'ascii')
        result = urllib.parse.unquote_to_bytes(given)
        self.assertEqual(expect, result, "using unquote_to_bytes(): %r != %r"
                         % (expect, result))
        given = '%x'
        expect = bytes(given, 'ascii')
        result = urllib.parse.unquote_to_bytes(given)
        self.assertEqual(expect, result, "using unquote_to_bytes(): %r != %r"
                         % (expect, result))
        given = '%'
        expect = bytes(given, 'ascii')
        result = urllib.parse.unquote_to_bytes(given)
        self.assertEqual(expect, result, "using unquote_to_bytes(): %r != %r"
                         % (expect, result))
        self.assertRaises((TypeError, AttributeError),
                          urllib.parse.unquote_to_bytes, None)
        self.assertRaises((TypeError, AttributeError),
                          urllib.parse.unquote_to_bytes, ())

    def test_unquoting_mixed_case(self):
        # Test unquoting on mixed-case hex digits in the percent-escapes
        given = '%Ab%eA'
        expect = b'\xab\xea'
        result = urllib.parse.unquote_to_bytes(given)
        self.assertEqual(expect, result,
                         "using unquote_to_bytes(): %r != %r"
                         % (expect, result))

    def test_unquoting_parts(self):
        # Make sure unquoting works when have non-quoted characters
        # interspersed
        given = 'ab%sd' % hexescape('c')
        expect = "abcd"
        result = urllib.parse.unquote(given)
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))
        result = urllib.parse.unquote_plus(given)
        self.assertEqual(expect, result,
                         "using unquote_plus(): %r != %r" % (expect, result))

    def test_unquoting_plus(self):
        # Test difference between unquote() and unquote_plus()
        given = "are+there+spaces..."
        expect = given
        result = urllib.parse.unquote(given)
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))
        expect = given.replace('+', ' ')
        result = urllib.parse.unquote_plus(given)
        self.assertEqual(expect, result,
                         "using unquote_plus(): %r != %r" % (expect, result))

    def test_unquote_to_bytes(self):
        given = 'br%C3%BCckner_sapporo_20050930.doc'
        expect = b'br\xc3\xbcckner_sapporo_20050930.doc'
        result = urllib.parse.unquote_to_bytes(given)
        self.assertEqual(expect, result,
                         "using unquote_to_bytes(): %r != %r"
                         % (expect, result))
        # Test on a string with unescaped non-ASCII characters
        # (Technically an invalid URI; expect those characters to be UTF-8
        # encoded).
        result = urllib.parse.unquote_to_bytes("\u6f22%C3%BC")
        expect = b'\xe6\xbc\xa2\xc3\xbc'  # UTF-8 for "\u6f22\u00fc"
        self.assertEqual(expect, result,
                         "using unquote_to_bytes(): %r != %r"
                         % (expect, result))
        # Test with a bytes as input
        given = b'%A2%D8ab%FF'
        expect = b'\xa2\xd8ab\xff'
        result = urllib.parse.unquote_to_bytes(given)
        self.assertEqual(expect, result,
                         "using unquote_to_bytes(): %r != %r"
                         % (expect, result))
        # Test with a bytes as input, with unescaped non-ASCII bytes
        # (Technically an invalid URI; expect those bytes to be preserved)
        given = b'%A2\xd8ab%FF'
        expect = b'\xa2\xd8ab\xff'
        result = urllib.parse.unquote_to_bytes(given)
        self.assertEqual(expect, result,
                         "using unquote_to_bytes(): %r != %r"
                         % (expect, result))

    def test_unquote_with_unicode(self):
        # Characters in the Latin-1 range, encoded with UTF-8
        given = 'br%C3%BCckner_sapporo_20050930.doc'
        expect = 'br\u00fcckner_sapporo_20050930.doc'
        result = urllib.parse.unquote(given)
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))
        # Characters in the Latin-1 range, encoded with None (default)
        result = urllib.parse.unquote(given, encoding=None, errors=None)
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))

        # Characters in the Latin-1 range, encoded with Latin-1
        result = urllib.parse.unquote('br%FCckner_sapporo_20050930.doc',
                                      encoding="latin-1")
        expect = 'br\u00fcckner_sapporo_20050930.doc'
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))

        # Characters in BMP, encoded with UTF-8
        given = "%E6%BC%A2%E5%AD%97"
        expect = "\u6f22\u5b57"             # "Kanji"
        result = urllib.parse.unquote(given)
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))

        # Decode with UTF-8, invalid sequence
        given = "%F3%B1"
        expect = "\ufffd"                   # Replacement character
        result = urllib.parse.unquote(given)
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))

        # Decode with UTF-8, invalid sequence, replace errors
        result = urllib.parse.unquote(given, errors="replace")
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))

        # Decode with UTF-8, invalid sequence, ignoring errors
        given = "%F3%B1"
        expect = ""
        result = urllib.parse.unquote(given, errors="ignore")
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))

        # A mix of non-ASCII and percent-encoded characters, UTF-8
        result = urllib.parse.unquote("\u6f22%C3%BC")
        expect = '\u6f22\u00fc'
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))

        # A mix of non-ASCII and percent-encoded characters, Latin-1
        # (Note, the string contains non-Latin-1-representable characters)
        result = urllib.parse.unquote("\u6f22%FC", encoding="latin-1")
        expect = '\u6f22\u00fc'
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))
Raymond Hettinger4b0f20d2005-10-15 16:41:53 +00001042
class urlencode_Tests(unittest.TestCase):
    """Tests for urlencode()"""

    def help_inputtype(self, given, test_type):
        """Helper method for testing different input types.

        'given' must lead to only the pairs:
            * 1st, 1
            * 2nd, 2
            * 3rd, 3

        Test cannot assume anything about order.  Docs make no guarantee and
        have possible dictionary input.

        """
        expect_somewhere = ["1st=1", "2nd=2", "3rd=3"]
        result = urllib.parse.urlencode(given)
        for expected in expect_somewhere:
            self.assertIn(expected, result,
                          "testing %s: %s not found in %s" %
                          (test_type, expected, result))
        self.assertEqual(result.count('&'), 2,
                         "testing %s: expected 2 '&'s; got %s" %
                         (test_type, result.count('&')))
        # Each '&' must sit between a value digit and a key's leading digit
        amp_location = result.index('&')
        on_amp_left = result[amp_location - 1]
        on_amp_right = result[amp_location + 1]
        self.assertTrue(on_amp_left.isdigit() and on_amp_right.isdigit(),
                        "testing %s: '&' not located in proper place in %s" %
                        (test_type, result))
        # 5 chars per thing and amps
        self.assertEqual(len(result), (5 * 3) + 2,
                         "testing %s: "
                         "unexpected number of characters: %s != %s" %
                         (test_type, len(result), (5 * 3) + 2))

    def test_using_mapping(self):
        # Test passing in a mapping object as an argument.
        self.help_inputtype({"1st": '1', "2nd": '2', "3rd": '3'},
                            "using dict as input type")

    def test_using_sequence(self):
        # Test passing in a sequence of two-item sequences as an argument.
        self.help_inputtype([('1st', '1'), ('2nd', '2'), ('3rd', '3')],
                            "using sequence of two-item tuples as input")

    def test_quoting(self):
        # Make sure keys and values are quoted using quote_plus()
        given = {"&": "="}
        expect = "%s=%s" % (hexescape('&'), hexescape('='))
        result = urllib.parse.urlencode(given)
        self.assertEqual(expect, result)
        given = {"key name": "A bunch of pluses"}
        expect = "key+name=A+bunch+of+pluses"
        result = urllib.parse.urlencode(given)
        self.assertEqual(expect, result)

    def test_doseq(self):
        # Test that passing True for 'doseq' parameter works correctly
        given = {'sequence': ['1', '2', '3']}
        # Without doseq the list is stringified and quoted as a single value
        expect = "sequence=%s" % urllib.parse.quote_plus(str(['1', '2', '3']))
        result = urllib.parse.urlencode(given)
        self.assertEqual(expect, result)
        result = urllib.parse.urlencode(given, True)
        for value in given["sequence"]:
            expect = "sequence=%s" % value
            self.assertIn(expect, result)
        self.assertEqual(result.count('&'), 2,
                         "Expected 2 '&'s, got %s" % result.count('&'))

    def test_empty_sequence(self):
        self.assertEqual("", urllib.parse.urlencode({}))
        self.assertEqual("", urllib.parse.urlencode([]))

    def test_nonstring_values(self):
        self.assertEqual("a=1", urllib.parse.urlencode({"a": 1}))
        self.assertEqual("a=None", urllib.parse.urlencode({"a": None}))

    def test_nonstring_seq_values(self):
        self.assertEqual("a=1&a=2",
                         urllib.parse.urlencode({"a": [1, 2]}, True))
        self.assertEqual("a=None&a=a",
                         urllib.parse.urlencode({"a": [None, "a"]}, True))
        # A mapping used as a value contributes its keys, in order
        data = collections.OrderedDict([("a", 1), ("b", 1)])
        self.assertEqual("a=a&a=b",
                         urllib.parse.urlencode({"a": data}, True))

    def test_urlencode_encoding(self):
        # ASCII encoding. Expect %3F with errors="replace"
        given = (('\u00a0', '\u00c1'),)
        expect = '%3F=%3F'
        result = urllib.parse.urlencode(given, encoding="ASCII",
                                        errors="replace")
        self.assertEqual(expect, result)

        # Default is UTF-8 encoding.
        given = (('\u00a0', '\u00c1'),)
        expect = '%C2%A0=%C3%81'
        result = urllib.parse.urlencode(given)
        self.assertEqual(expect, result)

        # Latin-1 encoding.
        given = (('\u00a0', '\u00c1'),)
        expect = '%A0=%C1'
        result = urllib.parse.urlencode(given, encoding="latin-1")
        self.assertEqual(expect, result)

    def test_urlencode_encoding_doseq(self):
        # ASCII Encoding. Expect %3F with errors="replace"
        given = (('\u00a0', '\u00c1'),)
        expect = '%3F=%3F'
        result = urllib.parse.urlencode(given, doseq=True,
                                        encoding="ASCII", errors="replace")
        self.assertEqual(expect, result)

        # ASCII Encoding. On a sequence of values.
        given = (("\u00a0", (1, "\u00c1")),)
        expect = '%3F=1&%3F=%3F'
        result = urllib.parse.urlencode(given, True,
                                        encoding="ASCII", errors="replace")
        self.assertEqual(expect, result)

        # Utf-8
        given = (("\u00a0", "\u00c1"),)
        expect = '%C2%A0=%C3%81'
        result = urllib.parse.urlencode(given, True)
        self.assertEqual(expect, result)

        given = (("\u00a0", (42, "\u00c1")),)
        expect = '%C2%A0=42&%C2%A0=%C3%81'
        result = urllib.parse.urlencode(given, True)
        self.assertEqual(expect, result)

        # latin-1
        given = (("\u00a0", "\u00c1"),)
        expect = '%A0=%C1'
        result = urllib.parse.urlencode(given, True, encoding="latin-1")
        self.assertEqual(expect, result)

        given = (("\u00a0", (42, "\u00c1")),)
        expect = '%A0=42&%A0=%C1'
        result = urllib.parse.urlencode(given, True, encoding="latin-1")
        self.assertEqual(expect, result)

    def test_urlencode_bytes(self):
        given = ((b'\xa0\x24', b'\xc1\x24'),)
        expect = '%A0%24=%C1%24'
        result = urllib.parse.urlencode(given)
        self.assertEqual(expect, result)
        result = urllib.parse.urlencode(given, True)
        self.assertEqual(expect, result)

        # Sequence of values
        given = ((b'\xa0\x24', (42, b'\xc1\x24')),)
        expect = '%A0%24=42&%A0%24=%C1%24'
        result = urllib.parse.urlencode(given, True)
        self.assertEqual(expect, result)

    def test_urlencode_encoding_safe_parameter(self):

        # Send '$' (\x24) as safe character
        # Default utf-8 encoding

        given = ((b'\xa0\x24', b'\xc1\x24'),)
        result = urllib.parse.urlencode(given, safe=":$")
        expect = '%A0$=%C1$'
        self.assertEqual(expect, result)

        given = ((b'\xa0\x24', b'\xc1\x24'),)
        result = urllib.parse.urlencode(given, doseq=True, safe=":$")
        expect = '%A0$=%C1$'
        self.assertEqual(expect, result)

        # Safe parameter in sequence
        given = ((b'\xa0\x24', (b'\xc1\x24', 0xd, 42)),)
        expect = '%A0$=%C1$&%A0$=13&%A0$=42'
        result = urllib.parse.urlencode(given, True, safe=":$")
        self.assertEqual(expect, result)

        # Test all above in latin-1 encoding

        given = ((b'\xa0\x24', b'\xc1\x24'),)
        result = urllib.parse.urlencode(given, safe=":$",
                                        encoding="latin-1")
        expect = '%A0$=%C1$'
        self.assertEqual(expect, result)

        given = ((b'\xa0\x24', b'\xc1\x24'),)
        expect = '%A0$=%C1$'
        result = urllib.parse.urlencode(given, doseq=True, safe=":$",
                                        encoding="latin-1")
        # This result used to be computed but never checked.
        self.assertEqual(expect, result)

        given = ((b'\xa0\x24', (b'\xc1\x24', 0xd, 42)),)
        expect = '%A0$=%C1$&%A0$=13&%A0$=42'
        result = urllib.parse.urlencode(given, True, safe=":$",
                                        encoding="latin-1")
        self.assertEqual(expect, result)
1237
class Pathname_Tests(unittest.TestCase):
    """Test pathname2url() and url2pathname()"""

    def test_basic(self):
        # Make sure simple tests pass
        expected_path = os.path.join("parts", "of", "a", "path")
        expected_url = "parts/of/a/path"
        result = urllib.request.pathname2url(expected_path)
        self.assertEqual(expected_url, result,
                         "pathname2url() failed; %s != %s" %
                         (expected_url, result))
        result = urllib.request.url2pathname(expected_url)
        self.assertEqual(expected_path, result,
                         "url2pathname() failed; %s != %s" %
                         (expected_path, result))

    def test_quoting(self):
        # Test automatic quoting and unquoting works for pathname2url() and
        # url2pathname() respectively
        given = os.path.join("needs", "quot=ing", "here")
        expect = "needs/%s/here" % urllib.parse.quote("quot=ing")
        result = urllib.request.pathname2url(given)
        self.assertEqual(expect, result,
                         "pathname2url() failed; %s != %s" %
                         (expect, result))
        # Round trip: converting the URL back must give the original path
        expect = given
        result = urllib.request.url2pathname(result)
        self.assertEqual(expect, result,
                         "url2pathname() failed; %s != %s" %
                         (expect, result))
        given = os.path.join("make sure", "using_quote")
        expect = "%s/using_quote" % urllib.parse.quote("make sure")
        result = urllib.request.pathname2url(given)
        self.assertEqual(expect, result,
                         "pathname2url() failed; %s != %s" %
                         (expect, result))
        # A '+' in the URL must be kept as-is, not turned into a space
        given = "make+sure/using_unquote"
        expect = os.path.join("make+sure", "using_unquote")
        result = urllib.request.url2pathname(given)
        self.assertEqual(expect, result,
                         "url2pathname() failed; %s != %s" %
                         (expect, result))

    @unittest.skipUnless(sys.platform == 'win32',
                         'test specific to the urllib.url2path function.')
    def test_ntpath(self):
        # Several spellings of a drive root must map to the same path
        given = ('/C:/', '///C:/', '/C|//')
        expect = 'C:\\'
        for url in given:
            result = urllib.request.url2pathname(url)
            self.assertEqual(expect, result,
                             'urllib.request.url2pathname() failed; %s != %s' %
                             (expect, result))
        given = '///C|/path'
        expect = 'C:\\path'
        result = urllib.request.url2pathname(given)
        self.assertEqual(expect, result,
                         'urllib.request.url2pathname() failed; %s != %s' %
                         (expect, result))
1297
class Utility_Tests(unittest.TestCase):
    """Tests for the assorted utility functions of urllib."""

    def test_thishost(self):
        """Test the urllib.request.thishost utility function returns a tuple"""
        host_addresses = urllib.request.thishost()
        self.assertIsInstance(host_addresses, tuple)
1304
Senthil Kumaran690ce9b2009-05-05 18:41:13 +00001305
1306class URLopener_Tests(unittest.TestCase):
1307 """Testcase to test the open method of URLopener class."""
1308
1309 def test_quoted_open(self):
1310 class DummyURLopener(urllib.request.URLopener):
1311 def open_spam(self, url):
1312 return url
Ezio Melotti79b99db2013-02-21 02:41:42 +02001313 with support.check_warnings(
1314 ('DummyURLopener style of invoking requests is deprecated.',
1315 DeprecationWarning)):
1316 self.assertEqual(DummyURLopener().open(
1317 'spam://example/ /'),'//example/%20/')
Senthil Kumaran690ce9b2009-05-05 18:41:13 +00001318
Ezio Melotti79b99db2013-02-21 02:41:42 +02001319 # test the safe characters are not quoted by urlopen
1320 self.assertEqual(DummyURLopener().open(
1321 "spam://c:|windows%/:=&?~#+!$,;'@()*[]|/path/"),
1322 "//c:|windows%/:=&?~#+!$,;'@()*[]|/path/")
Senthil Kumaran734f0592010-02-20 22:19:04 +00001323
# The FTP wrapper tests below are commented out.
# It is unclear why they keep failing on Windows and SPARC.
# Everywhere else they work fine, but on those machines they sometimes
# fail in one of the tests and sometimes in another.  I have a Linux
# machine, and the tests pass there.
# If anybody has one of the problematic environments, please help!
#  -- Facundo
1331#
1332# def server(evt):
Georg Brandlf78e02b2008-06-10 17:40:04 +00001333# import socket, time
Guido van Rossume7ba4952007-06-06 23:52:48 +00001334# serv = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
1335# serv.settimeout(3)
1336# serv.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
1337# serv.bind(("", 9093))
Charles-François Natali6e204602014-07-23 19:28:13 +01001338# serv.listen()
Guido van Rossume7ba4952007-06-06 23:52:48 +00001339# try:
1340# conn, addr = serv.accept()
1341# conn.send("1 Hola mundo\n")
1342# cantdata = 0
1343# while cantdata < 13:
1344# data = conn.recv(13-cantdata)
1345# cantdata += len(data)
1346# time.sleep(.3)
1347# conn.send("2 No more lines\n")
1348# conn.close()
1349# except socket.timeout:
1350# pass
1351# finally:
1352# serv.close()
1353# evt.set()
1354#
1355# class FTPWrapperTests(unittest.TestCase):
1356#
1357# def setUp(self):
Georg Brandlf78e02b2008-06-10 17:40:04 +00001358# import ftplib, time, threading
Guido van Rossume7ba4952007-06-06 23:52:48 +00001359# ftplib.FTP.port = 9093
1360# self.evt = threading.Event()
1361# threading.Thread(target=server, args=(self.evt,)).start()
1362# time.sleep(.1)
1363#
1364# def tearDown(self):
1365# self.evt.wait()
1366#
1367# def testBasic(self):
1368# # connects
1369# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [])
Georg Brandlf78e02b2008-06-10 17:40:04 +00001370# ftp.close()
Guido van Rossume7ba4952007-06-06 23:52:48 +00001371#
1372# def testTimeoutNone(self):
Georg Brandlf78e02b2008-06-10 17:40:04 +00001373# # global default timeout is ignored
1374# import socket
Serhiy Storchaka25d8aea2014-02-08 14:50:08 +02001375# self.assertIsNone(socket.getdefaulttimeout())
Guido van Rossume7ba4952007-06-06 23:52:48 +00001376# socket.setdefaulttimeout(30)
1377# try:
1378# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [])
1379# finally:
Georg Brandlf78e02b2008-06-10 17:40:04 +00001380# socket.setdefaulttimeout(None)
Guido van Rossume7ba4952007-06-06 23:52:48 +00001381# self.assertEqual(ftp.ftp.sock.gettimeout(), 30)
Georg Brandlf78e02b2008-06-10 17:40:04 +00001382# ftp.close()
Guido van Rossume7ba4952007-06-06 23:52:48 +00001383#
Georg Brandlf78e02b2008-06-10 17:40:04 +00001384# def testTimeoutDefault(self):
1385# # global default timeout is used
1386# import socket
Serhiy Storchaka25d8aea2014-02-08 14:50:08 +02001387# self.assertIsNone(socket.getdefaulttimeout())
Georg Brandlf78e02b2008-06-10 17:40:04 +00001388# socket.setdefaulttimeout(30)
1389# try:
1390# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [])
1391# finally:
1392# socket.setdefaulttimeout(None)
1393# self.assertEqual(ftp.ftp.sock.gettimeout(), 30)
1394# ftp.close()
1395#
1396# def testTimeoutValue(self):
1397# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [],
1398# timeout=30)
1399# self.assertEqual(ftp.ftp.sock.gettimeout(), 30)
1400# ftp.close()
Guido van Rossume7ba4952007-06-06 23:52:48 +00001401
Senthil Kumaran8b081b72013-04-10 20:53:12 -07001402
class RequestTests(unittest.TestCase):
    """Checks of the HTTP method reported by urllib.request.Request."""

    def test_default_values(self):
        # With no explicit method the verb follows from the data
        # argument: absent -> GET, present (even an empty dict) -> POST.
        make_request = urllib.request.Request
        without_data = make_request("http://www.python.org")
        self.assertEqual(without_data.get_method(), 'GET')
        with_data = make_request("http://www.python.org", {})
        self.assertEqual(with_data.get_method(), 'POST')

    def test_with_method_arg(self):
        make_request = urllib.request.Request

        # An explicit method is reported as given when there is no data ...
        head_only = make_request("http://www.python.org", method='HEAD')
        self.assertEqual(head_only.method, 'HEAD')
        self.assertEqual(head_only.get_method(), 'HEAD')

        # ... and also when data is supplied.
        head_with_data = make_request("http://www.python.org", {},
                                      method='HEAD')
        self.assertEqual(head_with_data.method, 'HEAD')
        self.assertEqual(head_with_data.get_method(), 'HEAD')

        # Reassigning the attribute afterwards changes get_method() too.
        mutable = make_request("http://www.python.org", method='GET')
        self.assertEqual(mutable.get_method(), 'GET')
        mutable.method = 'HEAD'
        self.assertEqual(mutable.get_method(), 'HEAD')
Skip Montanaro080c9972001-01-28 21:12:22 +00001425
1426
class URL2PathNameTests(unittest.TestCase):
    """Tests for the url2pathname() function imported from nturl2path."""

    def test_converting_drive_letter(self):
        # Both the legacy "C|" and the "C:" drive spellings are accepted.
        self.assertEqual(url2pathname("///C|"), 'C:')
        self.assertEqual(url2pathname("///C:"), 'C:')
        self.assertEqual(url2pathname("///C|/"), 'C:\\')

    def test_converting_when_no_drive_letter(self):
        # The trailing backslash is a separate literal because a raw
        # string cannot end in a backslash.
        self.assertEqual(url2pathname("///C/test/"), r'\\\C\test' '\\')
        self.assertEqual(url2pathname("////C/test/"), r'\\C\test' '\\')

    def test_simple_compare(self):
        self.assertEqual(url2pathname("///C|/foo/bar/spam.foo"),
                         r'C:\foo\bar\spam.foo')

    def test_non_ascii_drive_letter(self):
        # OSError is the canonical exception name; IOError is merely a
        # backwards-compatibility alias of it.
        self.assertRaises(OSError, url2pathname, "///\u00e8|/")

    def test_roundtrip_url2pathname(self):
        list_of_paths = ['C:',
                         r'\\\C\test\\',
                         r'C:\foo\bar\spam.foo',
                         ]
        for path in list_of_paths:
            # subTest reports the offending path and still checks the rest.
            with self.subTest(path=path):
                self.assertEqual(url2pathname(pathname2url(path)), path)
Senthil Kumaran277e9092013-04-10 20:51:19 -07001453
class PathName2URLTests(unittest.TestCase):
    """Tests for the pathname2url() function imported from nturl2path."""

    def test_converting_drive_letter(self):
        # A bare drive and a drive root are expected to give the same URL.
        self.assertEqual(pathname2url("C:"), '///C:')
        self.assertEqual(pathname2url("C:\\"), '///C:')

    def test_converting_when_no_drive_letter(self):
        # The trailing backslash is a separate literal because a raw
        # string cannot end in a backslash.
        self.assertEqual(pathname2url(r"\\\folder\test" "\\"),
                         '/////folder/test/')
        self.assertEqual(pathname2url(r"\\folder\test" "\\"),
                         '////folder/test/')
        self.assertEqual(pathname2url(r"\folder\test" "\\"),
                         '/folder/test/')

    def test_simple_compare(self):
        self.assertEqual(pathname2url(r'C:\foo\bar\spam.foo'),
                         "///C:/foo/bar/spam.foo")

    def test_long_drive_letter(self):
        # OSError is the canonical exception name; IOError is merely a
        # backwards-compatibility alias of it.
        self.assertRaises(OSError, pathname2url, "XX:\\")

    def test_roundtrip_pathname2url(self):
        list_of_paths = ['///C:',
                         '/////folder/test/',
                         '///C:/foo/bar/spam.foo',
                         ]
        for path in list_of_paths:
            # subTest reports the offending URL and still checks the rest.
            with self.subTest(path=path):
                self.assertEqual(pathname2url(url2pathname(path)), path)
Brett Cannon74bfd702003-04-25 09:39:47 +00001481
# Run the whole module's tests when it is executed as a script.
if __name__ == "__main__":
    unittest.main()