blob: e197a3f92eaf7aabf020ccd0221a7a70dbacd2be [file] [log] [blame]
"""Regression tests for urllib"""
2
Jeremy Hylton1afc1692008-06-18 20:49:58 +00003import urllib.parse
4import urllib.request
guido@google.coma119df92011-03-29 11:41:02 -07005import urllib.error
Georg Brandl24420152008-05-26 16:32:26 +00006import http.client
Barry Warsaw820c1202008-06-12 04:06:45 +00007import email.message
Jeremy Hylton66dc8c52007-08-04 03:42:26 +00008import io
Brett Cannon74bfd702003-04-25 09:39:47 +00009import unittest
Benjamin Peterson3c2dca62014-06-07 15:08:04 -070010from unittest.mock import patch
Benjamin Petersonee8712c2008-05-20 21:35:26 +000011from test import support
Brett Cannon74bfd702003-04-25 09:39:47 +000012import os
Antoine Pitrou07df6552014-11-02 17:23:14 +010013try:
14 import ssl
15except ImportError:
16 ssl = None
Senthil Kumaran2d2ea1b2011-04-14 13:16:30 +080017import sys
Georg Brandl5a650a22005-08-26 08:51:34 +000018import tempfile
Senthil Kumaran277e9092013-04-10 20:51:19 -070019from nturl2path import url2pathname, pathname2url
Jeremy Hylton6102e292000-08-31 15:48:10 +000020
Senthil Kumaranc5c5a142012-01-14 19:09:04 +080021from base64 import b64encode
Georg Brandl2daf6ae2012-02-20 19:54:16 +010022import collections
Senthil Kumaranc5c5a142012-01-14 19:09:04 +080023
Senthil Kumaran8b081b72013-04-10 20:53:12 -070024
def hexescape(char):
    """Return *char* percent-escaped as RFC 2396 specifies.

    The result is ``%`` followed by the character's code point written in
    upper-case hexadecimal, zero-padded to at least two digits.
    """
    return "%%%02X" % ord(char)
Jeremy Hylton6102e292000-08-31 15:48:10 +000031
# Shortcut for testing FancyURLopener
# The opener built for proxy-less calls is cached here and reused.
_urlopener = None


def urlopen(url, data=None, proxies=None):
    """urlopen(url [, data]) -> open file-like object

    When *proxies* is given, a dedicated (uncached) FancyURLopener is
    created for this call; otherwise the module-level opener is created on
    first use and shared by later calls.
    """
    global _urlopener
    if proxies is not None:
        opener = urllib.request.FancyURLopener(proxies=proxies)
    else:
        if not _urlopener:
            _urlopener = FancyURLopener()
        opener = _urlopener
    if data is not None:
        return opener.open(url, data)
    return opener.open(url)
50
Senthil Kumarance260142011-11-01 01:35:17 +080051
def FancyURLopener():
    """Build a urllib.request.FancyURLopener inside a warnings check.

    The construction runs under support.check_warnings() with a filter for
    the FancyURLopener DeprecationWarning message.
    """
    deprecation_filter = (
        'FancyURLopener style of invoking requests is deprecated.',
        DeprecationWarning)
    with support.check_warnings(deprecation_filter):
        opener = urllib.request.FancyURLopener()
    return opener
57
58
def fakehttp(fakedata):
    """Return an HTTPConnection subclass that is backed by canned bytes.

    Connecting an instance of the returned class does no network I/O: the
    "socket" is an in-memory buffer pre-filled with *fakedata*.  Whatever
    is passed to the socket's sendall() is stored on the returned class as
    ``buf`` so tests can inspect it.
    """
    class FakeSocket(io.BytesIO):
        # Count of outstanding users; the buffer is only really closed
        # once close() has brought this down to zero.
        io_refs = 1

        def sendall(self, data):
            # Remember the most recently sent payload on the connection class.
            FakeHTTPConnection.buf = data

        def makefile(self, *args, **kwds):
            # The socket doubles as its own file object.
            self.io_refs += 1
            return self

        def read(self, amt=None):
            # A closed fake socket reads as EOF instead of raising.
            return b"" if self.closed else super().read(amt)

        def readline(self, length=None):
            return b"" if self.closed else super().readline(length)

        def close(self):
            self.io_refs -= 1
            if not self.io_refs:
                super().close()

    class FakeHTTPConnection(http.client.HTTPConnection):

        # buffer to store data for verification in urlopen tests.
        buf = None
        # One socket object is shared by every instance of this class.
        fakesock = FakeSocket(fakedata)

        def connect(self):
            self.sock = self.fakesock

    return FakeHTTPConnection
95
96
class FakeHTTPMixin:
    """Mixin that swaps http.client.HTTPConnection for a canned-data fake."""

    def fakehttp(self, fakedata):
        """Install a fake HTTPConnection serving *fakedata*.

        The class being replaced is remembered for unfakehttp().
        """
        replaced = http.client.HTTPConnection
        self._connection_class = replaced
        http.client.HTTPConnection = fakehttp(fakedata)

    def unfakehttp(self):
        """Put back the class saved by the last fakehttp() call."""
        http.client.HTTPConnection = self._connection_class
104
105
class FakeFTPMixin:
    """Mixin that swaps urllib.request.ftpwrapper for a do-nothing stub."""

    def fakeftp(self):
        """Install the stub, remembering the replaced class for unfakeftp()."""
        class FakeFtpWrapper:
            def __init__(self, user, passwd, host, port, dirs, timeout=None,
                         persistent=True):
                pass

            def retrfile(self, file, type):
                # An empty in-memory file paired with a length of 0.
                empty_payload = io.BytesIO()
                return empty_payload, 0

            def close(self):
                pass

        replaced = urllib.request.ftpwrapper
        self._ftpwrapper_class = replaced
        urllib.request.ftpwrapper = FakeFtpWrapper

    def unfakeftp(self):
        """Put back the class saved by the last fakeftp() call."""
        urllib.request.ftpwrapper = self._ftpwrapper_class
124
125
class urlopen_FileTests(unittest.TestCase):
    """Test urlopen() opening a temporary file.

    Try to test as much functionality as possible so as to cut down on reliance
    on connecting to the Net for testing.

    """

    def setUp(self):
        # Create a temp file to use for testing
        self.text = bytes("test_urllib: %s\n" % self.__class__.__name__,
                          "ascii")
        with open(support.TESTFN, 'wb') as f:
            f.write(self.text)
        self.pathname = support.TESTFN
        # tearDown() is not run when setUp() fails, so remove the file here
        # if the URL cannot be opened instead of leaking it.
        try:
            self.returned_obj = urlopen("file:%s" % self.pathname)
        except BaseException:
            os.remove(support.TESTFN)
            raise

    def tearDown(self):
        """Shut down the open object"""
        self.returned_obj.close()
        os.remove(support.TESTFN)

    def test_interface(self):
        # Make sure object returned by urlopen() has the specified methods
        for attr in ("read", "readline", "readlines", "fileno",
                     "close", "info", "geturl", "getcode", "__iter__"):
            self.assertTrue(hasattr(self.returned_obj, attr),
                            "object returned by urlopen() lacks %s attribute" %
                            attr)

    def test_read(self):
        self.assertEqual(self.text, self.returned_obj.read())

    def test_readline(self):
        self.assertEqual(self.text, self.returned_obj.readline())
        self.assertEqual(b'', self.returned_obj.readline(),
                         "calling readline() after exhausting the file did not"
                         " return an empty string")

    def test_readlines(self):
        lines_list = self.returned_obj.readlines()
        self.assertEqual(len(lines_list), 1,
                         "readlines() returned the wrong number of lines")
        self.assertEqual(lines_list[0], self.text,
                         "readlines() returned improper text")

    def test_fileno(self):
        file_num = self.returned_obj.fileno()
        self.assertIsInstance(file_num, int, "fileno() did not return an int")
        self.assertEqual(os.read(file_num, len(self.text)), self.text,
                         "Reading on the file descriptor returned by fileno() "
                         "did not return the expected text")

    def test_close(self):
        # Test close() by calling it here and then having it be called again
        # by the tearDown() method for the test
        self.returned_obj.close()

    def test_info(self):
        self.assertIsInstance(self.returned_obj.info(), email.message.Message)

    def test_geturl(self):
        self.assertEqual(self.returned_obj.geturl(), self.pathname)

    def test_getcode(self):
        self.assertIsNone(self.returned_obj.getcode())

    def test_iter(self):
        # Test iterator
        # Don't need to count number of iterations since test would fail the
        # instant it returned anything beyond the first line from the
        # comparison.
        # Use the iterator in the usual implicit way to test for ticket #4608.
        for line in self.returned_obj:
            self.assertEqual(line, self.text)

    def test_relativelocalfile(self):
        # A relative path without a URL scheme is rejected.
        self.assertRaises(ValueError, urllib.request.urlopen,
                          './' + self.pathname)
207
class ProxyTests(unittest.TestCase):
    """Tests for reading proxy settings from environment variables."""

    def setUp(self):
        # Records changes to env vars
        self.env = support.EnvironmentVarGuard()
        # Delete all proxy related env vars.  The names are collected first
        # because unsetting a variable modifies os.environ.
        proxy_vars = [name for name in os.environ if 'proxy' in name.lower()]
        for name in proxy_vars:
            self.env.unset(name)

    def tearDown(self):
        # Restore all proxy related env vars
        self.env.__exit__()
        del self.env

    def test_getproxies_environment_keep_no_proxies(self):
        self.env.set('NO_PROXY', 'localhost')
        found = urllib.request.getproxies_environment()
        # getproxies_environment use lowered case truncated (no '_proxy') keys
        self.assertEqual('localhost', found['no'])
        # List of no_proxies with space.
        spaced_list = 'localhost, anotherdomain.com, newdomain.com'
        self.env.set('NO_PROXY', spaced_list)
        bypassed = urllib.request.proxy_bypass_environment('anotherdomain.com')
        self.assertTrue(bypassed)
Benjamin Peterson9bc93512008-09-22 22:10:59 +0000231
class urlopen_HttpTests(unittest.TestCase, FakeHTTPMixin, FakeFTPMixin):
    """Test urlopen() opening a fake http connection."""

    def check_read(self, ver):
        # Serve a minimal 200 response that advertises HTTP version *ver*
        # (bytes) and check the body, URL and status code seen by the caller.
        canned = b"HTTP/" + ver + b" 200 OK\r\n\r\nHello!"
        self.fakehttp(canned)
        try:
            response = urlopen("http://python.org/")
            self.assertEqual(response.readline(), b"Hello!")
            self.assertEqual(response.readline(), b"")
            self.assertEqual(response.geturl(), 'http://python.org/')
            self.assertEqual(response.getcode(), 200)
        finally:
            self.unfakehttp()

    def test_url_fragment(self):
        # Issue #11703: geturl() omits fragments in the original URL.
        target = 'http://docs.python.org/library/urllib.html#OK'
        self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello!")
        try:
            response = urllib.request.urlopen(target)
            self.assertEqual(response.geturl(), target)
        finally:
            self.unfakehttp()

    def test_willclose(self):
        self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello!")
        try:
            response = urlopen("http://www.python.org")
            self.assertTrue(response.fp.will_close)
        finally:
            self.unfakehttp()

    def test_read_0_9(self):
        # "0.9" response accepted (but not "simple responses" without
        # a status line)
        self.check_read(b"0.9")

    def test_read_1_0(self):
        self.check_read(b"1.0")

    def test_read_1_1(self):
        self.check_read(b"1.1")

    def test_read_bogus(self):
        # urlopen() should raise OSError for many error codes.
        canned = b'''HTTP/1.1 401 Authentication Required
Date: Wed, 02 Jan 2008 03:03:54 GMT
Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
Connection: close
Content-Type: text/html; charset=iso-8859-1
'''
        self.fakehttp(canned)
        try:
            with self.assertRaises(OSError):
                urlopen("http://python.org/")
        finally:
            self.unfakehttp()

    def test_invalid_redirect(self):
        # urlopen() should raise OSError for many error codes.
        canned = b'''HTTP/1.1 302 Found
Date: Wed, 02 Jan 2008 03:03:54 GMT
Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
Location: file://guidocomputer.athome.com:/python/license
Connection: close
Content-Type: text/html; charset=iso-8859-1
'''
        self.fakehttp(canned)
        try:
            expected = "Redirection to url 'file:"
            with self.assertRaisesRegex(urllib.error.HTTPError, expected):
                urlopen("http://python.org/")
        finally:
            self.unfakehttp()

    def test_redirect_limit_independent(self):
        # Ticket #12923: make sure independent requests each use their
        # own retry limit.
        canned = b'''HTTP/1.1 302 Found
Location: file://guidocomputer.athome.com:/python/license
Connection: close
'''
        attempts = FancyURLopener().maxtries
        for _ in range(attempts):
            self.fakehttp(canned)
            try:
                with self.assertRaises(urllib.error.HTTPError):
                    urlopen("http://something")
            finally:
                self.unfakehttp()

    def test_empty_socket(self):
        # urlopen() raises OSError if the underlying socket does not send any
        # data. (#1680230)
        self.fakehttp(b'')
        try:
            with self.assertRaises(OSError):
                urlopen("http://something")
        finally:
            self.unfakehttp()

    def test_missing_localfile(self):
        # Test for #10836
        with self.assertRaises(urllib.error.URLError) as caught:
            urlopen('file://localhost/a/file/which/doesnot/exists.py')
        error = caught.exception
        self.assertTrue(error.filename)
        self.assertTrue(error.reason)

    def test_file_notexists(self):
        # The URL opens while the file exists and fails once it is removed.
        fd, tmp_file = tempfile.mkstemp()
        url_path = tmp_file.replace(os.path.sep, '/')
        tmp_fileurl = 'file://localhost/' + url_path
        try:
            self.assertTrue(os.path.exists(tmp_file))
            with urlopen(tmp_fileurl) as fobj:
                self.assertTrue(fobj)
        finally:
            os.close(fd)
            os.unlink(tmp_file)
        self.assertFalse(os.path.exists(tmp_file))
        with self.assertRaises(urllib.error.URLError):
            urlopen(tmp_fileurl)

    def test_ftp_nohost(self):
        test_ftp_url = 'ftp:///path'
        with self.assertRaises(urllib.error.URLError) as caught:
            urlopen(test_ftp_url)
        error = caught.exception
        self.assertFalse(error.filename)
        self.assertTrue(error.reason)

    def test_ftp_nonexisting(self):
        with self.assertRaises(urllib.error.URLError) as caught:
            urlopen('ftp://localhost/a/file/which/doesnot/exists.py')
        error = caught.exception
        self.assertFalse(error.filename)
        self.assertTrue(error.reason)

    @patch.object(urllib.request, 'MAXFTPCACHE', 0)
    def test_ftp_cache_pruning(self):
        # With the cache limit patched to 0, opening an FTP URL while the
        # cache already holds an entry must not fail.
        self.fakeftp()
        try:
            cached = urllib.request.ftpwrapper(
                'user', 'pass', 'localhost', 21, [])
            urllib.request.ftpcache['test'] = cached
            urlopen('ftp://localhost')
        finally:
            self.unfakeftp()

    def test_userpass_inurl(self):
        self.fakehttp(b"HTTP/1.0 200 OK\r\n\r\nHello!")
        try:
            response = urlopen("http://user:pass@python.org/")
            self.assertEqual(response.readline(), b"Hello!")
            self.assertEqual(response.readline(), b"")
            self.assertEqual(response.geturl(), 'http://user:pass@python.org/')
            self.assertEqual(response.getcode(), 200)
        finally:
            self.unfakehttp()

    def test_userpass_inurl_w_spaces(self):
        self.fakehttp(b"HTTP/1.0 200 OK\r\n\r\nHello!")
        try:
            userpass = "a b:c d"
            url = "http://{}@python.org/".format(userpass)
            fakehttp_wrapper = http.client.HTTPConnection
            token = b64encode(userpass.encode("ASCII")).decode("ASCII")
            authorization = "Authorization: Basic %s\r\n" % token
            response = urlopen(url)
            # The authorization header must be in place
            sent = fakehttp_wrapper.buf.decode("UTF-8")
            self.assertIn(authorization, sent)
            self.assertEqual(response.readline(), b"Hello!")
            self.assertEqual(response.readline(), b"")
            # the spaces are quoted in URL so no match
            self.assertNotEqual(response.geturl(), url)
            self.assertEqual(response.getcode(), 200)
        finally:
            self.unfakehttp()

    def test_URLopener_deprecation(self):
        any_deprecation = ('', DeprecationWarning)
        with support.check_warnings(any_deprecation):
            urllib.request.URLopener()

    @unittest.skipUnless(ssl, "ssl module required")
    def test_cafile_and_context(self):
        # Passing both cafile and context is rejected with ValueError.
        context = ssl.create_default_context()
        with self.assertRaises(ValueError):
            urllib.request.urlopen(
                "https://localhost", cafile="/nonexistent/path", context=context
            )
412
class urlopen_DataTests(unittest.TestCase):
    """Test urlopen() opening a data URL."""

    def _open(self, url):
        # Open *url* and register the response for closing once the test is
        # over, so no response object is left open.
        response = urllib.request.urlopen(url)
        self.addCleanup(response.close)
        return response

    def setUp(self):
        # text containing URL special- and unicode-characters
        self.text = "test data URLs :;,%=& \u00f6 \u00c4 "
        # 2x1 pixel RGB PNG image with one black and one white pixel
        self.image = (
            b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\x02\x00\x00\x00'
            b'\x01\x08\x02\x00\x00\x00{@\xe8\xdd\x00\x00\x00\x01sRGB\x00\xae'
            b'\xce\x1c\xe9\x00\x00\x00\x0fIDAT\x08\xd7c```\xf8\xff\xff?\x00'
            b'\x06\x01\x02\xfe\no/\x1e\x00\x00\x00\x00IEND\xaeB`\x82')

        self.text_url = (
            "data:text/plain;charset=UTF-8,test%20data%20URLs%20%3A%3B%2C%25%3"
            "D%26%20%C3%B6%20%C3%84%20")
        self.text_url_base64 = (
            "data:text/plain;charset=ISO-8859-1;base64,dGVzdCBkYXRhIFVSTHMgOjs"
            "sJT0mIPYgxCA%3D")
        # base64 encoded data URL that contains ignorable spaces,
        # such as "\n", " ", "%0A", and "%20".
        self.image_url = (
            "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAIAAAABCAIAAAB7\n"
            "QOjdAAAAAXNSR0IArs4c6QAAAA9JREFUCNdj%0AYGBg%2BP//PwAGAQL%2BCm8 "
            "vHgAAAABJRU5ErkJggg%3D%3D%0A%20")

        self.text_url_resp = self._open(self.text_url)
        self.text_url_base64_resp = self._open(self.text_url_base64)
        self.image_url_resp = self._open(self.image_url)

    def test_interface(self):
        # Make sure object returned by urlopen() has the specified methods
        for attr in ("read", "readline", "readlines",
                     "close", "info", "geturl", "getcode", "__iter__"):
            self.assertTrue(hasattr(self.text_url_resp, attr),
                            "object returned by urlopen() lacks %s attribute" %
                            attr)

    def test_info(self):
        self.assertIsInstance(self.text_url_resp.info(), email.message.Message)
        self.assertEqual(self.text_url_base64_resp.info().get_params(),
                         [('text/plain', ''), ('charset', 'ISO-8859-1')])
        self.assertEqual(self.image_url_resp.info()['content-length'],
                         str(len(self.image)))
        # A data URL with no media type reports the default one.
        self.assertEqual(self._open("data:,").info().get_params(),
                         [('text/plain', ''), ('charset', 'US-ASCII')])

    def test_geturl(self):
        self.assertEqual(self.text_url_resp.geturl(), self.text_url)
        self.assertEqual(self.text_url_base64_resp.geturl(),
                         self.text_url_base64)
        self.assertEqual(self.image_url_resp.geturl(), self.image_url)

    def test_read_text(self):
        # Decode the payload with the charset announced in the headers.
        charset = dict(self.text_url_resp.info().get_params())['charset']
        self.assertEqual(self.text_url_resp.read().decode(charset), self.text)

    def test_read_text_base64(self):
        charset = dict(
            self.text_url_base64_resp.info().get_params())['charset']
        self.assertEqual(self.text_url_base64_resp.read().decode(charset),
                         self.text)

    def test_read_image(self):
        self.assertEqual(self.image_url_resp.read(), self.image)

    def test_missing_comma(self):
        self.assertRaises(ValueError, urllib.request.urlopen,
                          'data:text/plain')

    def test_invalid_base64_data(self):
        # missing padding character
        self.assertRaises(ValueError, urllib.request.urlopen,
                          'data:;base64,Cg=')
485
class urlretrieve_FileTests(unittest.TestCase):
    """Test urllib.urlretrieve() on local files"""

    def setUp(self):
        # Create a list of temporary files. Each item in the list is a file
        # name (absolute path or relative to the current working directory).
        # All files in this list will be deleted in the tearDown method. Note,
        # this only helps to makes sure temporary files get deleted, but it
        # does nothing about trying to close files that may still be open. It
        # is the responsibility of the developer to properly close files even
        # when exceptional conditions occur.
        self.tempFiles = []

        # Create a temporary file.
        self.registerFileForCleanUp(support.TESTFN)
        self.text = b'testing urllib.urlretrieve'
        with open(support.TESTFN, 'wb') as FILE:
            FILE.write(self.text)

    def tearDown(self):
        # Delete the temporary files.  Removal is best effort: a file that
        # is already gone (or cannot be removed) must not fail the test.
        for each in self.tempFiles:
            try:
                os.remove(each)
            except OSError:
                pass

    def constructLocalFileUrl(self, filePath):
        """Return a ``file://`` URL for *filePath*.

        Raises unittest.SkipTest when the absolute path cannot be encoded
        as UTF-8.
        """
        filePath = os.path.abspath(filePath)
        try:
            filePath.encode("utf-8")
        except UnicodeEncodeError:
            raise unittest.SkipTest("filePath is not encodable to utf8")
        return "file://%s" % urllib.request.pathname2url(filePath)

    def createNewTempFile(self, data=b""):
        """Creates a new temporary file containing the specified data,
        registers the file for deletion during the test fixture tear down, and
        returns the absolute path of the file."""

        newFd, newFilePath = tempfile.mkstemp()
        self.registerFileForCleanUp(newFilePath)
        # The file object owns the descriptor and closes it on exit, even
        # when the write fails.
        with os.fdopen(newFd, "wb") as newFile:
            newFile.write(data)
        return newFilePath

    def registerFileForCleanUp(self, fileName):
        """Schedule *fileName* for removal in tearDown()."""
        self.tempFiles.append(fileName)

    def test_basic(self):
        # Make sure that a local file just gets its own location returned and
        # a headers value is returned.
        result = urllib.request.urlretrieve("file:%s" % support.TESTFN)
        self.assertEqual(result[0], support.TESTFN)
        self.assertIsInstance(result[1], email.message.Message,
                              "did not get an email.message.Message instance "
                              "as second returned value")

    def test_copy(self):
        # Test that setting the filename argument works.
        second_temp = "%s.2" % support.TESTFN
        self.registerFileForCleanUp(second_temp)
        result = urllib.request.urlretrieve(self.constructLocalFileUrl(
            support.TESTFN), second_temp)
        self.assertEqual(second_temp, result[0])
        self.assertTrue(os.path.exists(second_temp), "copy of the file was not "
                                                     "made")
        with open(second_temp, 'rb') as FILE:
            text = FILE.read()
        self.assertEqual(self.text, text)

    def test_reporthook(self):
        # Make sure that the reporthook works.
        expected_block = 0

        def hooktester(block_count, block_read_size, file_size):
            # Block numbers must arrive in order, starting from 0.
            nonlocal expected_block
            self.assertIsInstance(block_count, int)
            self.assertIsInstance(block_read_size, int)
            self.assertIsInstance(file_size, int)
            self.assertEqual(block_count, expected_block)
            expected_block += 1

        second_temp = "%s.2" % support.TESTFN
        self.registerFileForCleanUp(second_temp)
        urllib.request.urlretrieve(
            self.constructLocalFileUrl(support.TESTFN),
            second_temp, hooktester)

    def test_reporthook_0_bytes(self):
        # Test on zero length file. Should call reporthook only 1 time.
        report = []

        def hooktester(block_count, block_read_size, file_size):
            report.append((block_count, block_read_size, file_size))

        srcFileName = self.createNewTempFile()
        urllib.request.urlretrieve(self.constructLocalFileUrl(srcFileName),
                                   support.TESTFN, hooktester)
        self.assertEqual(len(report), 1)
        self.assertEqual(report[0][2], 0)

    def test_reporthook_5_bytes(self):
        # Test on 5 byte file. Should call reporthook only 2 times (once when
        # the "network connection" is established and once when the block is
        # read).
        report = []

        def hooktester(block_count, block_read_size, file_size):
            report.append((block_count, block_read_size, file_size))

        srcFileName = self.createNewTempFile(b"x" * 5)
        urllib.request.urlretrieve(self.constructLocalFileUrl(srcFileName),
                                   support.TESTFN, hooktester)
        self.assertEqual(len(report), 2)
        self.assertEqual(report[0][2], 5)
        self.assertEqual(report[1][2], 5)

    def test_reporthook_8193_bytes(self):
        # Test on 8193 byte file. Should call reporthook only 3 times (once
        # when the "network connection" is established, once for the next 8192
        # bytes, and once for the last byte).
        report = []

        def hooktester(block_count, block_read_size, file_size):
            report.append((block_count, block_read_size, file_size))

        srcFileName = self.createNewTempFile(b"x" * 8193)
        urllib.request.urlretrieve(self.constructLocalFileUrl(srcFileName),
                                   support.TESTFN, hooktester)
        self.assertEqual(len(report), 3)
        self.assertEqual(report[0][2], 8193)
        self.assertEqual(report[0][1], 8192)
        self.assertEqual(report[1][1], 8192)
        self.assertEqual(report[2][1], 8192)
Skip Montanaro080c9972001-01-28 21:12:22 +0000624
Senthil Kumarance260142011-11-01 01:35:17 +0800625
class urlretrieve_HttpTests(unittest.TestCase, FakeHTTPMixin):
    """Test urllib.urlretrieve() using fake http connections"""

    def test_short_content_raises_ContentTooShortError(self):
        # The canned response announces 100 bytes of content but carries
        # far fewer.
        truncated = b'''HTTP/1.1 200 OK
Date: Wed, 02 Jan 2008 03:03:54 GMT
Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
Connection: close
Content-Length: 100
Content-Type: text/html; charset=iso-8859-1

FF
'''
        self.fakehttp(truncated)

        def _reporthook(par1, par2, par3):
            pass

        try:
            with self.assertRaises(urllib.error.ContentTooShortError):
                urllib.request.urlretrieve('http://example.com/',
                                           reporthook=_reporthook)
        finally:
            self.unfakehttp()

    def test_short_content_raises_ContentTooShortError_without_reporthook(self):
        # Same truncated response, retrieved without a report hook.
        truncated = b'''HTTP/1.1 200 OK
Date: Wed, 02 Jan 2008 03:03:54 GMT
Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
Connection: close
Content-Length: 100
Content-Type: text/html; charset=iso-8859-1

FF
'''
        self.fakehttp(truncated)
        try:
            with self.assertRaises(urllib.error.ContentTooShortError):
                urllib.request.urlretrieve('http://example.com/')
        finally:
            self.unfakehttp()
665
666
Brett Cannon74bfd702003-04-25 09:39:47 +0000667class QuotingTests(unittest.TestCase):
668 """Tests for urllib.quote() and urllib.quote_plus()
Tim Petersc2659cf2003-05-12 20:19:37 +0000669
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000670 According to RFC 2396 (Uniform Resource Identifiers), to escape a
671 character you write it as '%' + <2 character US-ASCII hex value>.
672 The Python code of ``'%' + hex(ord(<character>))[2:]`` escapes a
673 character properly. Case does not matter on the hex letters.
Brett Cannon74bfd702003-04-25 09:39:47 +0000674
675 The various character sets specified are:
Tim Petersc2659cf2003-05-12 20:19:37 +0000676
Brett Cannon74bfd702003-04-25 09:39:47 +0000677 Reserved characters : ";/?:@&=+$,"
678 Have special meaning in URIs and must be escaped if not being used for
679 their special meaning
680 Data characters : letters, digits, and "-_.!~*'()"
681 Unreserved and do not need to be escaped; can be, though, if desired
682 Control characters : 0x00 - 0x1F, 0x7F
683 Have no use in URIs so must be escaped
684 space : 0x20
685 Must be escaped
686 Delimiters : '<>#%"'
687 Must be escaped
688 Unwise : "{}|\^[]`"
689 Must be escaped
Tim Petersc2659cf2003-05-12 20:19:37 +0000690
Brett Cannon74bfd702003-04-25 09:39:47 +0000691 """
692
693 def test_never_quote(self):
694 # Make sure quote() does not quote letters, digits, and "_,.-"
695 do_not_quote = '' .join(["ABCDEFGHIJKLMNOPQRSTUVWXYZ",
696 "abcdefghijklmnopqrstuvwxyz",
697 "0123456789",
698 "_.-"])
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000699 result = urllib.parse.quote(do_not_quote)
Brett Cannon74bfd702003-04-25 09:39:47 +0000700 self.assertEqual(do_not_quote, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000701 "using quote(): %r != %r" % (do_not_quote, result))
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000702 result = urllib.parse.quote_plus(do_not_quote)
Brett Cannon74bfd702003-04-25 09:39:47 +0000703 self.assertEqual(do_not_quote, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000704 "using quote_plus(): %r != %r" % (do_not_quote, result))
Brett Cannon74bfd702003-04-25 09:39:47 +0000705
706 def test_default_safe(self):
707 # Test '/' is default value for 'safe' parameter
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000708 self.assertEqual(urllib.parse.quote.__defaults__[0], '/')
Brett Cannon74bfd702003-04-25 09:39:47 +0000709
710 def test_safe(self):
711 # Test setting 'safe' parameter does what it should do
712 quote_by_default = "<>"
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000713 result = urllib.parse.quote(quote_by_default, safe=quote_by_default)
Brett Cannon74bfd702003-04-25 09:39:47 +0000714 self.assertEqual(quote_by_default, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000715 "using quote(): %r != %r" % (quote_by_default, result))
Jeremy Hylton1ef7c6b2009-03-26 16:57:30 +0000716 result = urllib.parse.quote_plus(quote_by_default,
717 safe=quote_by_default)
Brett Cannon74bfd702003-04-25 09:39:47 +0000718 self.assertEqual(quote_by_default, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000719 "using quote_plus(): %r != %r" %
Brett Cannon74bfd702003-04-25 09:39:47 +0000720 (quote_by_default, result))
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000721 # Safe expressed as bytes rather than str
722 result = urllib.parse.quote(quote_by_default, safe=b"<>")
723 self.assertEqual(quote_by_default, result,
724 "using quote(): %r != %r" % (quote_by_default, result))
725 # "Safe" non-ASCII characters should have no effect
726 # (Since URIs are not allowed to have non-ASCII characters)
727 result = urllib.parse.quote("a\xfcb", encoding="latin-1", safe="\xfc")
728 expect = urllib.parse.quote("a\xfcb", encoding="latin-1", safe="")
729 self.assertEqual(expect, result,
730 "using quote(): %r != %r" %
731 (expect, result))
732 # Same as above, but using a bytes rather than str
733 result = urllib.parse.quote("a\xfcb", encoding="latin-1", safe=b"\xfc")
734 expect = urllib.parse.quote("a\xfcb", encoding="latin-1", safe="")
735 self.assertEqual(expect, result,
736 "using quote(): %r != %r" %
737 (expect, result))
Brett Cannon74bfd702003-04-25 09:39:47 +0000738
739 def test_default_quoting(self):
740 # Make sure all characters that should be quoted are by default sans
741 # space (separate test for that).
742 should_quote = [chr(num) for num in range(32)] # For 0x00 - 0x1F
743 should_quote.append('<>#%"{}|\^[]`')
744 should_quote.append(chr(127)) # For 0x7F
745 should_quote = ''.join(should_quote)
746 for char in should_quote:
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000747 result = urllib.parse.quote(char)
Brett Cannon74bfd702003-04-25 09:39:47 +0000748 self.assertEqual(hexescape(char), result,
Jeremy Hylton1ef7c6b2009-03-26 16:57:30 +0000749 "using quote(): "
750 "%s should be escaped to %s, not %s" %
Brett Cannon74bfd702003-04-25 09:39:47 +0000751 (char, hexescape(char), result))
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000752 result = urllib.parse.quote_plus(char)
Brett Cannon74bfd702003-04-25 09:39:47 +0000753 self.assertEqual(hexescape(char), result,
754 "using quote_plus(): "
Tim Petersc2659cf2003-05-12 20:19:37 +0000755 "%s should be escapes to %s, not %s" %
Brett Cannon74bfd702003-04-25 09:39:47 +0000756 (char, hexescape(char), result))
757 del should_quote
758 partial_quote = "ab[]cd"
759 expected = "ab%5B%5Dcd"
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000760 result = urllib.parse.quote(partial_quote)
Brett Cannon74bfd702003-04-25 09:39:47 +0000761 self.assertEqual(expected, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000762 "using quote(): %r != %r" % (expected, result))
Senthil Kumaran305a68e2011-09-13 06:40:27 +0800763 result = urllib.parse.quote_plus(partial_quote)
Brett Cannon74bfd702003-04-25 09:39:47 +0000764 self.assertEqual(expected, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000765 "using quote_plus(): %r != %r" % (expected, result))
Brett Cannon74bfd702003-04-25 09:39:47 +0000766
767 def test_quoting_space(self):
768 # Make sure quote() and quote_plus() handle spaces as specified in
769 # their unique way
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000770 result = urllib.parse.quote(' ')
Brett Cannon74bfd702003-04-25 09:39:47 +0000771 self.assertEqual(result, hexescape(' '),
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000772 "using quote(): %r != %r" % (result, hexescape(' ')))
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000773 result = urllib.parse.quote_plus(' ')
Brett Cannon74bfd702003-04-25 09:39:47 +0000774 self.assertEqual(result, '+',
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000775 "using quote_plus(): %r != +" % result)
Brett Cannon74bfd702003-04-25 09:39:47 +0000776 given = "a b cd e f"
777 expect = given.replace(' ', hexescape(' '))
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000778 result = urllib.parse.quote(given)
Brett Cannon74bfd702003-04-25 09:39:47 +0000779 self.assertEqual(expect, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000780 "using quote(): %r != %r" % (expect, result))
Brett Cannon74bfd702003-04-25 09:39:47 +0000781 expect = given.replace(' ', '+')
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000782 result = urllib.parse.quote_plus(given)
Brett Cannon74bfd702003-04-25 09:39:47 +0000783 self.assertEqual(expect, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000784 "using quote_plus(): %r != %r" % (expect, result))
Brett Cannon74bfd702003-04-25 09:39:47 +0000785
Raymond Hettinger2bdec7b2005-09-10 14:30:09 +0000786 def test_quoting_plus(self):
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000787 self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma'),
Raymond Hettinger2bdec7b2005-09-10 14:30:09 +0000788 'alpha%2Bbeta+gamma')
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000789 self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma', '+'),
Raymond Hettinger2bdec7b2005-09-10 14:30:09 +0000790 'alpha+beta+gamma')
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000791 # Test with bytes
792 self.assertEqual(urllib.parse.quote_plus(b'alpha+beta gamma'),
793 'alpha%2Bbeta+gamma')
794 # Test with safe bytes
795 self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma', b'+'),
796 'alpha+beta+gamma')
797
798 def test_quote_bytes(self):
799 # Bytes should quote directly to percent-encoded values
800 given = b"\xa2\xd8ab\xff"
801 expect = "%A2%D8ab%FF"
802 result = urllib.parse.quote(given)
803 self.assertEqual(expect, result,
804 "using quote(): %r != %r" % (expect, result))
805 # Encoding argument should raise type error on bytes input
806 self.assertRaises(TypeError, urllib.parse.quote, given,
807 encoding="latin-1")
808 # quote_from_bytes should work the same
809 result = urllib.parse.quote_from_bytes(given)
810 self.assertEqual(expect, result,
811 "using quote_from_bytes(): %r != %r"
812 % (expect, result))
813
814 def test_quote_with_unicode(self):
815 # Characters in Latin-1 range, encoded by default in UTF-8
816 given = "\xa2\xd8ab\xff"
817 expect = "%C2%A2%C3%98ab%C3%BF"
818 result = urllib.parse.quote(given)
819 self.assertEqual(expect, result,
820 "using quote(): %r != %r" % (expect, result))
821 # Characters in Latin-1 range, encoded by with None (default)
822 result = urllib.parse.quote(given, encoding=None, errors=None)
823 self.assertEqual(expect, result,
824 "using quote(): %r != %r" % (expect, result))
825 # Characters in Latin-1 range, encoded with Latin-1
826 given = "\xa2\xd8ab\xff"
827 expect = "%A2%D8ab%FF"
828 result = urllib.parse.quote(given, encoding="latin-1")
829 self.assertEqual(expect, result,
830 "using quote(): %r != %r" % (expect, result))
831 # Characters in BMP, encoded by default in UTF-8
832 given = "\u6f22\u5b57" # "Kanji"
833 expect = "%E6%BC%A2%E5%AD%97"
834 result = urllib.parse.quote(given)
835 self.assertEqual(expect, result,
836 "using quote(): %r != %r" % (expect, result))
837 # Characters in BMP, encoded with Latin-1
838 given = "\u6f22\u5b57"
839 self.assertRaises(UnicodeEncodeError, urllib.parse.quote, given,
840 encoding="latin-1")
841 # Characters in BMP, encoded with Latin-1, with replace error handling
842 given = "\u6f22\u5b57"
843 expect = "%3F%3F" # "??"
844 result = urllib.parse.quote(given, encoding="latin-1",
845 errors="replace")
846 self.assertEqual(expect, result,
847 "using quote(): %r != %r" % (expect, result))
848 # Characters in BMP, Latin-1, with xmlcharref error handling
849 given = "\u6f22\u5b57"
850 expect = "%26%2328450%3B%26%2323383%3B" # "&#28450;&#23383;"
851 result = urllib.parse.quote(given, encoding="latin-1",
852 errors="xmlcharrefreplace")
853 self.assertEqual(expect, result,
854 "using quote(): %r != %r" % (expect, result))
Raymond Hettinger2bdec7b2005-09-10 14:30:09 +0000855
Georg Brandlfaf41492009-05-26 18:31:11 +0000856 def test_quote_plus_with_unicode(self):
857 # Encoding (latin-1) test for quote_plus
858 given = "\xa2\xd8 \xff"
859 expect = "%A2%D8+%FF"
860 result = urllib.parse.quote_plus(given, encoding="latin-1")
861 self.assertEqual(expect, result,
862 "using quote_plus(): %r != %r" % (expect, result))
863 # Errors test for quote_plus
864 given = "ab\u6f22\u5b57 cd"
865 expect = "ab%3F%3F+cd"
866 result = urllib.parse.quote_plus(given, encoding="latin-1",
867 errors="replace")
868 self.assertEqual(expect, result,
869 "using quote_plus(): %r != %r" % (expect, result))
870
Senthil Kumarand496c4c2010-07-30 19:34:36 +0000871
Brett Cannon74bfd702003-04-25 09:39:47 +0000872class UnquotingTests(unittest.TestCase):
873 """Tests for unquote() and unquote_plus()
Tim Petersc2659cf2003-05-12 20:19:37 +0000874
Brett Cannon74bfd702003-04-25 09:39:47 +0000875 See the doc string for quoting_Tests for details on quoting and such.
876
877 """
878
879 def test_unquoting(self):
880 # Make sure unquoting of all ASCII values works
881 escape_list = []
882 for num in range(128):
883 given = hexescape(chr(num))
884 expect = chr(num)
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000885 result = urllib.parse.unquote(given)
Brett Cannon74bfd702003-04-25 09:39:47 +0000886 self.assertEqual(expect, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000887 "using unquote(): %r != %r" % (expect, result))
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000888 result = urllib.parse.unquote_plus(given)
Brett Cannon74bfd702003-04-25 09:39:47 +0000889 self.assertEqual(expect, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000890 "using unquote_plus(): %r != %r" %
Brett Cannon74bfd702003-04-25 09:39:47 +0000891 (expect, result))
892 escape_list.append(given)
893 escape_string = ''.join(escape_list)
894 del escape_list
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000895 result = urllib.parse.unquote(escape_string)
Brett Cannon74bfd702003-04-25 09:39:47 +0000896 self.assertEqual(result.count('%'), 1,
Brett Cannon74bfd702003-04-25 09:39:47 +0000897 "using unquote(): not all characters escaped: "
898 "%s" % result)
Georg Brandl604ef372010-07-31 08:20:02 +0000899 self.assertRaises((TypeError, AttributeError), urllib.parse.unquote, None)
900 self.assertRaises((TypeError, AttributeError), urllib.parse.unquote, ())
Florent Xicluna62829dc2010-08-14 20:51:58 +0000901 with support.check_warnings(('', BytesWarning), quiet=True):
902 self.assertRaises((TypeError, AttributeError), urllib.parse.unquote, b'')
Brett Cannon74bfd702003-04-25 09:39:47 +0000903
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000904 def test_unquoting_badpercent(self):
905 # Test unquoting on bad percent-escapes
906 given = '%xab'
907 expect = given
908 result = urllib.parse.unquote(given)
909 self.assertEqual(expect, result, "using unquote(): %r != %r"
910 % (expect, result))
911 given = '%x'
912 expect = given
913 result = urllib.parse.unquote(given)
914 self.assertEqual(expect, result, "using unquote(): %r != %r"
915 % (expect, result))
916 given = '%'
917 expect = given
918 result = urllib.parse.unquote(given)
919 self.assertEqual(expect, result, "using unquote(): %r != %r"
920 % (expect, result))
921 # unquote_to_bytes
922 given = '%xab'
923 expect = bytes(given, 'ascii')
924 result = urllib.parse.unquote_to_bytes(given)
925 self.assertEqual(expect, result, "using unquote_to_bytes(): %r != %r"
926 % (expect, result))
927 given = '%x'
928 expect = bytes(given, 'ascii')
929 result = urllib.parse.unquote_to_bytes(given)
930 self.assertEqual(expect, result, "using unquote_to_bytes(): %r != %r"
931 % (expect, result))
932 given = '%'
933 expect = bytes(given, 'ascii')
934 result = urllib.parse.unquote_to_bytes(given)
935 self.assertEqual(expect, result, "using unquote_to_bytes(): %r != %r"
936 % (expect, result))
Georg Brandl604ef372010-07-31 08:20:02 +0000937 self.assertRaises((TypeError, AttributeError), urllib.parse.unquote_to_bytes, None)
938 self.assertRaises((TypeError, AttributeError), urllib.parse.unquote_to_bytes, ())
Senthil Kumaran79e17f62010-07-19 18:17:19 +0000939
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000940 def test_unquoting_mixed_case(self):
941 # Test unquoting on mixed-case hex digits in the percent-escapes
942 given = '%Ab%eA'
943 expect = b'\xab\xea'
944 result = urllib.parse.unquote_to_bytes(given)
945 self.assertEqual(expect, result,
946 "using unquote_to_bytes(): %r != %r"
947 % (expect, result))
948
Brett Cannon74bfd702003-04-25 09:39:47 +0000949 def test_unquoting_parts(self):
950 # Make sure unquoting works when have non-quoted characters
951 # interspersed
952 given = 'ab%sd' % hexescape('c')
953 expect = "abcd"
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000954 result = urllib.parse.unquote(given)
Brett Cannon74bfd702003-04-25 09:39:47 +0000955 self.assertEqual(expect, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000956 "using quote(): %r != %r" % (expect, result))
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000957 result = urllib.parse.unquote_plus(given)
Brett Cannon74bfd702003-04-25 09:39:47 +0000958 self.assertEqual(expect, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000959 "using unquote_plus(): %r != %r" % (expect, result))
Tim Petersc2659cf2003-05-12 20:19:37 +0000960
Brett Cannon74bfd702003-04-25 09:39:47 +0000961 def test_unquoting_plus(self):
962 # Test difference between unquote() and unquote_plus()
963 given = "are+there+spaces..."
964 expect = given
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000965 result = urllib.parse.unquote(given)
Brett Cannon74bfd702003-04-25 09:39:47 +0000966 self.assertEqual(expect, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000967 "using unquote(): %r != %r" % (expect, result))
Brett Cannon74bfd702003-04-25 09:39:47 +0000968 expect = given.replace('+', ' ')
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000969 result = urllib.parse.unquote_plus(given)
Brett Cannon74bfd702003-04-25 09:39:47 +0000970 self.assertEqual(expect, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000971 "using unquote_plus(): %r != %r" % (expect, result))
972
973 def test_unquote_to_bytes(self):
974 given = 'br%C3%BCckner_sapporo_20050930.doc'
975 expect = b'br\xc3\xbcckner_sapporo_20050930.doc'
976 result = urllib.parse.unquote_to_bytes(given)
977 self.assertEqual(expect, result,
978 "using unquote_to_bytes(): %r != %r"
979 % (expect, result))
980 # Test on a string with unescaped non-ASCII characters
981 # (Technically an invalid URI; expect those characters to be UTF-8
982 # encoded).
983 result = urllib.parse.unquote_to_bytes("\u6f22%C3%BC")
984 expect = b'\xe6\xbc\xa2\xc3\xbc' # UTF-8 for "\u6f22\u00fc"
985 self.assertEqual(expect, result,
986 "using unquote_to_bytes(): %r != %r"
987 % (expect, result))
988 # Test with a bytes as input
989 given = b'%A2%D8ab%FF'
990 expect = b'\xa2\xd8ab\xff'
991 result = urllib.parse.unquote_to_bytes(given)
992 self.assertEqual(expect, result,
993 "using unquote_to_bytes(): %r != %r"
994 % (expect, result))
995 # Test with a bytes as input, with unescaped non-ASCII bytes
996 # (Technically an invalid URI; expect those bytes to be preserved)
997 given = b'%A2\xd8ab%FF'
998 expect = b'\xa2\xd8ab\xff'
999 result = urllib.parse.unquote_to_bytes(given)
1000 self.assertEqual(expect, result,
1001 "using unquote_to_bytes(): %r != %r"
1002 % (expect, result))
Brett Cannon74bfd702003-04-25 09:39:47 +00001003
Raymond Hettinger4b0f20d2005-10-15 16:41:53 +00001004 def test_unquote_with_unicode(self):
Guido van Rossum52dbbb92008-08-18 21:44:30 +00001005 # Characters in the Latin-1 range, encoded with UTF-8
1006 given = 'br%C3%BCckner_sapporo_20050930.doc'
1007 expect = 'br\u00fcckner_sapporo_20050930.doc'
1008 result = urllib.parse.unquote(given)
1009 self.assertEqual(expect, result,
1010 "using unquote(): %r != %r" % (expect, result))
1011 # Characters in the Latin-1 range, encoded with None (default)
1012 result = urllib.parse.unquote(given, encoding=None, errors=None)
1013 self.assertEqual(expect, result,
1014 "using unquote(): %r != %r" % (expect, result))
1015
1016 # Characters in the Latin-1 range, encoded with Latin-1
1017 result = urllib.parse.unquote('br%FCckner_sapporo_20050930.doc',
1018 encoding="latin-1")
1019 expect = 'br\u00fcckner_sapporo_20050930.doc'
1020 self.assertEqual(expect, result,
1021 "using unquote(): %r != %r" % (expect, result))
1022
1023 # Characters in BMP, encoded with UTF-8
1024 given = "%E6%BC%A2%E5%AD%97"
1025 expect = "\u6f22\u5b57" # "Kanji"
1026 result = urllib.parse.unquote(given)
1027 self.assertEqual(expect, result,
1028 "using unquote(): %r != %r" % (expect, result))
1029
1030 # Decode with UTF-8, invalid sequence
1031 given = "%F3%B1"
1032 expect = "\ufffd" # Replacement character
1033 result = urllib.parse.unquote(given)
1034 self.assertEqual(expect, result,
1035 "using unquote(): %r != %r" % (expect, result))
1036
1037 # Decode with UTF-8, invalid sequence, replace errors
1038 result = urllib.parse.unquote(given, errors="replace")
1039 self.assertEqual(expect, result,
1040 "using unquote(): %r != %r" % (expect, result))
1041
1042 # Decode with UTF-8, invalid sequence, ignoring errors
1043 given = "%F3%B1"
1044 expect = ""
1045 result = urllib.parse.unquote(given, errors="ignore")
1046 self.assertEqual(expect, result,
1047 "using unquote(): %r != %r" % (expect, result))
1048
1049 # A mix of non-ASCII and percent-encoded characters, UTF-8
1050 result = urllib.parse.unquote("\u6f22%C3%BC")
1051 expect = '\u6f22\u00fc'
1052 self.assertEqual(expect, result,
1053 "using unquote(): %r != %r" % (expect, result))
1054
1055 # A mix of non-ASCII and percent-encoded characters, Latin-1
1056 # (Note, the string contains non-Latin-1-representable characters)
1057 result = urllib.parse.unquote("\u6f22%FC", encoding="latin-1")
1058 expect = '\u6f22\u00fc'
1059 self.assertEqual(expect, result,
1060 "using unquote(): %r != %r" % (expect, result))
Raymond Hettinger4b0f20d2005-10-15 16:41:53 +00001061
Brett Cannon74bfd702003-04-25 09:39:47 +00001062class urlencode_Tests(unittest.TestCase):
1063 """Tests for urlencode()"""
1064
1065 def help_inputtype(self, given, test_type):
1066 """Helper method for testing different input types.
Tim Petersc2659cf2003-05-12 20:19:37 +00001067
Brett Cannon74bfd702003-04-25 09:39:47 +00001068 'given' must lead to only the pairs:
1069 * 1st, 1
1070 * 2nd, 2
1071 * 3rd, 3
Tim Petersc2659cf2003-05-12 20:19:37 +00001072
Brett Cannon74bfd702003-04-25 09:39:47 +00001073 Test cannot assume anything about order. Docs make no guarantee and
1074 have possible dictionary input.
Tim Petersc2659cf2003-05-12 20:19:37 +00001075
Brett Cannon74bfd702003-04-25 09:39:47 +00001076 """
1077 expect_somewhere = ["1st=1", "2nd=2", "3rd=3"]
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001078 result = urllib.parse.urlencode(given)
Brett Cannon74bfd702003-04-25 09:39:47 +00001079 for expected in expect_somewhere:
Ezio Melottib58e0bd2010-01-23 15:40:09 +00001080 self.assertIn(expected, result,
Brett Cannon74bfd702003-04-25 09:39:47 +00001081 "testing %s: %s not found in %s" %
1082 (test_type, expected, result))
1083 self.assertEqual(result.count('&'), 2,
1084 "testing %s: expected 2 '&'s; got %s" %
1085 (test_type, result.count('&')))
1086 amp_location = result.index('&')
1087 on_amp_left = result[amp_location - 1]
1088 on_amp_right = result[amp_location + 1]
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00001089 self.assertTrue(on_amp_left.isdigit() and on_amp_right.isdigit(),
Brett Cannon74bfd702003-04-25 09:39:47 +00001090 "testing %s: '&' not located in proper place in %s" %
1091 (test_type, result))
1092 self.assertEqual(len(result), (5 * 3) + 2, #5 chars per thing and amps
1093 "testing %s: "
1094 "unexpected number of characters: %s != %s" %
1095 (test_type, len(result), (5 * 3) + 2))
1096
1097 def test_using_mapping(self):
1098 # Test passing in a mapping object as an argument.
1099 self.help_inputtype({"1st":'1', "2nd":'2', "3rd":'3'},
1100 "using dict as input type")
1101
1102 def test_using_sequence(self):
1103 # Test passing in a sequence of two-item sequences as an argument.
1104 self.help_inputtype([('1st', '1'), ('2nd', '2'), ('3rd', '3')],
1105 "using sequence of two-item tuples as input")
1106
1107 def test_quoting(self):
1108 # Make sure keys and values are quoted using quote_plus()
1109 given = {"&":"="}
1110 expect = "%s=%s" % (hexescape('&'), hexescape('='))
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001111 result = urllib.parse.urlencode(given)
Brett Cannon74bfd702003-04-25 09:39:47 +00001112 self.assertEqual(expect, result)
1113 given = {"key name":"A bunch of pluses"}
1114 expect = "key+name=A+bunch+of+pluses"
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001115 result = urllib.parse.urlencode(given)
Brett Cannon74bfd702003-04-25 09:39:47 +00001116 self.assertEqual(expect, result)
1117
1118 def test_doseq(self):
1119 # Test that passing True for 'doseq' parameter works correctly
1120 given = {'sequence':['1', '2', '3']}
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001121 expect = "sequence=%s" % urllib.parse.quote_plus(str(['1', '2', '3']))
1122 result = urllib.parse.urlencode(given)
Brett Cannon74bfd702003-04-25 09:39:47 +00001123 self.assertEqual(expect, result)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001124 result = urllib.parse.urlencode(given, True)
Brett Cannon74bfd702003-04-25 09:39:47 +00001125 for value in given["sequence"]:
1126 expect = "sequence=%s" % value
Ezio Melottib58e0bd2010-01-23 15:40:09 +00001127 self.assertIn(expect, result)
Brett Cannon74bfd702003-04-25 09:39:47 +00001128 self.assertEqual(result.count('&'), 2,
1129 "Expected 2 '&'s, got %s" % result.count('&'))
1130
Jeremy Hylton1ef7c6b2009-03-26 16:57:30 +00001131 def test_empty_sequence(self):
1132 self.assertEqual("", urllib.parse.urlencode({}))
1133 self.assertEqual("", urllib.parse.urlencode([]))
1134
1135 def test_nonstring_values(self):
1136 self.assertEqual("a=1", urllib.parse.urlencode({"a": 1}))
1137 self.assertEqual("a=None", urllib.parse.urlencode({"a": None}))
1138
1139 def test_nonstring_seq_values(self):
1140 self.assertEqual("a=1&a=2", urllib.parse.urlencode({"a": [1, 2]}, True))
1141 self.assertEqual("a=None&a=a",
1142 urllib.parse.urlencode({"a": [None, "a"]}, True))
Georg Brandl2daf6ae2012-02-20 19:54:16 +01001143 data = collections.OrderedDict([("a", 1), ("b", 1)])
Jeremy Hylton1ef7c6b2009-03-26 16:57:30 +00001144 self.assertEqual("a=a&a=b",
Georg Brandl2daf6ae2012-02-20 19:54:16 +01001145 urllib.parse.urlencode({"a": data}, True))
Jeremy Hylton1ef7c6b2009-03-26 16:57:30 +00001146
Senthil Kumarandf022da2010-07-03 17:48:22 +00001147 def test_urlencode_encoding(self):
1148 # ASCII encoding. Expect %3F with errors="replace'
1149 given = (('\u00a0', '\u00c1'),)
1150 expect = '%3F=%3F'
1151 result = urllib.parse.urlencode(given, encoding="ASCII", errors="replace")
1152 self.assertEqual(expect, result)
1153
1154 # Default is UTF-8 encoding.
1155 given = (('\u00a0', '\u00c1'),)
1156 expect = '%C2%A0=%C3%81'
1157 result = urllib.parse.urlencode(given)
1158 self.assertEqual(expect, result)
1159
1160 # Latin-1 encoding.
1161 given = (('\u00a0', '\u00c1'),)
1162 expect = '%A0=%C1'
1163 result = urllib.parse.urlencode(given, encoding="latin-1")
1164 self.assertEqual(expect, result)
1165
1166 def test_urlencode_encoding_doseq(self):
1167 # ASCII Encoding. Expect %3F with errors="replace'
1168 given = (('\u00a0', '\u00c1'),)
1169 expect = '%3F=%3F'
1170 result = urllib.parse.urlencode(given, doseq=True,
1171 encoding="ASCII", errors="replace")
1172 self.assertEqual(expect, result)
1173
1174 # ASCII Encoding. On a sequence of values.
1175 given = (("\u00a0", (1, "\u00c1")),)
1176 expect = '%3F=1&%3F=%3F'
1177 result = urllib.parse.urlencode(given, True,
1178 encoding="ASCII", errors="replace")
1179 self.assertEqual(expect, result)
1180
1181 # Utf-8
1182 given = (("\u00a0", "\u00c1"),)
1183 expect = '%C2%A0=%C3%81'
1184 result = urllib.parse.urlencode(given, True)
1185 self.assertEqual(expect, result)
1186
1187 given = (("\u00a0", (42, "\u00c1")),)
1188 expect = '%C2%A0=42&%C2%A0=%C3%81'
1189 result = urllib.parse.urlencode(given, True)
1190 self.assertEqual(expect, result)
1191
1192 # latin-1
1193 given = (("\u00a0", "\u00c1"),)
1194 expect = '%A0=%C1'
1195 result = urllib.parse.urlencode(given, True, encoding="latin-1")
1196 self.assertEqual(expect, result)
1197
1198 given = (("\u00a0", (42, "\u00c1")),)
1199 expect = '%A0=42&%A0=%C1'
1200 result = urllib.parse.urlencode(given, True, encoding="latin-1")
1201 self.assertEqual(expect, result)
1202
1203 def test_urlencode_bytes(self):
1204 given = ((b'\xa0\x24', b'\xc1\x24'),)
1205 expect = '%A0%24=%C1%24'
1206 result = urllib.parse.urlencode(given)
1207 self.assertEqual(expect, result)
1208 result = urllib.parse.urlencode(given, True)
1209 self.assertEqual(expect, result)
1210
1211 # Sequence of values
1212 given = ((b'\xa0\x24', (42, b'\xc1\x24')),)
1213 expect = '%A0%24=42&%A0%24=%C1%24'
1214 result = urllib.parse.urlencode(given, True)
1215 self.assertEqual(expect, result)
1216
1217 def test_urlencode_encoding_safe_parameter(self):
1218
1219 # Send '$' (\x24) as safe character
1220 # Default utf-8 encoding
1221
1222 given = ((b'\xa0\x24', b'\xc1\x24'),)
1223 result = urllib.parse.urlencode(given, safe=":$")
1224 expect = '%A0$=%C1$'
1225 self.assertEqual(expect, result)
1226
1227 given = ((b'\xa0\x24', b'\xc1\x24'),)
1228 result = urllib.parse.urlencode(given, doseq=True, safe=":$")
1229 expect = '%A0$=%C1$'
1230 self.assertEqual(expect, result)
1231
1232 # Safe parameter in sequence
1233 given = ((b'\xa0\x24', (b'\xc1\x24', 0xd, 42)),)
1234 expect = '%A0$=%C1$&%A0$=13&%A0$=42'
1235 result = urllib.parse.urlencode(given, True, safe=":$")
1236 self.assertEqual(expect, result)
1237
1238 # Test all above in latin-1 encoding
1239
1240 given = ((b'\xa0\x24', b'\xc1\x24'),)
1241 result = urllib.parse.urlencode(given, safe=":$",
1242 encoding="latin-1")
1243 expect = '%A0$=%C1$'
1244 self.assertEqual(expect, result)
1245
1246 given = ((b'\xa0\x24', b'\xc1\x24'),)
1247 expect = '%A0$=%C1$'
1248 result = urllib.parse.urlencode(given, doseq=True, safe=":$",
1249 encoding="latin-1")
1250
1251 given = ((b'\xa0\x24', (b'\xc1\x24', 0xd, 42)),)
1252 expect = '%A0$=%C1$&%A0$=13&%A0$=42'
1253 result = urllib.parse.urlencode(given, True, safe=":$",
1254 encoding="latin-1")
1255 self.assertEqual(expect, result)
1256
class Pathname_Tests(unittest.TestCase):
    """Conversion checks for pathname2url() and url2pathname()"""

    def test_basic(self):
        # A plain multi-component path converts to a '/'-joined URL and
        # converts back to the same local path.
        local_path = os.path.join("parts", "of", "a", "path")
        url_form = "parts/of/a/path"

        converted = urllib.request.pathname2url(local_path)
        self.assertEqual(url_form, converted,
                         "pathname2url() failed; %s != %s" %
                         (converted, url_form))

        converted = urllib.request.url2pathname(url_form)
        self.assertEqual(local_path, converted,
                         "url2pathame() failed; %s != %s" %
                         (converted, local_path))

    def test_quoting(self):
        # pathname2url() must quote and url2pathname() must unquote
        # automatically.
        local_path = os.path.join("needs", "quot=ing", "here")
        quoted_url = "needs/%s/here" % urllib.parse.quote("quot=ing")
        as_url = urllib.request.pathname2url(local_path)
        self.assertEqual(quoted_url, as_url,
                         "pathname2url() failed; %s != %s" %
                         (quoted_url, as_url))

        # Feed the URL just produced back in: it must round-trip.
        round_tripped = urllib.request.url2pathname(as_url)
        self.assertEqual(local_path, round_tripped,
                         "url2pathname() failed; %s != %s" %
                         (local_path, round_tripped))

        # A space in a path component is percent-quoted like quote() does.
        local_path = os.path.join("make sure", "using_quote")
        quoted_url = "%s/using_quote" % urllib.parse.quote("make sure")
        as_url = urllib.request.pathname2url(local_path)
        self.assertEqual(quoted_url, as_url,
                         "pathname2url() failed; %s != %s" %
                         (quoted_url, as_url))

        # A '+' in the URL is kept as '+' in the resulting path.
        plus_url = "make+sure/using_unquote"
        plus_path = os.path.join("make+sure", "using_unquote")
        as_path = urllib.request.url2pathname(plus_url)
        self.assertEqual(plus_path, as_path,
                         "url2pathname() failed; %s != %s" %
                         (plus_path, as_path))

    @unittest.skipUnless(sys.platform == 'win32',
                         'test specific to the urllib.url2path function.')
    def test_ntpath(self):
        # Several spellings of a drive-root URL all map to the same path.
        drive_root = 'C:\\'
        for drive_url in ('/C:/', '///C:/', '/C|//'):
            as_path = urllib.request.url2pathname(drive_url)
            self.assertEqual(drive_root, as_path,
                             'urllib.request..url2pathname() failed; %s != %s' %
                             (drive_root, as_path))

        # Drive letter written with '|' and followed by a path component.
        wanted = 'C:\\path'
        as_path = urllib.request.url2pathname('///C|/path')
        self.assertEqual(wanted, as_path,
                         'urllib.request.url2pathname() failed; %s != %s' %
                         (wanted, as_path))
1316
class Utility_Tests(unittest.TestCase):
    """Tests for the stand-alone utility functions that urllib exposes."""

    def test_thishost(self):
        """Test the urllib.request.thishost utility function returns a tuple"""
        # Only the type of the result is checked, not its contents.
        host_info = urllib.request.thishost()
        self.assertIsInstance(host_info, tuple)
1323
Senthil Kumaran690ce9b2009-05-05 18:41:13 +00001324
class URLopener_Tests(unittest.TestCase):
    """Tests for the quoting done by URLopener.open()."""

    def test_quoted_open(self):
        class DummyURLopener(urllib.request.URLopener):
            # Handler for the made-up "spam" scheme: it returns the URL
            # it was given so the test can inspect it.
            def open_spam(self, url):
                return url

        expected_warning = (
            'DummyURLopener style of invoking requests is deprecated.',
            DeprecationWarning)
        with support.check_warnings(expected_warning):
            opened = DummyURLopener().open('spam://example/ /')
            self.assertEqual(opened, '//example/%20/')

            # test the safe characters are not quoted by urlopen
            opened = DummyURLopener().open(
                "spam://c:|windows%/:=&?~#+!$,;'@()*[]|/path/")
            self.assertEqual(opened,
                             "//c:|windows%/:=&?~#+!$,;'@()*[]|/path/")
Senthil Kumaran734f0592010-02-20 22:19:04 +00001342
# The FTPWrapperTests below are commented out rather than deleted.
# It is unclear why they keep failing on Windows and SPARC.
# Everywhere else they work fine, but on those machines they sometimes
# fail in one of the tests and sometimes in another.  On my Linux
# machine the tests pass.
# If anybody has one of the problematic environments, please help!
# . Facundo
1350#
1351# def server(evt):
Georg Brandlf78e02b2008-06-10 17:40:04 +00001352# import socket, time
Guido van Rossume7ba4952007-06-06 23:52:48 +00001353# serv = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
1354# serv.settimeout(3)
1355# serv.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
1356# serv.bind(("", 9093))
Charles-François Natali6e204602014-07-23 19:28:13 +01001357# serv.listen()
Guido van Rossume7ba4952007-06-06 23:52:48 +00001358# try:
1359# conn, addr = serv.accept()
1360# conn.send("1 Hola mundo\n")
1361# cantdata = 0
1362# while cantdata < 13:
1363# data = conn.recv(13-cantdata)
1364# cantdata += len(data)
1365# time.sleep(.3)
1366# conn.send("2 No more lines\n")
1367# conn.close()
1368# except socket.timeout:
1369# pass
1370# finally:
1371# serv.close()
1372# evt.set()
1373#
1374# class FTPWrapperTests(unittest.TestCase):
1375#
1376# def setUp(self):
Georg Brandlf78e02b2008-06-10 17:40:04 +00001377# import ftplib, time, threading
Guido van Rossume7ba4952007-06-06 23:52:48 +00001378# ftplib.FTP.port = 9093
1379# self.evt = threading.Event()
1380# threading.Thread(target=server, args=(self.evt,)).start()
1381# time.sleep(.1)
1382#
1383# def tearDown(self):
1384# self.evt.wait()
1385#
1386# def testBasic(self):
1387# # connects
1388# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [])
Georg Brandlf78e02b2008-06-10 17:40:04 +00001389# ftp.close()
Guido van Rossume7ba4952007-06-06 23:52:48 +00001390#
1391# def testTimeoutNone(self):
Georg Brandlf78e02b2008-06-10 17:40:04 +00001392# # global default timeout is ignored
1393# import socket
Serhiy Storchaka25d8aea2014-02-08 14:50:08 +02001394# self.assertIsNone(socket.getdefaulttimeout())
Guido van Rossume7ba4952007-06-06 23:52:48 +00001395# socket.setdefaulttimeout(30)
1396# try:
1397# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [])
1398# finally:
Georg Brandlf78e02b2008-06-10 17:40:04 +00001399# socket.setdefaulttimeout(None)
Guido van Rossume7ba4952007-06-06 23:52:48 +00001400# self.assertEqual(ftp.ftp.sock.gettimeout(), 30)
Georg Brandlf78e02b2008-06-10 17:40:04 +00001401# ftp.close()
Guido van Rossume7ba4952007-06-06 23:52:48 +00001402#
Georg Brandlf78e02b2008-06-10 17:40:04 +00001403# def testTimeoutDefault(self):
1404# # global default timeout is used
1405# import socket
Serhiy Storchaka25d8aea2014-02-08 14:50:08 +02001406# self.assertIsNone(socket.getdefaulttimeout())
Georg Brandlf78e02b2008-06-10 17:40:04 +00001407# socket.setdefaulttimeout(30)
1408# try:
1409# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [])
1410# finally:
1411# socket.setdefaulttimeout(None)
1412# self.assertEqual(ftp.ftp.sock.gettimeout(), 30)
1413# ftp.close()
1414#
1415# def testTimeoutValue(self):
1416# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [],
1417# timeout=30)
1418# self.assertEqual(ftp.ftp.sock.gettimeout(), 30)
1419# ftp.close()
Guido van Rossume7ba4952007-06-06 23:52:48 +00001420
Senthil Kumaran8b081b72013-04-10 20:53:12 -07001421
class RequestTests(unittest.TestCase):
    """Unit tests for urllib.request.Request."""

    def test_default_values(self):
        make_request = urllib.request.Request
        # Without a data argument the method is GET.
        plain = make_request("http://www.python.org")
        self.assertEqual(plain.get_method(), 'GET')
        # Passing data (here an empty dict) switches the method to POST.
        with_data = make_request("http://www.python.org", {})
        self.assertEqual(with_data.get_method(), 'POST')

    def test_with_method_arg(self):
        make_request = urllib.request.Request
        # An explicit method= is reported both with and without data.
        for positional in (("http://www.python.org",),
                           ("http://www.python.org", {})):
            req = make_request(*positional, method='HEAD')
            self.assertEqual(req.method, 'HEAD')
            self.assertEqual(req.get_method(), 'HEAD')
        # Reassigning the attribute afterwards is reflected by get_method().
        req = make_request("http://www.python.org", method='GET')
        self.assertEqual(req.get_method(), 'GET')
        req.method = 'HEAD'
        self.assertEqual(req.get_method(), 'HEAD')
Skip Montanaro080c9972001-01-28 21:12:22 +00001444
1445
class URL2PathNameTests(unittest.TestCase):
    """Tests for nturl2path.url2pathname(), imported at module level."""

    def test_converting_drive_letter(self):
        # (url, expected path) pairs; "|" and ":" both mark the drive.
        cases = (
            ("///C|", 'C:'),
            ("///C:", 'C:'),
            ("///C|/", 'C:\\'),
        )
        for url, expected in cases:
            self.assertEqual(url2pathname(url), expected)

    def test_converting_when_no_drive_letter(self):
        # cannot end a raw string in \
        cases = (
            ("///C/test/", r'\\\C\test' '\\'),
            ("////C/test/", r'\\C\test' '\\'),
        )
        for url, expected in cases:
            self.assertEqual(url2pathname(url), expected)

    def test_simple_compare(self):
        converted = url2pathname("///C|/foo/bar/spam.foo")
        self.assertEqual(converted, r'C:\foo\bar\spam.foo')

    def test_non_ascii_drive_letter(self):
        # A non-ASCII character in the drive-letter position is rejected.
        with self.assertRaises(IOError):
            url2pathname("///\u00e8|/")

    def test_roundtrip_url2pathname(self):
        # path -> URL -> path must give back the starting path.
        for original in ('C:',
                         r'\\\C\test\\',
                         r'C:\foo\bar\spam.foo'):
            round_tripped = url2pathname(pathname2url(original))
            self.assertEqual(round_tripped, original)
Senthil Kumaran277e9092013-04-10 20:51:19 -07001472
class PathName2URLTests(unittest.TestCase):
    """Tests for nturl2path.pathname2url(), imported at module level."""

    def test_converting_drive_letter(self):
        # With or without a trailing backslash the result is the same URL.
        for path in ("C:", "C:\\"):
            self.assertEqual(pathname2url(path), '///C:')

    def test_converting_when_no_drive_letter(self):
        # (path, expected URL) pairs; the trailing backslash is appended
        # separately because a raw string cannot end in one.
        cases = (
            (r"\\\folder\test" "\\", '/////folder/test/'),
            (r"\\folder\test" "\\", '////folder/test/'),
            (r"\folder\test" "\\", '/folder/test/'),
        )
        for path, expected in cases:
            self.assertEqual(pathname2url(path), expected)

    def test_simple_compare(self):
        converted = pathname2url(r'C:\foo\bar\spam.foo')
        self.assertEqual(converted, "///C:/foo/bar/spam.foo")

    def test_long_drive_letter(self):
        # A drive specifier longer than one character is rejected.
        with self.assertRaises(IOError):
            pathname2url("XX:\\")

    def test_roundtrip_pathname2url(self):
        # URL -> path -> URL must give back the starting URL.
        for original in ('///C:',
                         '/////folder/test/',
                         '///C:/foo/bar/spam.foo'):
            round_tripped = pathname2url(url2pathname(original))
            self.assertEqual(round_tripped, original)
Brett Cannon74bfd702003-04-25 09:39:47 +00001500
# Run this module's test cases when the file is executed as a script.
if __name__ == '__main__':
    unittest.main()