blob: 1a5013ed2696a5b507169aed5bae3c1d89211ca7 [file] [log] [blame]
"""Regression tests for urllib"""
2
Jeremy Hylton1afc1692008-06-18 20:49:58 +00003import urllib.parse
4import urllib.request
guido@google.coma119df92011-03-29 11:41:02 -07005import urllib.error
Georg Brandl24420152008-05-26 16:32:26 +00006import http.client
Barry Warsaw820c1202008-06-12 04:06:45 +00007import email.message
Jeremy Hylton66dc8c52007-08-04 03:42:26 +00008import io
Brett Cannon74bfd702003-04-25 09:39:47 +00009import unittest
Benjamin Peterson3c2dca62014-06-07 15:08:04 -070010from unittest.mock import patch
Benjamin Petersonee8712c2008-05-20 21:35:26 +000011from test import support
Brett Cannon74bfd702003-04-25 09:39:47 +000012import os
Senthil Kumaran2d2ea1b2011-04-14 13:16:30 +080013import sys
Georg Brandl5a650a22005-08-26 08:51:34 +000014import tempfile
Senthil Kumaran277e9092013-04-10 20:51:19 -070015from nturl2path import url2pathname, pathname2url
Jeremy Hylton6102e292000-08-31 15:48:10 +000016
Senthil Kumaranc5c5a142012-01-14 19:09:04 +080017from base64 import b64encode
Georg Brandl2daf6ae2012-02-20 19:54:16 +010018import collections
Senthil Kumaranc5c5a142012-01-14 19:09:04 +080019
Senthil Kumaran8b081b72013-04-10 20:53:12 -070020
def hexescape(char):
    """Return *char* percent-escaped as RFC 2396 specifies.

    The character's code point is written in upper-case hexadecimal,
    zero-padded to at least two digits, and prefixed with "%".
    """
    return "%" + format(ord(char), "02X")
Jeremy Hylton6102e292000-08-31 15:48:10 +000027
# Shared FancyURLopener, created lazily by urlopen() and reused by later
# calls that do not pass their own proxies.
_urlopener = None


def urlopen(url, data=None, proxies=None):
    """urlopen(url [, data]) -> open file-like object

    When *proxies* is given, a fresh FancyURLopener configured with those
    proxies is used for this call only.  Otherwise the module-level opener
    is used, and is created on first use.  *data* is forwarded to the
    opener's open() only when it is not None.
    """
    global _urlopener
    if proxies is not None:
        opener = urllib.request.FancyURLopener(proxies=proxies)
    else:
        if not _urlopener:
            # Creating a FancyURLopener emits a DeprecationWarning; expect it
            # here so it is not reported as unexpected.
            with support.check_warnings(
                    ('FancyURLopener style of invoking requests is deprecated.',
                     DeprecationWarning)):
                _urlopener = urllib.request.FancyURLopener()
        opener = _urlopener
    if data is not None:
        return opener.open(url, data)
    return opener.open(url)
49
Senthil Kumarance260142011-11-01 01:35:17 +080050
class FakeHTTPMixin:
    """Mixin that swaps http.client.HTTPConnection for a canned-data fake."""

    def fakehttp(self, fakedata):
        """Replace http.client.HTTPConnection with a fake serving *fakedata*.

        The class being replaced is remembered on the instance so that
        unfakehttp() can put it back.
        """

        class FakeSocket(io.BytesIO):
            # Count of outstanding handles: the connection's own reference
            # plus one for every makefile() call.
            io_refs = 1

            def sendall(self, data):
                # Store the outgoing request on the connection class so
                # tests can inspect what was sent.
                FakeHTTPConnection.buf = data

            def makefile(self, *args, **kwds):
                self.io_refs += 1
                return self

            def read(self, amt=None):
                # Once closed, report end-of-data instead of raising.
                return b"" if self.closed else super().read(amt)

            def readline(self, length=None):
                # Once closed, report end-of-data instead of raising.
                return b"" if self.closed else super().readline(length)

            def close(self):
                # Only really close when the last handle is released.
                self.io_refs -= 1
                if not self.io_refs:
                    super().close()

        real_connection = http.client.HTTPConnection

        class FakeHTTPConnection(real_connection):

            # buffer to store data for verification in urlopen tests.
            buf = None

            def connect(self):
                self.sock = FakeSocket(fakedata)

        self._connection_class = real_connection
        http.client.HTTPConnection = FakeHTTPConnection

    def unfakehttp(self):
        """Restore the HTTPConnection class saved by fakehttp()."""
        http.client.HTTPConnection = self._connection_class
91
92
class FakeFTPMixin:
    """Mixin that swaps urllib.request.ftpwrapper for a do-nothing fake."""

    def fakeftp(self):
        """Replace urllib.request.ftpwrapper with a fake; see unfakeftp()."""

        class FakeFtpWrapper:
            def __init__(self, user, passwd, host, port, dirs, timeout=None,
                         persistent=True):
                """Accept the arguments this file passes and ignore them."""

            def retrfile(self, file, type):
                # Always hand back an empty stream paired with 0.
                empty_stream = io.BytesIO()
                return empty_stream, 0

            def close(self):
                """Nothing to release."""

        real_wrapper = urllib.request.ftpwrapper
        self._ftpwrapper_class = real_wrapper
        urllib.request.ftpwrapper = FakeFtpWrapper

    def unfakeftp(self):
        """Restore the ftpwrapper class saved by fakeftp()."""
        urllib.request.ftpwrapper = self._ftpwrapper_class
111
112
class urlopen_FileTests(unittest.TestCase):
    """Check the object urlopen() returns for a temporary local file.

    Exercising as much of the interface as possible against a local file
    cuts down on how much testing has to connect to the Net.

    """

    def setUp(self):
        # Write one class-specific line of text to the scratch file, then
        # open that file through a "file:" URL.
        payload = "test_urllib: %s\n" % self.__class__.__name__
        self.text = payload.encode("ascii")
        with open(support.TESTFN, 'wb') as scratch:
            scratch.write(self.text)
        self.pathname = support.TESTFN
        self.returned_obj = urlopen("file:%s" % self.pathname)

    def tearDown(self):
        """Close the opened object and delete the scratch file."""
        self.returned_obj.close()
        os.remove(support.TESTFN)

    def test_interface(self):
        # The object returned by urlopen() must offer all of these.
        required = ("read", "readline", "readlines", "fileno",
                    "close", "info", "geturl", "getcode", "__iter__")
        for name in required:
            present = hasattr(self.returned_obj, name)
            self.assertTrue(
                present,
                "object returned by urlopen() lacks %s attribute" % name)

    def test_read(self):
        content = self.returned_obj.read()
        self.assertEqual(self.text, content)

    def test_readline(self):
        first = self.returned_obj.readline()
        self.assertEqual(self.text, first)
        # The file holds a single line, so the next call is past the end.
        second = self.returned_obj.readline()
        self.assertEqual(b'', second,
                         "calling readline() after exhausting the file did not"
                         " return an empty string")

    def test_readlines(self):
        lines = self.returned_obj.readlines()
        self.assertEqual(len(lines), 1,
                         "readlines() returned the wrong number of lines")
        self.assertEqual(lines[0], self.text,
                         "readlines() returned improper text")

    def test_fileno(self):
        descriptor = self.returned_obj.fileno()
        self.assertIsInstance(descriptor, int,
                              "fileno() did not return an int")
        raw = os.read(descriptor, len(self.text))
        self.assertEqual(raw, self.text,
                         "Reading on the file descriptor returned by fileno() "
                         "did not return the expected text")

    def test_close(self):
        # Close here; tearDown() then closes a second time, which must also
        # be harmless.
        self.returned_obj.close()

    def test_info(self):
        headers = self.returned_obj.info()
        self.assertIsInstance(headers, email.message.Message)

    def test_geturl(self):
        self.assertEqual(self.returned_obj.geturl(), self.pathname)

    def test_getcode(self):
        self.assertIsNone(self.returned_obj.getcode())

    def test_iter(self):
        # No need to count iterations: anything yielded beyond the first
        # line would fail the comparison immediately.
        # The implicit for-loop form is used on purpose (ticket #4608).
        for line in self.returned_obj:
            self.assertEqual(line, self.text)

    def test_relativelocalfile(self):
        relative = './' + self.pathname
        with self.assertRaises(ValueError):
            urllib.request.urlopen(relative)
194
class ProxyTests(unittest.TestCase):
    """Tests for proxy settings read from environment variables."""

    def setUp(self):
        # The guard records every change so tearDown() can undo it.
        self.env = support.EnvironmentVarGuard()
        # Start from an environment without any proxy-related variables.
        proxy_names = [name for name in os.environ if 'proxy' in name.lower()]
        for name in proxy_names:
            self.env.unset(name)

    def tearDown(self):
        # Put all proxy related env vars back the way they were.
        self.env.__exit__()
        del self.env

    def test_getproxies_environment_keep_no_proxies(self):
        self.env.set('NO_PROXY', 'localhost')
        proxies = urllib.request.getproxies_environment()
        # getproxies_environment() keys are lower-cased and have the
        # '_proxy' suffix removed.
        self.assertEqual('localhost', proxies['no'])
        # A no_proxy list whose entries are separated by ", ".
        self.env.set('NO_PROXY', 'localhost, anotherdomain.com, newdomain.com')
        bypassed = urllib.request.proxy_bypass_environment('anotherdomain.com')
        self.assertTrue(bypassed)
Benjamin Peterson9bc93512008-09-22 22:10:59 +0000218
class urlopen_HttpTests(unittest.TestCase, FakeHTTPMixin, FakeFTPMixin):
    """Test urlopen() against fake http and ftp connections."""

    def check_read(self, ver):
        """Serve a canned 200 reply as HTTP/<ver> and check what is read."""
        canned = b"HTTP/" + ver + b" 200 OK\r\n\r\nHello!"
        self.fakehttp(canned)
        try:
            response = urlopen("http://python.org/")
            self.assertEqual(response.readline(), b"Hello!")
            self.assertEqual(response.readline(), b"")
            self.assertEqual(response.geturl(), 'http://python.org/')
            self.assertEqual(response.getcode(), 200)
        finally:
            self.unfakehttp()

    def test_url_fragment(self):
        # Issue #11703: geturl() omits fragments in the original URL.
        url = 'http://docs.python.org/library/urllib.html#OK'
        self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello!")
        try:
            response = urllib.request.urlopen(url)
            self.assertEqual(response.geturl(), url)
        finally:
            self.unfakehttp()

    def test_willclose(self):
        self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello!")
        try:
            response = urlopen("http://www.python.org")
            self.assertTrue(response.fp.will_close)
        finally:
            self.unfakehttp()

    def test_read_0_9(self):
        # A "0.9" status line is accepted (but not "simple responses"
        # that have no status line at all).
        self.check_read(b"0.9")

    def test_read_1_0(self):
        self.check_read(b"1.0")

    def test_read_1_1(self):
        self.check_read(b"1.1")

    def test_read_bogus(self):
        # urlopen() should raise OSError for many error codes.
        self.fakehttp(b'''HTTP/1.1 401 Authentication Required
Date: Wed, 02 Jan 2008 03:03:54 GMT
Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
Connection: close
Content-Type: text/html; charset=iso-8859-1
''')
        try:
            with self.assertRaises(OSError):
                urlopen("http://python.org/")
        finally:
            self.unfakehttp()

    def test_invalid_redirect(self):
        # A redirect whose Location is a file:// URL must be reported as an
        # HTTPError.
        self.fakehttp(b'''HTTP/1.1 302 Found
Date: Wed, 02 Jan 2008 03:03:54 GMT
Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
Location: file://guidocomputer.athome.com:/python/license
Connection: close
Content-Type: text/html; charset=iso-8859-1
''')
        try:
            with self.assertRaises(urllib.error.HTTPError):
                urlopen("http://python.org/")
        finally:
            self.unfakehttp()

    def test_empty_socket(self):
        # urlopen() raises OSError if the underlying socket does not send
        # any data. (#1680230)
        self.fakehttp(b'')
        try:
            with self.assertRaises(OSError):
                urlopen("http://something")
        finally:
            self.unfakehttp()

    def test_missing_localfile(self):
        # Test for #10836
        with self.assertRaises(urllib.error.URLError) as caught:
            urlopen('file://localhost/a/file/which/doesnot/exists.py')
        error = caught.exception
        self.assertTrue(error.filename)
        self.assertTrue(error.reason)

    def test_file_notexists(self):
        # The URL opens while the temp file exists and fails with URLError
        # once the file has been removed.
        fd, tmp_file = tempfile.mkstemp()
        url_path = tmp_file.replace(os.path.sep, '/')
        tmp_fileurl = 'file://localhost/' + url_path
        try:
            self.assertTrue(os.path.exists(tmp_file))
            with urlopen(tmp_fileurl) as fobj:
                self.assertTrue(fobj)
        finally:
            os.close(fd)
            os.unlink(tmp_file)
        self.assertFalse(os.path.exists(tmp_file))
        with self.assertRaises(urllib.error.URLError):
            urlopen(tmp_fileurl)

    def test_ftp_nohost(self):
        test_ftp_url = 'ftp:///path'
        with self.assertRaises(urllib.error.URLError) as caught:
            urlopen(test_ftp_url)
        error = caught.exception
        self.assertFalse(error.filename)
        self.assertTrue(error.reason)

    def test_ftp_nonexisting(self):
        with self.assertRaises(urllib.error.URLError) as caught:
            urlopen('ftp://localhost/a/file/which/doesnot/exists.py')
        error = caught.exception
        self.assertFalse(error.filename)
        self.assertTrue(error.reason)

    @patch.object(urllib.request, 'MAXFTPCACHE', 0)
    def test_ftp_cache_pruning(self):
        # With MAXFTPCACHE patched to 0 and one entry already cached,
        # opening an ftp URL must still succeed.
        self.fakeftp()
        try:
            cached = urllib.request.ftpwrapper(
                'user', 'pass', 'localhost', 21, [])
            urllib.request.ftpcache['test'] = cached
            urlopen('ftp://localhost')
        finally:
            self.unfakeftp()

    def test_userpass_inurl(self):
        self.fakehttp(b"HTTP/1.0 200 OK\r\n\r\nHello!")
        try:
            response = urlopen("http://user:pass@python.org/")
            self.assertEqual(response.readline(), b"Hello!")
            self.assertEqual(response.readline(), b"")
            self.assertEqual(response.geturl(),
                             'http://user:pass@python.org/')
            self.assertEqual(response.getcode(), 200)
        finally:
            self.unfakehttp()

    def test_userpass_inurl_w_spaces(self):
        self.fakehttp(b"HTTP/1.0 200 OK\r\n\r\nHello!")
        try:
            userpass = "a b:c d"
            url = "http://{}@python.org/".format(userpass)
            # fakehttp() has already installed the fake class, so this is
            # the class whose ``buf`` receives the outgoing request.
            fakehttp_wrapper = http.client.HTTPConnection
            token = b64encode(userpass.encode("ASCII")).decode("ASCII")
            authorization = "Authorization: Basic %s\r\n" % token
            response = urlopen(url)
            # The authorization header must be in place
            sent = fakehttp_wrapper.buf.decode("UTF-8")
            self.assertIn(authorization, sent)
            self.assertEqual(response.readline(), b"Hello!")
            self.assertEqual(response.readline(), b"")
            # the spaces are quoted in URL so no match
            self.assertNotEqual(response.geturl(), url)
            self.assertEqual(response.getcode(), 200)
        finally:
            self.unfakehttp()

    def test_URLopener_deprecation(self):
        expected = ('', DeprecationWarning)
        with support.check_warnings(expected):
            urllib.request.URLopener()
Senthil Kumaran38b968b92012-03-14 13:43:53 -0700376
class urlopen_DataTests(unittest.TestCase):
    """Test urlopen() opening a data URL."""

    def setUp(self):
        # text containing URL special- and unicode-characters
        self.text = "test data URLs :;,%=& \u00f6 \u00c4 "
        # 2x1 pixel RGB PNG image with one black and one white pixel
        self.image = (
            b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\x02\x00\x00\x00'
            b'\x01\x08\x02\x00\x00\x00{@\xe8\xdd\x00\x00\x00\x01sRGB\x00\xae'
            b'\xce\x1c\xe9\x00\x00\x00\x0fIDAT\x08\xd7c```\xf8\xff\xff?\x00'
            b'\x06\x01\x02\xfe\no/\x1e\x00\x00\x00\x00IEND\xaeB`\x82')

        self.text_url = (
            "data:text/plain;charset=UTF-8,test%20data%20URLs%20%3A%3B%2C%25%3"
            "D%26%20%C3%B6%20%C3%84%20")
        self.text_url_base64 = (
            "data:text/plain;charset=ISO-8859-1;base64,dGVzdCBkYXRhIFVSTHMgOjs"
            "sJT0mIPYgxCA%3D")
        # base64 encoded data URL that contains ignorable spaces,
        # such as "\n", " ", "%0A", and "%20".
        self.image_url = (
            "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAIAAAABCAIAAAB7\n"
            "QOjdAAAAAXNSR0IArs4c6QAAAA9JREFUCNdj%0AYGBg%2BP//PwAGAQL%2BCm8 "
            "vHgAAAABJRU5ErkJggg%3D%3D%0A%20")

        # Register each response for closing as soon as it is opened, so it
        # is released even if a later urlopen() call in setUp() fails.
        self.text_url_resp = urllib.request.urlopen(self.text_url)
        self.addCleanup(self.text_url_resp.close)
        self.text_url_base64_resp = urllib.request.urlopen(
            self.text_url_base64)
        self.addCleanup(self.text_url_base64_resp.close)
        self.image_url_resp = urllib.request.urlopen(self.image_url)
        self.addCleanup(self.image_url_resp.close)

    def test_interface(self):
        # Make sure object returned by urlopen() has the specified methods
        for attr in ("read", "readline", "readlines",
                     "close", "info", "geturl", "getcode", "__iter__"):
            self.assertTrue(hasattr(self.text_url_resp, attr),
                            "object returned by urlopen() lacks %s attribute" %
                            attr)

    def test_info(self):
        self.assertIsInstance(self.text_url_resp.info(), email.message.Message)
        self.assertEqual(self.text_url_base64_resp.info().get_params(),
                         [('text/plain', ''), ('charset', 'ISO-8859-1')])
        self.assertEqual(self.image_url_resp.info()['content-length'],
                         str(len(self.image)))
        # A data URL with no media type reports the default type and charset.
        with urllib.request.urlopen("data:,") as bare_resp:
            self.assertEqual(bare_resp.info().get_params(),
                             [('text/plain', ''), ('charset', 'US-ASCII')])

    def test_geturl(self):
        self.assertEqual(self.text_url_resp.geturl(), self.text_url)
        self.assertEqual(self.text_url_base64_resp.geturl(),
                         self.text_url_base64)
        self.assertEqual(self.image_url_resp.geturl(), self.image_url)

    def test_read_text(self):
        # Decode with the charset advertised in the response headers.
        charset = dict(self.text_url_resp.info().get_params())['charset']
        self.assertEqual(self.text_url_resp.read().decode(charset), self.text)

    def test_read_text_base64(self):
        # Decode with the charset advertised in the response headers.
        charset = dict(
            self.text_url_base64_resp.info().get_params())['charset']
        self.assertEqual(self.text_url_base64_resp.read().decode(charset),
                         self.text)

    def test_read_image(self):
        self.assertEqual(self.image_url_resp.read(), self.image)

    def test_missing_comma(self):
        with self.assertRaises(ValueError):
            urllib.request.urlopen('data:text/plain')

    def test_invalid_base64_data(self):
        # missing padding character
        with self.assertRaises(ValueError):
            urllib.request.urlopen('data:;base64,Cg=')
449
class urlretrieve_FileTests(unittest.TestCase):
    """Test urllib.urlretrieve() on local files"""

    def setUp(self):
        # Create a list of temporary files. Each item in the list is a file
        # name (absolute path or relative to the current working directory).
        # All files in this list will be deleted in the tearDown method. Note,
        # this only helps to make sure temporary files get deleted, but it
        # does nothing about trying to close files that may still be open. It
        # is the responsibility of the developer to properly close files even
        # when exceptional conditions occur.
        self.tempFiles = []

        # Create a temporary file.
        self.registerFileForCleanUp(support.TESTFN)
        self.text = b'testing urllib.urlretrieve'
        with open(support.TESTFN, 'wb') as scratch:
            scratch.write(self.text)

    def tearDown(self):
        # Delete the temporary files.  Removal is best effort: a registered
        # file may never have been created or may already be gone, so only
        # OS-level failures are ignored.
        for each in self.tempFiles:
            try:
                os.remove(each)
            except OSError:
                pass

    def constructLocalFileUrl(self, filePath):
        """Return a "file://" URL for the absolute form of *filePath*.

        Raises unittest.SkipTest when the absolute path cannot be encoded
        as UTF-8.
        """
        filePath = os.path.abspath(filePath)
        try:
            filePath.encode("utf-8")
        except UnicodeEncodeError:
            raise unittest.SkipTest("filePath is not encodable to utf8")
        return "file://%s" % urllib.request.pathname2url(filePath)

    def createNewTempFile(self, data=b""):
        """Creates a new temporary file containing the specified data,
        registers the file for deletion during the test fixture tear down, and
        returns the absolute path of the file."""

        newFd, newFilePath = tempfile.mkstemp()
        # Register before writing so the file is removed even if the write
        # fails.
        self.registerFileForCleanUp(newFilePath)
        with os.fdopen(newFd, "wb") as newFile:
            newFile.write(data)
        return newFilePath

    def registerFileForCleanUp(self, fileName):
        """Remember *fileName* so that tearDown() removes it."""
        self.tempFiles.append(fileName)

    def test_basic(self):
        # Make sure that a local file just gets its own location returned and
        # a headers value is returned.
        result = urllib.request.urlretrieve("file:%s" % support.TESTFN)
        self.assertEqual(result[0], support.TESTFN)
        self.assertIsInstance(result[1], email.message.Message,
                              "did not get a email.message.Message instance "
                              "as second returned value")

    def test_copy(self):
        # Test that setting the filename argument works.
        second_temp = "%s.2" % support.TESTFN
        self.registerFileForCleanUp(second_temp)
        result = urllib.request.urlretrieve(self.constructLocalFileUrl(
            support.TESTFN), second_temp)
        self.assertEqual(second_temp, result[0])
        self.assertTrue(os.path.exists(second_temp), "copy of the file was not "
                                                     "made")
        with open(second_temp, 'rb') as copied:
            text = copied.read()
        self.assertEqual(self.text, text)

    def test_reporthook(self):
        # Make sure that the reporthook works.
        expected_block = 0

        def hooktester(block_count, block_read_size, file_size):
            # Each call must carry the next consecutive block number.
            nonlocal expected_block
            self.assertIsInstance(block_count, int)
            self.assertIsInstance(block_read_size, int)
            self.assertIsInstance(file_size, int)
            self.assertEqual(block_count, expected_block)
            expected_block += 1

        second_temp = "%s.2" % support.TESTFN
        self.registerFileForCleanUp(second_temp)
        urllib.request.urlretrieve(
            self.constructLocalFileUrl(support.TESTFN),
            second_temp, hooktester)

    def test_reporthook_0_bytes(self):
        # Test on zero length file. Should call reporthook only 1 time.
        report = []

        def hooktester(block_count, block_read_size, file_size):
            report.append((block_count, block_read_size, file_size))

        srcFileName = self.createNewTempFile()
        urllib.request.urlretrieve(self.constructLocalFileUrl(srcFileName),
                                   support.TESTFN, hooktester)
        self.assertEqual(len(report), 1)
        self.assertEqual(report[0][2], 0)

    def test_reporthook_5_bytes(self):
        # Test on 5 byte file. Should call reporthook only 2 times (once when
        # the "network connection" is established and once when the block is
        # read).
        report = []

        def hooktester(block_count, block_read_size, file_size):
            report.append((block_count, block_read_size, file_size))

        srcFileName = self.createNewTempFile(b"x" * 5)
        urllib.request.urlretrieve(self.constructLocalFileUrl(srcFileName),
                                   support.TESTFN, hooktester)
        self.assertEqual(len(report), 2)
        self.assertEqual(report[0][2], 5)
        self.assertEqual(report[1][2], 5)

    def test_reporthook_8193_bytes(self):
        # Test on 8193 byte file. Should call reporthook only 3 times (once
        # when the "network connection" is established, once for the next 8192
        # bytes, and once for the last byte).
        report = []

        def hooktester(block_count, block_read_size, file_size):
            report.append((block_count, block_read_size, file_size))

        srcFileName = self.createNewTempFile(b"x" * 8193)
        urllib.request.urlretrieve(self.constructLocalFileUrl(srcFileName),
                                   support.TESTFN, hooktester)
        self.assertEqual(len(report), 3)
        self.assertEqual(report[0][2], 8193)
        self.assertEqual(report[0][1], 8192)
        self.assertEqual(report[1][1], 8192)
        self.assertEqual(report[2][1], 8192)
Skip Montanaro080c9972001-01-28 21:12:22 +0000588
Senthil Kumarance260142011-11-01 01:35:17 +0800589
class urlretrieve_HttpTests(unittest.TestCase, FakeHTTPMixin):
    """Test urllib.urlretrieve() using fake http connections"""

    # Canned response announcing 100 bytes of content but carrying only a
    # few, shared by both tests in this class.
    _SHORT_RESPONSE = b'''HTTP/1.1 200 OK
Date: Wed, 02 Jan 2008 03:03:54 GMT
Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
Connection: close
Content-Length: 100
Content-Type: text/html; charset=iso-8859-1

FF
'''

    def setUp(self):
        # urlretrieve() is called without a filename, so it downloads into a
        # temporary file; make sure that file is removed after each test.
        self.addCleanup(urllib.request.urlcleanup)

    def test_short_content_raises_ContentTooShortError(self):
        self.fakehttp(self._SHORT_RESPONSE)

        def _reporthook(par1, par2, par3):
            pass

        with self.assertRaises(urllib.error.ContentTooShortError):
            try:
                urllib.request.urlretrieve('http://example.com/',
                                           reporthook=_reporthook)
            finally:
                self.unfakehttp()

    def test_short_content_raises_ContentTooShortError_without_reporthook(self):
        self.fakehttp(self._SHORT_RESPONSE)
        with self.assertRaises(urllib.error.ContentTooShortError):
            try:
                urllib.request.urlretrieve('http://example.com/')
            finally:
                self.unfakehttp()
629
630
class QuotingTests(unittest.TestCase):
    r"""Tests for urllib.parse.quote() and urllib.parse.quote_plus()

    According to RFC 2396 (Uniform Resource Identifiers), to escape a
    character you write it as '%' + <2 character US-ASCII hex value>.
    The Python code of ``'%' + hex(ord(<character>))[2:]`` escapes a
    character properly. Case does not matter on the hex letters.

    The various character sets specified are:

    Reserved characters : ";/?:@&=+$,"
        Have special meaning in URIs and must be escaped if not being used for
        their special meaning
    Data characters : letters, digits, and "-_.!~*'()"
        Unreserved and do not need to be escaped; can be, though, if desired
    Control characters : 0x00 - 0x1F, 0x7F
        Have no use in URIs so must be escaped
    space : 0x20
        Must be escaped
    Delimiters : '<>#%"'
        Must be escaped
    Unwise : "{}|\^[]`"
        Must be escaped

    """

    def _check(self, func_name, expect, result):
        """Assert result == expect, naming the function under test on failure."""
        self.assertEqual(expect, result,
                         "using %s(): %r != %r" % (func_name, expect, result))

    def test_never_quote(self):
        # Make sure quote() does not quote letters, digits, and "_.-"
        do_not_quote = ''.join(["ABCDEFGHIJKLMNOPQRSTUVWXYZ",
                                "abcdefghijklmnopqrstuvwxyz",
                                "0123456789",
                                "_.-"])
        self._check("quote", do_not_quote, urllib.parse.quote(do_not_quote))
        self._check("quote_plus", do_not_quote,
                    urllib.parse.quote_plus(do_not_quote))

    def test_default_safe(self):
        # Test '/' is default value for 'safe' parameter
        self.assertEqual(urllib.parse.quote.__defaults__[0], '/')

    def test_safe(self):
        # Test setting 'safe' parameter does what it should do
        quote_by_default = "<>"
        self._check("quote", quote_by_default,
                    urllib.parse.quote(quote_by_default,
                                       safe=quote_by_default))
        self._check("quote_plus", quote_by_default,
                    urllib.parse.quote_plus(quote_by_default,
                                            safe=quote_by_default))
        # Safe expressed as bytes rather than str
        self._check("quote", quote_by_default,
                    urllib.parse.quote(quote_by_default, safe=b"<>"))
        # "Safe" non-ASCII characters should have no effect
        # (Since URIs are not allowed to have non-ASCII characters).
        # Checked with 'safe' given as str and then as bytes.
        expect = urllib.parse.quote("a\xfcb", encoding="latin-1", safe="")
        for safe in ("\xfc", b"\xfc"):
            self._check("quote", expect,
                        urllib.parse.quote("a\xfcb", encoding="latin-1",
                                           safe=safe))

    def test_default_quoting(self):
        # Make sure all characters that should be quoted are by default sans
        # space (separate test for that).
        should_quote = ''.join(
            [chr(num) for num in range(32)]      # For 0x00 - 0x1F
            + ['<>#%"{}|\\^[]`',                 # Delimiters and unwise
               chr(127)])                        # For 0x7F
        functions = (("quote", urllib.parse.quote),
                     ("quote_plus", urllib.parse.quote_plus))
        for char in should_quote:
            expect = hexescape(char)
            for func_name, func in functions:
                result = func(char)
                # %r keeps control characters readable in the failure text.
                self.assertEqual(expect, result,
                                 "using %s(): "
                                 "%r should be escaped to %s, not %s" %
                                 (func_name, char, expect, result))
        partial_quote = "ab[]cd"
        expected = "ab%5B%5Dcd"
        self._check("quote", expected, urllib.parse.quote(partial_quote))
        self._check("quote_plus", expected,
                    urllib.parse.quote_plus(partial_quote))

    def test_quoting_space(self):
        # Make sure quote() and quote_plus() handle spaces as specified in
        # their unique way: quote() percent-escapes, quote_plus() uses '+'.
        escaped_space = hexescape(' ')
        self._check("quote", escaped_space, urllib.parse.quote(' '))
        self._check("quote_plus", '+', urllib.parse.quote_plus(' '))
        given = "a b cd e f"
        self._check("quote", given.replace(' ', escaped_space),
                    urllib.parse.quote(given))
        self._check("quote_plus", given.replace(' ', '+'),
                    urllib.parse.quote_plus(given))

    def test_quoting_plus(self):
        self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma'),
                         'alpha%2Bbeta+gamma')
        self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma', '+'),
                         'alpha+beta+gamma')
        # Test with bytes
        self.assertEqual(urllib.parse.quote_plus(b'alpha+beta gamma'),
                         'alpha%2Bbeta+gamma')
        # Test with safe bytes
        self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma', b'+'),
                         'alpha+beta+gamma')

    def test_quote_bytes(self):
        # Bytes should quote directly to percent-encoded values
        given = b"\xa2\xd8ab\xff"
        expect = "%A2%D8ab%FF"
        self._check("quote", expect, urllib.parse.quote(given))
        # Encoding argument should raise type error on bytes input
        self.assertRaises(TypeError, urllib.parse.quote, given,
                          encoding="latin-1")
        # quote_from_bytes should work the same
        self._check("quote_from_bytes", expect,
                    urllib.parse.quote_from_bytes(given))

    def test_quote_with_unicode(self):
        latin = "\xa2\xd8ab\xff"
        kanji = "\u6f22\u5b57"              # "Kanji"
        # Characters in Latin-1 range, encoded by default in UTF-8
        expect = "%C2%A2%C3%98ab%C3%BF"
        self._check("quote", expect, urllib.parse.quote(latin))
        # Characters in Latin-1 range, encoded with None (default)
        self._check("quote", expect,
                    urllib.parse.quote(latin, encoding=None, errors=None))
        # Characters in Latin-1 range, encoded with Latin-1
        self._check("quote", "%A2%D8ab%FF",
                    urllib.parse.quote(latin, encoding="latin-1"))
        # Characters in BMP, encoded by default in UTF-8
        self._check("quote", "%E6%BC%A2%E5%AD%97", urllib.parse.quote(kanji))
        # Characters in BMP, encoded with Latin-1
        self.assertRaises(UnicodeEncodeError, urllib.parse.quote, kanji,
                          encoding="latin-1")
        # Characters in BMP, encoded with Latin-1, with replace error handling
        self._check("quote", "%3F%3F",      # "??"
                    urllib.parse.quote(kanji, encoding="latin-1",
                                       errors="replace"))
        # Characters in BMP, Latin-1, with xmlcharref error handling
        self._check("quote",
                    "%26%2328450%3B%26%2323383%3B",     # "&#28450;&#23383;"
                    urllib.parse.quote(kanji, encoding="latin-1",
                                       errors="xmlcharrefreplace"))

    def test_quote_plus_with_unicode(self):
        # Encoding (latin-1) test for quote_plus
        self._check("quote_plus", "%A2%D8+%FF",
                    urllib.parse.quote_plus("\xa2\xd8 \xff",
                                            encoding="latin-1"))
        # Errors test for quote_plus
        self._check("quote_plus", "ab%3F%3F+cd",
                    urllib.parse.quote_plus("ab\u6f22\u5b57 cd",
                                            encoding="latin-1",
                                            errors="replace"))
834
Senthil Kumarand496c4c2010-07-30 19:34:36 +0000835
class UnquotingTests(unittest.TestCase):
    """Tests for unquote() and unquote_plus()

    See the doc string for QuotingTests for details on quoting and such.

    """

    def _check(self, func_name, expect, result):
        """Assert result == expect, naming the function under test on failure."""
        self.assertEqual(expect, result,
                         "using %s(): %r != %r" % (func_name, expect, result))

    def test_unquoting(self):
        # Make sure unquoting of all ASCII values works
        escape_list = []
        for num in range(128):
            given = hexescape(chr(num))
            expect = chr(num)
            self._check("unquote", expect, urllib.parse.unquote(given))
            self._check("unquote_plus", expect,
                        urllib.parse.unquote_plus(given))
            escape_list.append(given)
        escape_string = ''.join(escape_list)
        result = urllib.parse.unquote(escape_string)
        # The only '%' left after unquoting is the one decoded from "%25".
        self.assertEqual(result.count('%'), 1,
                         "using unquote(): not all characters escaped: "
                         "%s" % result)
        self.assertRaises((TypeError, AttributeError),
                          urllib.parse.unquote, None)
        self.assertRaises((TypeError, AttributeError),
                          urllib.parse.unquote, ())
        with support.check_warnings(('', BytesWarning), quiet=True):
            self.assertRaises((TypeError, AttributeError),
                              urllib.parse.unquote, b'')

    def test_unquoting_badpercent(self):
        # Test unquoting on bad percent-escapes: they pass through unchanged
        bad_escapes = ('%xab', '%x', '%')
        for given in bad_escapes:
            self._check("unquote", given, urllib.parse.unquote(given))
        # unquote_to_bytes
        for given in bad_escapes:
            self._check("unquote_to_bytes", bytes(given, 'ascii'),
                        urllib.parse.unquote_to_bytes(given))
        self.assertRaises((TypeError, AttributeError),
                          urllib.parse.unquote_to_bytes, None)
        self.assertRaises((TypeError, AttributeError),
                          urllib.parse.unquote_to_bytes, ())

    def test_unquoting_mixed_case(self):
        # Test unquoting on mixed-case hex digits in the percent-escapes
        self._check("unquote_to_bytes", b'\xab\xea',
                    urllib.parse.unquote_to_bytes('%Ab%eA'))

    def test_unquoting_parts(self):
        # Make sure unquoting works when have non-quoted characters
        # interspersed
        given = 'ab%sd' % hexescape('c')
        expect = "abcd"
        self._check("unquote", expect, urllib.parse.unquote(given))
        self._check("unquote_plus", expect, urllib.parse.unquote_plus(given))

    def test_unquoting_plus(self):
        # Test difference between unquote() and unquote_plus()
        given = "are+there+spaces..."
        self._check("unquote", given, urllib.parse.unquote(given))
        self._check("unquote_plus", given.replace('+', ' '),
                    urllib.parse.unquote_plus(given))

    def test_unquote_to_bytes(self):
        self._check("unquote_to_bytes",
                    b'br\xc3\xbcckner_sapporo_20050930.doc',
                    urllib.parse.unquote_to_bytes(
                        'br%C3%BCckner_sapporo_20050930.doc'))
        # Test on a string with unescaped non-ASCII characters
        # (Technically an invalid URI; expect those characters to be UTF-8
        # encoded).
        self._check("unquote_to_bytes",
                    b'\xe6\xbc\xa2\xc3\xbc',    # UTF-8 for "\u6f22\u00fc"
                    urllib.parse.unquote_to_bytes("\u6f22%C3%BC"))
        # Test with a bytes as input
        self._check("unquote_to_bytes", b'\xa2\xd8ab\xff',
                    urllib.parse.unquote_to_bytes(b'%A2%D8ab%FF'))
        # Test with a bytes as input, with unescaped non-ASCII bytes
        # (Technically an invalid URI; expect those bytes to be preserved)
        self._check("unquote_to_bytes", b'\xa2\xd8ab\xff',
                    urllib.parse.unquote_to_bytes(b'%A2\xd8ab%FF'))

    def test_unquote_with_unicode(self):
        # Characters in the Latin-1 range, encoded with UTF-8
        given = 'br%C3%BCckner_sapporo_20050930.doc'
        expect = 'br\u00fcckner_sapporo_20050930.doc'
        self._check("unquote", expect, urllib.parse.unquote(given))
        # Characters in the Latin-1 range, encoded with None (default)
        self._check("unquote", expect,
                    urllib.parse.unquote(given, encoding=None, errors=None))

        # Characters in the Latin-1 range, encoded with Latin-1
        self._check("unquote", expect,
                    urllib.parse.unquote('br%FCckner_sapporo_20050930.doc',
                                         encoding="latin-1"))

        # Characters in BMP, encoded with UTF-8
        self._check("unquote", "\u6f22\u5b57",      # "Kanji"
                    urllib.parse.unquote("%E6%BC%A2%E5%AD%97"))

        # Decode with UTF-8, invalid sequence
        invalid = "%F3%B1"
        replacement = "\ufffd"                      # Replacement character
        self._check("unquote", replacement, urllib.parse.unquote(invalid))

        # Decode with UTF-8, invalid sequence, replace errors
        self._check("unquote", replacement,
                    urllib.parse.unquote(invalid, errors="replace"))

        # Decode with UTF-8, invalid sequence, ignoring errors
        self._check("unquote", "",
                    urllib.parse.unquote(invalid, errors="ignore"))

        # A mix of non-ASCII and percent-encoded characters, UTF-8
        self._check("unquote", '\u6f22\u00fc',
                    urllib.parse.unquote("\u6f22%C3%BC"))

        # A mix of non-ASCII and percent-encoded characters, Latin-1
        # (Note, the string contains non-Latin-1-representable characters)
        self._check("unquote", '\u6f22\u00fc',
                    urllib.parse.unquote("\u6f22%FC", encoding="latin-1"))
Raymond Hettinger4b0f20d2005-10-15 16:41:53 +00001025
class urlencode_Tests(unittest.TestCase):
    """Tests for urlencode()"""

    def help_inputtype(self, given, test_type):
        """Helper method for testing different input types.

        'given' must lead to only the pairs:
            * 1st, 1
            * 2nd, 2
            * 3rd, 3

        Test cannot assume anything about order.  Docs make no guarantee and
        have possible dictionary input.

        'test_type' is a description of the input, used in failure messages.

        """
        expect_somewhere = ["1st=1", "2nd=2", "3rd=3"]
        result = urllib.parse.urlencode(given)
        for expected in expect_somewhere:
            self.assertIn(expected, result,
                          "testing %s: %s not found in %s" %
                          (test_type, expected, result))
        amp_count = result.count('&')
        self.assertEqual(amp_count, 2,
                         "testing %s: expected 2 '&'s; got %s" %
                         (test_type, amp_count))
        # Every pair ends in a digit and starts with one, so the first '&'
        # must sit between two digits.
        amp_location = result.index('&')
        on_amp_left = result[amp_location - 1]
        on_amp_right = result[amp_location + 1]
        self.assertTrue(on_amp_left.isdigit() and on_amp_right.isdigit(),
                        "testing %s: '&' not located in proper place in %s" %
                        (test_type, result))
        expected_length = (5 * 3) + 2   # 5 chars per thing and amps
        self.assertEqual(len(result), expected_length,
                         "testing %s: "
                         "unexpected number of characters: %s != %s" %
                         (test_type, len(result), expected_length))

    def test_using_mapping(self):
        # Test passing in a mapping object as an argument.
        self.help_inputtype({"1st": '1', "2nd": '2', "3rd": '3'},
                            "using dict as input type")

    def test_using_sequence(self):
        # Test passing in a sequence of two-item sequences as an argument.
        self.help_inputtype([('1st', '1'), ('2nd', '2'), ('3rd', '3')],
                            "using sequence of two-item tuples as input")

    def test_quoting(self):
        # Make sure keys and values are quoted using quote_plus()
        given = {"&": "="}
        expect = "%s=%s" % (hexescape('&'), hexescape('='))
        self.assertEqual(expect, urllib.parse.urlencode(given))
        given = {"key name": "A bunch of pluses"}
        expect = "key+name=A+bunch+of+pluses"
        self.assertEqual(expect, urllib.parse.urlencode(given))

    def test_doseq(self):
        # Test that passing True for 'doseq' parameter works correctly
        given = {'sequence': ['1', '2', '3']}
        # Without doseq the list is stringified and quoted as one value.
        expect = "sequence=%s" % urllib.parse.quote_plus(str(['1', '2', '3']))
        self.assertEqual(expect, urllib.parse.urlencode(given))
        # With doseq every element becomes its own key=value pair.
        result = urllib.parse.urlencode(given, True)
        for value in given["sequence"]:
            self.assertIn("sequence=%s" % value, result)
        self.assertEqual(result.count('&'), 2,
                         "Expected 2 '&'s, got %s" % result.count('&'))

    def test_empty_sequence(self):
        self.assertEqual("", urllib.parse.urlencode({}))
        self.assertEqual("", urllib.parse.urlencode([]))

    def test_nonstring_values(self):
        self.assertEqual("a=1", urllib.parse.urlencode({"a": 1}))
        self.assertEqual("a=None", urllib.parse.urlencode({"a": None}))

    def test_nonstring_seq_values(self):
        self.assertEqual("a=1&a=2",
                         urllib.parse.urlencode({"a": [1, 2]}, True))
        self.assertEqual("a=None&a=a",
                         urllib.parse.urlencode({"a": [None, "a"]}, True))
        # The expected string shows the mapping's keys ("a", "b") being
        # emitted as the values when a mapping is passed with doseq.
        data = collections.OrderedDict([("a", 1), ("b", 1)])
        self.assertEqual("a=a&a=b",
                         urllib.parse.urlencode({"a": data}, True))

    def test_urlencode_encoding(self):
        given = (('\u00a0', '\u00c1'),)

        # ASCII encoding. Expect %3F with errors="replace"
        result = urllib.parse.urlencode(given, encoding="ASCII",
                                        errors="replace")
        self.assertEqual('%3F=%3F', result)

        # Default is UTF-8 encoding.
        result = urllib.parse.urlencode(given)
        self.assertEqual('%C2%A0=%C3%81', result)

        # Latin-1 encoding.
        result = urllib.parse.urlencode(given, encoding="latin-1")
        self.assertEqual('%A0=%C1', result)

    def test_urlencode_encoding_doseq(self):
        single = (("\u00a0", "\u00c1"),)
        multiple = (("\u00a0", (42, "\u00c1")),)

        # ASCII Encoding. Expect %3F with errors="replace"
        result = urllib.parse.urlencode(single, doseq=True,
                                        encoding="ASCII", errors="replace")
        self.assertEqual('%3F=%3F', result)

        # ASCII Encoding. On a sequence of values.
        given = (("\u00a0", (1, "\u00c1")),)
        result = urllib.parse.urlencode(given, True,
                                        encoding="ASCII", errors="replace")
        self.assertEqual('%3F=1&%3F=%3F', result)

        # Utf-8
        result = urllib.parse.urlencode(single, True)
        self.assertEqual('%C2%A0=%C3%81', result)

        result = urllib.parse.urlencode(multiple, True)
        self.assertEqual('%C2%A0=42&%C2%A0=%C3%81', result)

        # latin-1
        result = urllib.parse.urlencode(single, True, encoding="latin-1")
        self.assertEqual('%A0=%C1', result)

        result = urllib.parse.urlencode(multiple, True, encoding="latin-1")
        self.assertEqual('%A0=42&%A0=%C1', result)

    def test_urlencode_bytes(self):
        given = ((b'\xa0\x24', b'\xc1\x24'),)
        expect = '%A0%24=%C1%24'
        self.assertEqual(expect, urllib.parse.urlencode(given))
        self.assertEqual(expect, urllib.parse.urlencode(given, True))

        # Sequence of values
        given = ((b'\xa0\x24', (42, b'\xc1\x24')),)
        expect = '%A0%24=42&%A0%24=%C1%24'
        self.assertEqual(expect, urllib.parse.urlencode(given, True))

    def test_urlencode_encoding_safe_parameter(self):
        # Send '$' (\x24) as safe character
        single = ((b'\xa0\x24', b'\xc1\x24'),)
        multiple = ((b'\xa0\x24', (b'\xc1\x24', 0xd, 42)),)
        expect_single = '%A0$=%C1$'
        expect_multiple = '%A0$=%C1$&%A0$=13&%A0$=42'

        # Default utf-8 encoding
        result = urllib.parse.urlencode(single, safe=":$")
        self.assertEqual(expect_single, result)

        result = urllib.parse.urlencode(single, doseq=True, safe=":$")
        self.assertEqual(expect_single, result)

        # Safe parameter in sequence
        result = urllib.parse.urlencode(multiple, True, safe=":$")
        self.assertEqual(expect_multiple, result)

        # Test all above in latin-1 encoding
        result = urllib.parse.urlencode(single, safe=":$",
                                        encoding="latin-1")
        self.assertEqual(expect_single, result)

        # This case used to compute the result without checking it.
        result = urllib.parse.urlencode(single, doseq=True, safe=":$",
                                        encoding="latin-1")
        self.assertEqual(expect_single, result)

        result = urllib.parse.urlencode(multiple, True, safe=":$",
                                        encoding="latin-1")
        self.assertEqual(expect_multiple, result)
1220
class Pathname_Tests(unittest.TestCase):
    """Test pathname2url() and url2pathname()"""

    def test_basic(self):
        # Make sure simple tests pass
        expected_path = os.path.join("parts", "of", "a", "path")
        expected_url = "parts/of/a/path"
        result = urllib.request.pathname2url(expected_path)
        self.assertEqual(expected_url, result,
                         "pathname2url() failed; %s != %s" %
                         (expected_url, result))
        result = urllib.request.url2pathname(expected_url)
        self.assertEqual(expected_path, result,
                         "url2pathname() failed; %s != %s" %
                         (expected_path, result))

    def test_quoting(self):
        # Test automatic quoting and unquoting works for pathname2url() and
        # url2pathname() respectively
        given = os.path.join("needs", "quot=ing", "here")
        expect = "needs/%s/here" % urllib.parse.quote("quot=ing")
        result = urllib.request.pathname2url(given)
        self.assertEqual(expect, result,
                         "pathname2url() failed; %s != %s" %
                         (expect, result))
        # Round trip: converting the URL back must give the original path.
        expect = given
        result = urllib.request.url2pathname(result)
        self.assertEqual(expect, result,
                         "url2pathname() failed; %s != %s" %
                         (expect, result))
        given = os.path.join("make sure", "using_quote")
        expect = "%s/using_quote" % urllib.parse.quote("make sure")
        result = urllib.request.pathname2url(given)
        self.assertEqual(expect, result,
                         "pathname2url() failed; %s != %s" %
                         (expect, result))
        # A literal '+' in the URL must survive as '+', not become a space.
        given = "make+sure/using_unquote"
        expect = os.path.join("make+sure", "using_unquote")
        result = urllib.request.url2pathname(given)
        self.assertEqual(expect, result,
                         "url2pathname() failed; %s != %s" %
                         (expect, result))

    @unittest.skipUnless(sys.platform == 'win32',
                         'test specific to the urllib.url2path function.')
    def test_ntpath(self):
        # Several spellings of a drive-root URL all map to the same path.
        expect = 'C:\\'
        for url in ('/C:/', '///C:/', '/C|//'):
            result = urllib.request.url2pathname(url)
            self.assertEqual(expect, result,
                             'urllib.request.url2pathname() failed; %s != %s' %
                             (expect, result))
        given = '///C|/path'
        expect = 'C:\\path'
        result = urllib.request.url2pathname(given)
        self.assertEqual(expect, result,
                         'urllib.request.url2pathname() failed; %s != %s' %
                         (expect, result))
1280
class Utility_Tests(unittest.TestCase):
    """Testcase to test the various utility functions in the urllib."""

    def test_splitpasswd(self):
        """Check splitpasswd() on a range of "user:password" strings.

        Some of the password examples are not sensible, but they are
        included to conform to RFC2617 and to address issue4675.
        """
        # (expected (user, password) pair, input string)
        cases = [
            (('user', 'ab'), 'user:ab'),
            (('user', 'a\nb'), 'user:a\nb'),
            (('user', 'a\tb'), 'user:a\tb'),
            (('user', 'a\rb'), 'user:a\rb'),
            (('user', 'a\fb'), 'user:a\fb'),
            (('user', 'a\vb'), 'user:a\vb'),
            (('user', 'a:b'), 'user:a:b'),
            (('user', 'a b'), 'user:a b'),
            (('user 2', 'ab'), 'user 2:ab'),
            (('user+1', 'a+b'), 'user+1:a+b'),
        ]
        for expected_pair, userinfo in cases:
            self.assertEqual(expected_pair,
                             urllib.parse.splitpasswd(userinfo))

    def test_thishost(self):
        """Test the urllib.request.thishost utility function returns a tuple"""
        host_info = urllib.request.thishost()
        self.assertIsInstance(host_info, tuple)
1302
Senthil Kumaran690ce9b2009-05-05 18:41:13 +00001303
class URLopener_Tests(unittest.TestCase):
    """Tests for the open() method of the URLopener class."""

    def test_quoted_open(self):
        class DummyURLopener(urllib.request.URLopener):
            # Handler for the made-up "spam" scheme: returns the URL it
            # was given so the test can inspect it.
            def open_spam(self, url):
                return url

        expected_warning = (
            'DummyURLopener style of invoking requests is deprecated.',
            DeprecationWarning)
        with support.check_warnings(expected_warning):
            opened = DummyURLopener().open('spam://example/ /')
            self.assertEqual(opened, '//example/%20/')

            # test the safe characters are not quoted by urlopen
            opened = DummyURLopener().open(
                "spam://c:|windows%/:=&?~#+!$,;'@()*[]|/path/")
            self.assertEqual(
                opened, "//c:|windows%/:=&?~#+!$,;'@()*[]|/path/")
Senthil Kumaran734f0592010-02-20 22:19:04 +00001321
# The tests below are commented out.
# It is unclear why they keep failing on Windows and SPARC.
# Everywhere else they work fine, but on those machines they sometimes
# fail in one of the tests and sometimes in another. On my Linux machine
# the tests pass.
# If anybody has one of the problematic environments, please help!
# -- Facundo
1329#
1330# def server(evt):
Georg Brandlf78e02b2008-06-10 17:40:04 +00001331# import socket, time
Guido van Rossume7ba4952007-06-06 23:52:48 +00001332# serv = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
1333# serv.settimeout(3)
1334# serv.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
1335# serv.bind(("", 9093))
1336# serv.listen(5)
1337# try:
1338# conn, addr = serv.accept()
1339# conn.send("1 Hola mundo\n")
1340# cantdata = 0
1341# while cantdata < 13:
1342# data = conn.recv(13-cantdata)
1343# cantdata += len(data)
1344# time.sleep(.3)
1345# conn.send("2 No more lines\n")
1346# conn.close()
1347# except socket.timeout:
1348# pass
1349# finally:
1350# serv.close()
1351# evt.set()
1352#
1353# class FTPWrapperTests(unittest.TestCase):
1354#
1355# def setUp(self):
Georg Brandlf78e02b2008-06-10 17:40:04 +00001356# import ftplib, time, threading
Guido van Rossume7ba4952007-06-06 23:52:48 +00001357# ftplib.FTP.port = 9093
1358# self.evt = threading.Event()
1359# threading.Thread(target=server, args=(self.evt,)).start()
1360# time.sleep(.1)
1361#
1362# def tearDown(self):
1363# self.evt.wait()
1364#
1365# def testBasic(self):
1366# # connects
1367# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [])
Georg Brandlf78e02b2008-06-10 17:40:04 +00001368# ftp.close()
Guido van Rossume7ba4952007-06-06 23:52:48 +00001369#
1370# def testTimeoutNone(self):
Georg Brandlf78e02b2008-06-10 17:40:04 +00001371# # global default timeout is ignored
1372# import socket
Serhiy Storchaka25d8aea2014-02-08 14:50:08 +02001373# self.assertIsNone(socket.getdefaulttimeout())
Guido van Rossume7ba4952007-06-06 23:52:48 +00001374# socket.setdefaulttimeout(30)
1375# try:
1376# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [])
1377# finally:
Georg Brandlf78e02b2008-06-10 17:40:04 +00001378# socket.setdefaulttimeout(None)
Guido van Rossume7ba4952007-06-06 23:52:48 +00001379# self.assertEqual(ftp.ftp.sock.gettimeout(), 30)
Georg Brandlf78e02b2008-06-10 17:40:04 +00001380# ftp.close()
Guido van Rossume7ba4952007-06-06 23:52:48 +00001381#
Georg Brandlf78e02b2008-06-10 17:40:04 +00001382# def testTimeoutDefault(self):
1383# # global default timeout is used
1384# import socket
Serhiy Storchaka25d8aea2014-02-08 14:50:08 +02001385# self.assertIsNone(socket.getdefaulttimeout())
Georg Brandlf78e02b2008-06-10 17:40:04 +00001386# socket.setdefaulttimeout(30)
1387# try:
1388# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [])
1389# finally:
1390# socket.setdefaulttimeout(None)
1391# self.assertEqual(ftp.ftp.sock.gettimeout(), 30)
1392# ftp.close()
1393#
1394# def testTimeoutValue(self):
1395# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [],
1396# timeout=30)
1397# self.assertEqual(ftp.ftp.sock.gettimeout(), 30)
1398# ftp.close()
Guido van Rossume7ba4952007-06-06 23:52:48 +00001399
Senthil Kumaran8b081b72013-04-10 20:53:12 -07001400
class RequestTests(unittest.TestCase):
    """Unit tests for urllib.request.Request."""

    def test_default_values(self):
        # No method given: the result of get_method() depends on whether
        # a data argument was passed.
        url = "http://www.python.org"
        without_data = urllib.request.Request(url)
        self.assertEqual(without_data.get_method(), 'GET')
        with_data = urllib.request.Request(url, {})
        self.assertEqual(with_data.get_method(), 'POST')

    def test_with_method_arg(self):
        # An explicit method is reported back, with or without data.
        url = "http://www.python.org"
        head_request = urllib.request.Request(url, method='HEAD')
        self.assertEqual(head_request.method, 'HEAD')
        self.assertEqual(head_request.get_method(), 'HEAD')

        head_with_data = urllib.request.Request(url, {}, method='HEAD')
        self.assertEqual(head_with_data.method, 'HEAD')
        self.assertEqual(head_with_data.get_method(), 'HEAD')

        # Reassigning the attribute afterwards changes get_method() too.
        mutable_request = urllib.request.Request(url, method='GET')
        self.assertEqual(mutable_request.get_method(), 'GET')
        mutable_request.method = 'HEAD'
        self.assertEqual(mutable_request.get_method(), 'HEAD')
Skip Montanaro080c9972001-01-28 21:12:22 +00001423
1424
class URL2PathNameTests(unittest.TestCase):
    # Tests for url2pathname() as imported from nturl2path.

    def test_converting_drive_letter(self):
        drive_cases = (
            ("///C|", 'C:'),
            ("///C:", 'C:'),
            ("///C|/", 'C:\\'),
        )
        for url, path in drive_cases:
            self.assertEqual(url2pathname(url), path)

    def test_converting_when_no_drive_letter(self):
        # cannot end a raw string in \
        no_drive_cases = (
            ("///C/test/", r'\\\C\test' '\\'),
            ("////C/test/", r'\\C\test' '\\'),
        )
        for url, path in no_drive_cases:
            self.assertEqual(url2pathname(url), path)

    def test_simple_compare(self):
        converted = url2pathname("///C|/foo/bar/spam.foo")
        self.assertEqual(converted, r'C:\foo\bar\spam.foo')

    def test_non_ascii_drive_letter(self):
        with self.assertRaises(IOError):
            url2pathname("///\u00e8|/")

    def test_roundtrip_url2pathname(self):
        # Each path must survive pathname2url() followed by url2pathname().
        round_trip_paths = (
            'C:',
            r'\\\C\test\\',
            r'C:\foo\bar\spam.foo',
        )
        for original in round_trip_paths:
            as_url = pathname2url(original)
            self.assertEqual(url2pathname(as_url), original)
Senthil Kumaran277e9092013-04-10 20:51:19 -07001451
class PathName2URLTests(unittest.TestCase):
    # Tests for pathname2url() as imported from nturl2path.

    def test_converting_drive_letter(self):
        for path in ("C:", "C:\\"):
            self.assertEqual(pathname2url(path), '///C:')

    def test_converting_when_no_drive_letter(self):
        # The trailing backslash is a separate literal because a raw
        # string cannot end in a backslash.
        no_drive_cases = (
            (r"\\\folder\test" "\\", '/////folder/test/'),
            (r"\\folder\test" "\\", '////folder/test/'),
            (r"\folder\test" "\\", '/folder/test/'),
        )
        for path, url in no_drive_cases:
            self.assertEqual(pathname2url(path), url)

    def test_simple_compare(self):
        converted = pathname2url(r'C:\foo\bar\spam.foo')
        self.assertEqual(converted, "///C:/foo/bar/spam.foo")

    def test_long_drive_letter(self):
        with self.assertRaises(IOError):
            pathname2url("XX:\\")

    def test_roundtrip_pathname2url(self):
        # Each URL must survive url2pathname() followed by pathname2url().
        round_trip_urls = (
            '///C:',
            '/////folder/test/',
            '///C:/foo/bar/spam.foo',
        )
        for original in round_trip_urls:
            as_path = url2pathname(original)
            self.assertEqual(pathname2url(as_path), original)
Brett Cannon74bfd702003-04-25 09:39:47 +00001479
# Run the tests via unittest when this file is executed as a script.
if __name__ == '__main__':
    unittest.main()