blob: 7eb34c8ccfa192a5db977527e8deb209a3bda2cc [file] [log] [blame]
Brett Cannon74bfd702003-04-25 09:39:47 +00001"""Regression tests for urllib"""
2
Jeremy Hylton1afc1692008-06-18 20:49:58 +00003import urllib.parse
4import urllib.request
guido@google.coma119df92011-03-29 11:41:02 -07005import urllib.error
Georg Brandl24420152008-05-26 16:32:26 +00006import http.client
Barry Warsaw820c1202008-06-12 04:06:45 +00007import email.message
Jeremy Hylton66dc8c52007-08-04 03:42:26 +00008import io
Brett Cannon74bfd702003-04-25 09:39:47 +00009import unittest
Benjamin Peterson3c2dca62014-06-07 15:08:04 -070010from unittest.mock import patch
Benjamin Petersonee8712c2008-05-20 21:35:26 +000011from test import support
Brett Cannon74bfd702003-04-25 09:39:47 +000012import os
Senthil Kumaran2d2ea1b2011-04-14 13:16:30 +080013import sys
Georg Brandl5a650a22005-08-26 08:51:34 +000014import tempfile
Senthil Kumaran277e9092013-04-10 20:51:19 -070015from nturl2path import url2pathname, pathname2url
Jeremy Hylton6102e292000-08-31 15:48:10 +000016
Senthil Kumaranc5c5a142012-01-14 19:09:04 +080017from base64 import b64encode
Georg Brandl2daf6ae2012-02-20 19:54:16 +010018import collections
Senthil Kumaranc5c5a142012-01-14 19:09:04 +080019
Senthil Kumaran8b081b72013-04-10 20:53:12 -070020
def hexescape(char):
    """Return *char* percent-escaped as RFC 2396 specifies.

    The character's code point is rendered in upper-case hexadecimal,
    zero-padded to at least two digits, and prefixed with ``%``.
    """
    return "%%%02X" % ord(char)
Jeremy Hylton6102e292000-08-31 15:48:10 +000027
# Shortcut for testing FancyURLopener: module-level cache holding the opener
# that urlopen() below creates on first use and reuses on later calls.
_urlopener = None
Senthil Kumaran277e9092013-04-10 20:51:19 -070030
31
def urlopen(url, data=None, proxies=None):
    """urlopen(url [, data]) -> open file-like object

    Open *url* through a FancyURLopener.  When *proxies* is given, a fresh
    opener configured with those proxies is used for this call only;
    otherwise a single opener is created on first use, stored in the
    module-level ``_urlopener`` and shared by later calls.  *data* is
    forwarded to the opener only when it is not None.
    """
    global _urlopener
    if proxies is not None:
        opener = urllib.request.FancyURLopener(proxies=proxies)
    else:
        if not _urlopener:
            # The shared opener is built under check_warnings(), which
            # expects the FancyURLopener DeprecationWarning to be emitted.
            with support.check_warnings(
                    ('FancyURLopener style of invoking requests is deprecated.',
                     DeprecationWarning)):
                _urlopener = urllib.request.FancyURLopener()
        opener = _urlopener
    call_args = (url,) if data is None else (url, data)
    return opener.open(*call_args)
49
Senthil Kumarance260142011-11-01 01:35:17 +080050
def fakehttp(fakedata):
    """Build an HTTPConnection subclass that replays *fakedata*.

    The returned class never touches the network: connect() installs an
    in-memory socket pre-loaded with *fakedata* (bytes), and whatever the
    client sends is recorded on the class attribute ``buf`` so tests can
    inspect the outgoing request.
    """
    class FakeSocket(io.BytesIO):
        # Reference count: the socket itself plus one per makefile() call.
        # The underlying buffer is only closed when it drops to zero.
        io_refs = 1

        def sendall(self, data):
            # Record the request bytes on the connection class.
            FakeHTTPConnection.buf = data

        def makefile(self, *args, **kwds):
            # The "file" is the socket itself; just take another reference.
            self.io_refs += 1
            return self

        def read(self, amt=None):
            # Reading a closed socket yields EOF instead of raising.
            return b"" if self.closed else super().read(amt)

        def readline(self, length=None):
            return b"" if self.closed else super().readline(length)

        def close(self):
            self.io_refs -= 1
            if not self.io_refs:
                super().close()

    class FakeHTTPConnection(http.client.HTTPConnection):

        # buffer to store data for verification in urlopen tests.
        buf = None
        # One socket shared by every instance of this class.
        fakesock = FakeSocket(fakedata)

        def connect(self):
            self.sock = self.fakesock

    return FakeHTTPConnection
87
88
class FakeHTTPMixin:
    """Mixin that temporarily swaps http.client.HTTPConnection for a fake."""

    def fakehttp(self, fakedata):
        # Remember the real class first so unfakehttp() can put it back,
        # then install a fake connection class that replays *fakedata*.
        real_connection = http.client.HTTPConnection
        self._connection_class = real_connection
        http.client.HTTPConnection = fakehttp(fakedata)

    def unfakehttp(self):
        # Restore whatever class fakehttp() saved.
        saved = self._connection_class
        http.client.HTTPConnection = saved
96
97
class FakeFTPMixin:
    """Mixin that temporarily swaps urllib.request.ftpwrapper for a stub."""

    def fakeftp(self):
        class FakeFtpWrapper:
            # Accepts the same constructor arguments as the object it
            # replaces but ignores all of them.
            def __init__(self, user, passwd, host, port, dirs, timeout=None,
                         persistent=True):
                pass

            def retrfile(self, file, type):
                # Always hand back an empty stream and a length of 0.
                empty_stream = io.BytesIO()
                return empty_stream, 0

            def close(self):
                pass

        # Save the current wrapper so unfakeftp() can restore it.
        self._ftpwrapper_class = urllib.request.ftpwrapper
        urllib.request.ftpwrapper = FakeFtpWrapper

    def unfakeftp(self):
        saved = self._ftpwrapper_class
        urllib.request.ftpwrapper = saved
116
117
class urlopen_FileTests(unittest.TestCase):
    """Test urlopen() opening a temporary file.

    Try to test as much functionality as possible so as to cut down on reliance
    on connecting to the Net for testing.

    """

    def setUp(self):
        # Write a single known line to the scratch file, then open that file
        # through a "file:" URL; every test inspects the returned object.
        content = "test_urllib: %s\n" % self.__class__.__name__
        self.text = bytes(content, "ascii")
        with open(support.TESTFN, 'wb') as scratch:
            scratch.write(self.text)
        self.pathname = support.TESTFN
        self.returned_obj = urlopen("file:%s" % self.pathname)

    def tearDown(self):
        """Close the opened object and delete the scratch file"""
        self.returned_obj.close()
        os.remove(support.TESTFN)

    def test_interface(self):
        # The object returned by urlopen() must expose the file-like and
        # URL-info API listed here.
        required = ("read", "readline", "readlines", "fileno",
                    "close", "info", "geturl", "getcode", "__iter__")
        for name in required:
            has_it = hasattr(self.returned_obj, name)
            self.assertTrue(has_it,
                            "object returned by urlopen() lacks %s attribute" %
                            name)

    def test_read(self):
        data = self.returned_obj.read()
        self.assertEqual(self.text, data)

    def test_readline(self):
        first = self.returned_obj.readline()
        self.assertEqual(self.text, first)
        # The file holds exactly one line, so the next call must hit EOF.
        second = self.returned_obj.readline()
        self.assertEqual(b'', second,
                         "calling readline() after exhausting the file did not"
                         " return an empty string")

    def test_readlines(self):
        lines = self.returned_obj.readlines()
        self.assertEqual(len(lines), 1,
                         "readlines() returned the wrong number of lines")
        self.assertEqual(lines[0], self.text,
                         "readlines() returned improper text")

    def test_fileno(self):
        fd = self.returned_obj.fileno()
        self.assertIsInstance(fd, int, "fileno() did not return an int")
        # Reading straight from the descriptor must yield the file contents.
        raw = os.read(fd, len(self.text))
        self.assertEqual(raw, self.text,
                         "Reading on the file descriptor returned by fileno() "
                         "did not return the expected text")

    def test_close(self):
        # close() is called here and then again by tearDown(), so this also
        # checks that closing twice is harmless.
        self.returned_obj.close()

    def test_info(self):
        headers = self.returned_obj.info()
        self.assertIsInstance(headers, email.message.Message)

    def test_geturl(self):
        reported = self.returned_obj.geturl()
        self.assertEqual(reported, self.pathname)

    def test_getcode(self):
        # A file: URL carries no HTTP status.
        status = self.returned_obj.getcode()
        self.assertIsNone(status)

    def test_iter(self):
        # No need to count iterations: anything yielded beyond the first
        # line would fail the comparison immediately.
        # Iterate in the usual implicit way to test for ticket #4608.
        for chunk in self.returned_obj:
            self.assertEqual(chunk, self.text)

    def test_relativelocalfile(self):
        # A bare relative path is not a URL and must be rejected.
        relative = './' + self.pathname
        self.assertRaises(ValueError, urllib.request.urlopen, relative)
199
class ProxyTests(unittest.TestCase):
    """Tests for reading proxy settings from environment variables."""

    def setUp(self):
        # The guard records every change made to the environment so that
        # tearDown() can undo it.
        self.env = support.EnvironmentVarGuard()
        # Start from a clean slate: drop every proxy-related variable.
        proxy_vars = [name for name in os.environ if 'proxy' in name.lower()]
        for name in proxy_vars:
            self.env.unset(name)

    def tearDown(self):
        # Restore all proxy related env vars
        self.env.__exit__()
        del self.env

    def test_getproxies_environment_keep_no_proxies(self):
        self.env.set('NO_PROXY', 'localhost')
        found = urllib.request.getproxies_environment()
        # getproxies_environment() keys are lower-cased with the '_proxy'
        # suffix removed, hence 'no'.
        self.assertEqual('localhost', found['no'])
        # A comma-separated list containing spaces must still match.
        self.env.set('NO_PROXY', 'localhost, anotherdomain.com, newdomain.com')
        bypassed = urllib.request.proxy_bypass_environment('anotherdomain.com')
        self.assertTrue(bypassed)
Benjamin Peterson9bc93512008-09-22 22:10:59 +0000223
class urlopen_HttpTests(unittest.TestCase, FakeHTTPMixin, FakeFTPMixin):
    """Test urlopen() opening a fake http connection."""

    def check_read(self, ver):
        # Serve a minimal 200 response for HTTP version *ver* (bytes) and
        # verify body, URL and status as seen through urlopen().
        canned = b"HTTP/" + ver + b" 200 OK\r\n\r\nHello!"
        self.fakehttp(canned)
        try:
            response = urlopen("http://python.org/")
            self.assertEqual(response.readline(), b"Hello!")
            self.assertEqual(response.readline(), b"")
            self.assertEqual(response.geturl(), 'http://python.org/')
            self.assertEqual(response.getcode(), 200)
        finally:
            self.unfakehttp()

    def test_url_fragment(self):
        # Issue #11703: geturl() omits fragments in the original URL.
        target = 'http://docs.python.org/library/urllib.html#OK'
        self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello!")
        try:
            response = urllib.request.urlopen(target)
            self.assertEqual(response.geturl(), target)
        finally:
            self.unfakehttp()

    def test_willclose(self):
        self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello!")
        try:
            response = urlopen("http://www.python.org")
            self.assertTrue(response.fp.will_close)
        finally:
            self.unfakehttp()

    def test_read_0_9(self):
        # "0.9" response accepted (but not "simple responses" without
        # a status line)
        self.check_read(b"0.9")

    def test_read_1_0(self):
        self.check_read(b"1.0")

    def test_read_1_1(self):
        self.check_read(b"1.1")

    def test_read_bogus(self):
        # urlopen() should raise OSError for many error codes; a 401
        # response is used as the example here.
        self.fakehttp(b'''HTTP/1.1 401 Authentication Required
Date: Wed, 02 Jan 2008 03:03:54 GMT
Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
Connection: close
Content-Type: text/html; charset=iso-8859-1
''')
        try:
            self.assertRaises(OSError, urlopen, "http://python.org/")
        finally:
            self.unfakehttp()

    def test_invalid_redirect(self):
        # A 302 whose Location points at a file:// URL must surface as an
        # HTTPError.
        self.fakehttp(b'''HTTP/1.1 302 Found
Date: Wed, 02 Jan 2008 03:03:54 GMT
Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
Location: file://guidocomputer.athome.com:/python/license
Connection: close
Content-Type: text/html; charset=iso-8859-1
''')
        try:
            self.assertRaises(urllib.error.HTTPError, urlopen,
                              "http://python.org/")
        finally:
            self.unfakehttp()

    def test_empty_socket(self):
        # urlopen() raises OSError if the underlying socket does not send any
        # data. (#1680230)
        self.fakehttp(b'')
        try:
            self.assertRaises(OSError, urlopen, "http://something")
        finally:
            self.unfakehttp()

    def test_missing_localfile(self):
        # Test for #10836
        missing = 'file://localhost/a/file/which/doesnot/exists.py'
        with self.assertRaises(urllib.error.URLError) as caught:
            urlopen(missing)
        self.assertTrue(caught.exception.filename)
        self.assertTrue(caught.exception.reason)

    def test_file_notexists(self):
        # The URL opens while the temp file exists and raises URLError
        # once the file has been removed.
        handle, tmp_path = tempfile.mkstemp()
        tmp_url = 'file://localhost/' + tmp_path.replace(os.path.sep, '/')
        try:
            self.assertTrue(os.path.exists(tmp_path))
            with urlopen(tmp_url) as opened:
                self.assertTrue(opened)
        finally:
            os.close(handle)
            os.unlink(tmp_path)
        self.assertFalse(os.path.exists(tmp_path))
        with self.assertRaises(urllib.error.URLError):
            urlopen(tmp_url)

    def test_ftp_nohost(self):
        hostless_url = 'ftp:///path'
        with self.assertRaises(urllib.error.URLError) as caught:
            urlopen(hostless_url)
        self.assertFalse(caught.exception.filename)
        self.assertTrue(caught.exception.reason)

    def test_ftp_nonexisting(self):
        with self.assertRaises(urllib.error.URLError) as caught:
            urlopen('ftp://localhost/a/file/which/doesnot/exists.py')
        self.assertFalse(caught.exception.filename)
        self.assertTrue(caught.exception.reason)

    @patch.object(urllib.request, 'MAXFTPCACHE', 0)
    def test_ftp_cache_pruning(self):
        # With MAXFTPCACHE patched to 0 and one entry pre-seeded in the
        # cache, opening an FTP URL presumably exercises the cache-pruning
        # path; the test only requires that the open does not raise.
        self.fakeftp()
        try:
            seeded = urllib.request.ftpwrapper('user', 'pass', 'localhost', 21, [])
            urllib.request.ftpcache['test'] = seeded
            urlopen('ftp://localhost')
        finally:
            self.unfakeftp()

    def test_userpass_inurl(self):
        self.fakehttp(b"HTTP/1.0 200 OK\r\n\r\nHello!")
        try:
            response = urlopen("http://user:pass@python.org/")
            self.assertEqual(response.readline(), b"Hello!")
            self.assertEqual(response.readline(), b"")
            self.assertEqual(response.geturl(), 'http://user:pass@python.org/')
            self.assertEqual(response.getcode(), 200)
        finally:
            self.unfakehttp()

    def test_userpass_inurl_w_spaces(self):
        self.fakehttp(b"HTTP/1.0 200 OK\r\n\r\nHello!")
        try:
            credentials = "a b:c d"
            target = "http://{}@python.org/".format(credentials)
            # The fake connection class records the request bytes in .buf.
            fake_connection = http.client.HTTPConnection
            token = b64encode(credentials.encode("ASCII")).decode("ASCII")
            expected_header = "Authorization: Basic %s\r\n" % token
            response = urlopen(target)
            # The authorization header must be in place
            sent = fake_connection.buf.decode("UTF-8")
            self.assertIn(expected_header, sent)
            self.assertEqual(response.readline(), b"Hello!")
            self.assertEqual(response.readline(), b"")
            # the spaces are quoted in URL so no match
            self.assertNotEqual(response.geturl(), target)
            self.assertEqual(response.getcode(), 200)
        finally:
            self.unfakehttp()

    def test_URLopener_deprecation(self):
        # Creating a URLopener must emit a DeprecationWarning.
        with support.check_warnings(('', DeprecationWarning)):
            urllib.request.URLopener()
Senthil Kumaran38b968b92012-03-14 13:43:53 -0700381
class urlopen_DataTests(unittest.TestCase):
    """Test urlopen() opening a data URL."""

    def setUp(self):
        # text containing URL special- and unicode-characters
        self.text = "test data URLs :;,%=& \u00f6 \u00c4 "
        # 2x1 pixel RGB PNG image with one black and one white pixel
        self.image = (
            b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\x02\x00\x00\x00'
            b'\x01\x08\x02\x00\x00\x00{@\xe8\xdd\x00\x00\x00\x01sRGB\x00\xae'
            b'\xce\x1c\xe9\x00\x00\x00\x0fIDAT\x08\xd7c```\xf8\xff\xff?\x00'
            b'\x06\x01\x02\xfe\no/\x1e\x00\x00\x00\x00IEND\xaeB`\x82')

        self.text_url = (
            "data:text/plain;charset=UTF-8,test%20data%20URLs%20%3A%3B%2C%25%3"
            "D%26%20%C3%B6%20%C3%84%20")
        self.text_url_base64 = (
            "data:text/plain;charset=ISO-8859-1;base64,dGVzdCBkYXRhIFVSTHMgOjs"
            "sJT0mIPYgxCA%3D")
        # base64 encoded data URL that contains ignorable spaces,
        # such as "\n", " ", "%0A", and "%20".
        # The first segment carries the "data:" header and the start of the
        # base64 payload (the encoding of the leading bytes of self.image);
        # without it the string is not a data URL at all.
        self.image_url = (
            "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAIAAAABCAIAAAB7\n"
            "QOjdAAAAAXNSR0IArs4c6QAAAA9JREFUCNdj%0AYGBg%2BP//PwAGAQL%2BCm8 "
            "vHgAAAABJRU5ErkJggg%3D%3D%0A%20")

        self.text_url_resp = urllib.request.urlopen(self.text_url)
        self.addCleanup(self.text_url_resp.close)
        self.text_url_base64_resp = urllib.request.urlopen(
            self.text_url_base64)
        self.addCleanup(self.text_url_base64_resp.close)
        self.image_url_resp = urllib.request.urlopen(self.image_url)
        self.addCleanup(self.image_url_resp.close)

    def test_interface(self):
        # Make sure object returned by urlopen() has the specified methods
        for attr in ("read", "readline", "readlines",
                     "close", "info", "geturl", "getcode", "__iter__"):
            self.assertTrue(hasattr(self.text_url_resp, attr),
                            "object returned by urlopen() lacks %s attribute" %
                            attr)

    def test_info(self):
        self.assertIsInstance(self.text_url_resp.info(), email.message.Message)
        self.assertEqual(self.text_url_base64_resp.info().get_params(),
                         [('text/plain', ''), ('charset', 'ISO-8859-1')])
        self.assertEqual(self.image_url_resp.info()['content-length'],
                         str(len(self.image)))
        # An empty data URL falls back to text/plain in US-ASCII.
        with urllib.request.urlopen("data:,") as empty_resp:
            self.assertEqual(empty_resp.info().get_params(),
                             [('text/plain', ''), ('charset', 'US-ASCII')])

    def test_geturl(self):
        self.assertEqual(self.text_url_resp.geturl(), self.text_url)
        self.assertEqual(self.text_url_base64_resp.geturl(),
                         self.text_url_base64)
        self.assertEqual(self.image_url_resp.geturl(), self.image_url)

    def test_read_text(self):
        # Decode the payload with the charset announced in the headers.
        charset = dict(self.text_url_resp.info().get_params())['charset']
        self.assertEqual(self.text_url_resp.read().decode(charset), self.text)

    def test_read_text_base64(self):
        charset = dict(
            self.text_url_base64_resp.info().get_params())['charset']
        self.assertEqual(self.text_url_base64_resp.read().decode(charset),
                         self.text)

    def test_read_image(self):
        self.assertEqual(self.image_url_resp.read(), self.image)

    def test_missing_comma(self):
        # A data URL without the "," separating header and payload is invalid.
        self.assertRaises(ValueError, urllib.request.urlopen,
                          'data:text/plain')

    def test_invalid_base64_data(self):
        # missing padding character
        self.assertRaises(ValueError, urllib.request.urlopen,
                          'data:;base64,Cg=')
454
class urlretrieve_FileTests(unittest.TestCase):
    """Test urllib.urlretrieve() on local files"""

    def setUp(self):
        # Create a list of temporary files. Each item in the list is a file
        # name (absolute path or relative to the current working directory).
        # All files in this list will be deleted in the tearDown method. Note,
        # this only helps to make sure temporary files get deleted, but it
        # does nothing about trying to close files that may still be open. It
        # is the responsibility of the developer to properly close files even
        # when exceptional conditions occur.
        self.tempFiles = []

        # Create a temporary file.
        self.registerFileForCleanUp(support.TESTFN)
        self.text = b'testing urllib.urlretrieve'
        # The context manager closes the file on every path and lets a
        # failing open() propagate its real error.
        with open(support.TESTFN, 'wb') as source:
            source.write(self.text)

    def tearDown(self):
        # Delete the temporary files.  Removal is best-effort (a test may
        # never have created a registered file), but only filesystem errors
        # are ignored.
        for path in self.tempFiles:
            try:
                os.remove(path)
            except OSError:
                pass

    def constructLocalFileUrl(self, filePath):
        """Return a file:// URL for *filePath*.

        The path is made absolute first.  The test is skipped when the
        path cannot be encoded as UTF-8.
        """
        filePath = os.path.abspath(filePath)
        try:
            filePath.encode("utf-8")
        except UnicodeEncodeError:
            raise unittest.SkipTest("filePath is not encodable to utf8")
        return "file://%s" % urllib.request.pathname2url(filePath)

    def createNewTempFile(self, data=b""):
        """Creates a new temporary file containing the specified data,
        registers the file for deletion during the test fixture tear down, and
        returns the absolute path of the file."""

        newFd, newFilePath = tempfile.mkstemp()
        # Register before writing so the file is removed even if the write
        # fails.
        self.registerFileForCleanUp(newFilePath)
        with os.fdopen(newFd, "wb") as newFile:
            newFile.write(data)
        return newFilePath

    def registerFileForCleanUp(self, fileName):
        """Schedule *fileName* for deletion in tearDown()."""
        self.tempFiles.append(fileName)

    def test_basic(self):
        # Make sure that a local file just gets its own location returned and
        # a headers value is returned.
        result = urllib.request.urlretrieve("file:%s" % support.TESTFN)
        self.assertEqual(result[0], support.TESTFN)
        self.assertIsInstance(result[1], email.message.Message,
                              "did not get a email.message.Message instance "
                              "as second returned value")

    def test_copy(self):
        # Test that setting the filename argument works.
        second_temp = "%s.2" % support.TESTFN
        self.registerFileForCleanUp(second_temp)
        result = urllib.request.urlretrieve(self.constructLocalFileUrl(
            support.TESTFN), second_temp)
        self.assertEqual(second_temp, result[0])
        self.assertTrue(os.path.exists(second_temp), "copy of the file was not "
                                                     "made")
        with open(second_temp, 'rb') as copied:
            text = copied.read()
        self.assertEqual(self.text, text)

    def test_reporthook(self):
        # Make sure that the reporthook works: every argument is an int and
        # block_count increases by one per call, starting at 0.
        expected_block = 0

        def hooktester(block_count, block_read_size, file_size):
            nonlocal expected_block
            self.assertIsInstance(block_count, int)
            self.assertIsInstance(block_read_size, int)
            self.assertIsInstance(file_size, int)
            self.assertEqual(block_count, expected_block)
            expected_block += 1

        second_temp = "%s.2" % support.TESTFN
        self.registerFileForCleanUp(second_temp)
        urllib.request.urlretrieve(
            self.constructLocalFileUrl(support.TESTFN),
            second_temp, hooktester)

    def test_reporthook_0_bytes(self):
        # Test on zero length file. Should call reporthook only 1 time.
        report = []

        def hooktester(block_count, block_read_size, file_size):
            report.append((block_count, block_read_size, file_size))

        srcFileName = self.createNewTempFile()
        urllib.request.urlretrieve(self.constructLocalFileUrl(srcFileName),
                                   support.TESTFN, hooktester)
        self.assertEqual(len(report), 1)
        self.assertEqual(report[0][2], 0)

    def test_reporthook_5_bytes(self):
        # Test on 5 byte file. Should call reporthook only 2 times (once when
        # the "network connection" is established and once when the block is
        # read).
        report = []

        def hooktester(block_count, block_read_size, file_size):
            report.append((block_count, block_read_size, file_size))

        srcFileName = self.createNewTempFile(b"x" * 5)
        urllib.request.urlretrieve(self.constructLocalFileUrl(srcFileName),
                                   support.TESTFN, hooktester)
        self.assertEqual(len(report), 2)
        self.assertEqual(report[0][2], 5)
        self.assertEqual(report[1][2], 5)

    def test_reporthook_8193_bytes(self):
        # Test on 8193 byte file. Should call reporthook only 3 times (once
        # when the "network connection" is established, once for the next 8192
        # bytes, and once for the last byte).
        report = []

        def hooktester(block_count, block_read_size, file_size):
            report.append((block_count, block_read_size, file_size))

        srcFileName = self.createNewTempFile(b"x" * 8193)
        urllib.request.urlretrieve(self.constructLocalFileUrl(srcFileName),
                                   support.TESTFN, hooktester)
        self.assertEqual(len(report), 3)
        self.assertEqual(report[0][2], 8193)
        # The reported block size stays 8192 on every call, including the
        # final one-byte read.
        self.assertEqual(report[0][1], 8192)
        self.assertEqual(report[1][1], 8192)
        self.assertEqual(report[2][1], 8192)
Skip Montanaro080c9972001-01-28 21:12:22 +0000593
Senthil Kumarance260142011-11-01 01:35:17 +0800594
class urlretrieve_HttpTests(unittest.TestCase, FakeHTTPMixin):
    """Test urllib.urlretrieve() using fake http connections"""

    def test_short_content_raises_ContentTooShortError(self):
        # urlretrieve() is called without a target filename, so it downloads
        # into a temporary file of its own; urlcleanup() removes it again.
        self.addCleanup(urllib.request.urlcleanup)

        # The response announces 100 bytes but delivers far fewer.
        self.fakehttp(b'''HTTP/1.1 200 OK
Date: Wed, 02 Jan 2008 03:03:54 GMT
Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
Connection: close
Content-Length: 100
Content-Type: text/html; charset=iso-8859-1

FF
''')

        def _reporthook(par1, par2, par3):
            pass

        with self.assertRaises(urllib.error.ContentTooShortError):
            try:
                urllib.request.urlretrieve('http://example.com/',
                                           reporthook=_reporthook)
            finally:
                self.unfakehttp()

    def test_short_content_raises_ContentTooShortError_without_reporthook(self):
        # Same scenario without a reporthook; the temporary download file
        # is again removed through urlcleanup().
        self.addCleanup(urllib.request.urlcleanup)

        self.fakehttp(b'''HTTP/1.1 200 OK
Date: Wed, 02 Jan 2008 03:03:54 GMT
Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
Connection: close
Content-Length: 100
Content-Type: text/html; charset=iso-8859-1

FF
''')
        with self.assertRaises(urllib.error.ContentTooShortError):
            try:
                urllib.request.urlretrieve('http://example.com/')
            finally:
                self.unfakehttp()
634
635
Brett Cannon74bfd702003-04-25 09:39:47 +0000636class QuotingTests(unittest.TestCase):
637 """Tests for urllib.quote() and urllib.quote_plus()
Tim Petersc2659cf2003-05-12 20:19:37 +0000638
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000639 According to RFC 2396 (Uniform Resource Identifiers), to escape a
640 character you write it as '%' + <2 character US-ASCII hex value>.
641 The Python code of ``'%' + hex(ord(<character>))[2:]`` escapes a
642 character properly. Case does not matter on the hex letters.
Brett Cannon74bfd702003-04-25 09:39:47 +0000643
644 The various character sets specified are:
Tim Petersc2659cf2003-05-12 20:19:37 +0000645
Brett Cannon74bfd702003-04-25 09:39:47 +0000646 Reserved characters : ";/?:@&=+$,"
647 Have special meaning in URIs and must be escaped if not being used for
648 their special meaning
649 Data characters : letters, digits, and "-_.!~*'()"
650 Unreserved and do not need to be escaped; can be, though, if desired
651 Control characters : 0x00 - 0x1F, 0x7F
652 Have no use in URIs so must be escaped
653 space : 0x20
654 Must be escaped
655 Delimiters : '<>#%"'
656 Must be escaped
657 Unwise : "{}|\^[]`"
658 Must be escaped
Tim Petersc2659cf2003-05-12 20:19:37 +0000659
Brett Cannon74bfd702003-04-25 09:39:47 +0000660 """
661
662 def test_never_quote(self):
663 # Make sure quote() does not quote letters, digits, and "_,.-"
664 do_not_quote = '' .join(["ABCDEFGHIJKLMNOPQRSTUVWXYZ",
665 "abcdefghijklmnopqrstuvwxyz",
666 "0123456789",
667 "_.-"])
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000668 result = urllib.parse.quote(do_not_quote)
Brett Cannon74bfd702003-04-25 09:39:47 +0000669 self.assertEqual(do_not_quote, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000670 "using quote(): %r != %r" % (do_not_quote, result))
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000671 result = urllib.parse.quote_plus(do_not_quote)
Brett Cannon74bfd702003-04-25 09:39:47 +0000672 self.assertEqual(do_not_quote, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000673 "using quote_plus(): %r != %r" % (do_not_quote, result))
Brett Cannon74bfd702003-04-25 09:39:47 +0000674
675 def test_default_safe(self):
676 # Test '/' is default value for 'safe' parameter
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000677 self.assertEqual(urllib.parse.quote.__defaults__[0], '/')
Brett Cannon74bfd702003-04-25 09:39:47 +0000678
679 def test_safe(self):
680 # Test setting 'safe' parameter does what it should do
681 quote_by_default = "<>"
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000682 result = urllib.parse.quote(quote_by_default, safe=quote_by_default)
Brett Cannon74bfd702003-04-25 09:39:47 +0000683 self.assertEqual(quote_by_default, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000684 "using quote(): %r != %r" % (quote_by_default, result))
Jeremy Hylton1ef7c6b2009-03-26 16:57:30 +0000685 result = urllib.parse.quote_plus(quote_by_default,
686 safe=quote_by_default)
Brett Cannon74bfd702003-04-25 09:39:47 +0000687 self.assertEqual(quote_by_default, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000688 "using quote_plus(): %r != %r" %
Brett Cannon74bfd702003-04-25 09:39:47 +0000689 (quote_by_default, result))
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000690 # Safe expressed as bytes rather than str
691 result = urllib.parse.quote(quote_by_default, safe=b"<>")
692 self.assertEqual(quote_by_default, result,
693 "using quote(): %r != %r" % (quote_by_default, result))
694 # "Safe" non-ASCII characters should have no effect
695 # (Since URIs are not allowed to have non-ASCII characters)
696 result = urllib.parse.quote("a\xfcb", encoding="latin-1", safe="\xfc")
697 expect = urllib.parse.quote("a\xfcb", encoding="latin-1", safe="")
698 self.assertEqual(expect, result,
699 "using quote(): %r != %r" %
700 (expect, result))
701 # Same as above, but using a bytes rather than str
702 result = urllib.parse.quote("a\xfcb", encoding="latin-1", safe=b"\xfc")
703 expect = urllib.parse.quote("a\xfcb", encoding="latin-1", safe="")
704 self.assertEqual(expect, result,
705 "using quote(): %r != %r" %
706 (expect, result))
Brett Cannon74bfd702003-04-25 09:39:47 +0000707
708 def test_default_quoting(self):
709 # Make sure all characters that should be quoted are by default sans
710 # space (separate test for that).
711 should_quote = [chr(num) for num in range(32)] # For 0x00 - 0x1F
712 should_quote.append('<>#%"{}|\^[]`')
713 should_quote.append(chr(127)) # For 0x7F
714 should_quote = ''.join(should_quote)
715 for char in should_quote:
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000716 result = urllib.parse.quote(char)
Brett Cannon74bfd702003-04-25 09:39:47 +0000717 self.assertEqual(hexescape(char), result,
Jeremy Hylton1ef7c6b2009-03-26 16:57:30 +0000718 "using quote(): "
719 "%s should be escaped to %s, not %s" %
Brett Cannon74bfd702003-04-25 09:39:47 +0000720 (char, hexescape(char), result))
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000721 result = urllib.parse.quote_plus(char)
Brett Cannon74bfd702003-04-25 09:39:47 +0000722 self.assertEqual(hexescape(char), result,
723 "using quote_plus(): "
Tim Petersc2659cf2003-05-12 20:19:37 +0000724 "%s should be escapes to %s, not %s" %
Brett Cannon74bfd702003-04-25 09:39:47 +0000725 (char, hexescape(char), result))
726 del should_quote
727 partial_quote = "ab[]cd"
728 expected = "ab%5B%5Dcd"
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000729 result = urllib.parse.quote(partial_quote)
Brett Cannon74bfd702003-04-25 09:39:47 +0000730 self.assertEqual(expected, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000731 "using quote(): %r != %r" % (expected, result))
Senthil Kumaran305a68e2011-09-13 06:40:27 +0800732 result = urllib.parse.quote_plus(partial_quote)
Brett Cannon74bfd702003-04-25 09:39:47 +0000733 self.assertEqual(expected, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000734 "using quote_plus(): %r != %r" % (expected, result))
Brett Cannon74bfd702003-04-25 09:39:47 +0000735
736 def test_quoting_space(self):
737 # Make sure quote() and quote_plus() handle spaces as specified in
738 # their unique way
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000739 result = urllib.parse.quote(' ')
Brett Cannon74bfd702003-04-25 09:39:47 +0000740 self.assertEqual(result, hexescape(' '),
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000741 "using quote(): %r != %r" % (result, hexescape(' ')))
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000742 result = urllib.parse.quote_plus(' ')
Brett Cannon74bfd702003-04-25 09:39:47 +0000743 self.assertEqual(result, '+',
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000744 "using quote_plus(): %r != +" % result)
Brett Cannon74bfd702003-04-25 09:39:47 +0000745 given = "a b cd e f"
746 expect = given.replace(' ', hexescape(' '))
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000747 result = urllib.parse.quote(given)
Brett Cannon74bfd702003-04-25 09:39:47 +0000748 self.assertEqual(expect, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000749 "using quote(): %r != %r" % (expect, result))
Brett Cannon74bfd702003-04-25 09:39:47 +0000750 expect = given.replace(' ', '+')
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000751 result = urllib.parse.quote_plus(given)
Brett Cannon74bfd702003-04-25 09:39:47 +0000752 self.assertEqual(expect, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000753 "using quote_plus(): %r != %r" % (expect, result))
Brett Cannon74bfd702003-04-25 09:39:47 +0000754
Raymond Hettinger2bdec7b2005-09-10 14:30:09 +0000755 def test_quoting_plus(self):
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000756 self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma'),
Raymond Hettinger2bdec7b2005-09-10 14:30:09 +0000757 'alpha%2Bbeta+gamma')
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000758 self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma', '+'),
Raymond Hettinger2bdec7b2005-09-10 14:30:09 +0000759 'alpha+beta+gamma')
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000760 # Test with bytes
761 self.assertEqual(urllib.parse.quote_plus(b'alpha+beta gamma'),
762 'alpha%2Bbeta+gamma')
763 # Test with safe bytes
764 self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma', b'+'),
765 'alpha+beta+gamma')
766
767 def test_quote_bytes(self):
768 # Bytes should quote directly to percent-encoded values
769 given = b"\xa2\xd8ab\xff"
770 expect = "%A2%D8ab%FF"
771 result = urllib.parse.quote(given)
772 self.assertEqual(expect, result,
773 "using quote(): %r != %r" % (expect, result))
774 # Encoding argument should raise type error on bytes input
775 self.assertRaises(TypeError, urllib.parse.quote, given,
776 encoding="latin-1")
777 # quote_from_bytes should work the same
778 result = urllib.parse.quote_from_bytes(given)
779 self.assertEqual(expect, result,
780 "using quote_from_bytes(): %r != %r"
781 % (expect, result))
782
783 def test_quote_with_unicode(self):
784 # Characters in Latin-1 range, encoded by default in UTF-8
785 given = "\xa2\xd8ab\xff"
786 expect = "%C2%A2%C3%98ab%C3%BF"
787 result = urllib.parse.quote(given)
788 self.assertEqual(expect, result,
789 "using quote(): %r != %r" % (expect, result))
790 # Characters in Latin-1 range, encoded by with None (default)
791 result = urllib.parse.quote(given, encoding=None, errors=None)
792 self.assertEqual(expect, result,
793 "using quote(): %r != %r" % (expect, result))
794 # Characters in Latin-1 range, encoded with Latin-1
795 given = "\xa2\xd8ab\xff"
796 expect = "%A2%D8ab%FF"
797 result = urllib.parse.quote(given, encoding="latin-1")
798 self.assertEqual(expect, result,
799 "using quote(): %r != %r" % (expect, result))
800 # Characters in BMP, encoded by default in UTF-8
801 given = "\u6f22\u5b57" # "Kanji"
802 expect = "%E6%BC%A2%E5%AD%97"
803 result = urllib.parse.quote(given)
804 self.assertEqual(expect, result,
805 "using quote(): %r != %r" % (expect, result))
806 # Characters in BMP, encoded with Latin-1
807 given = "\u6f22\u5b57"
808 self.assertRaises(UnicodeEncodeError, urllib.parse.quote, given,
809 encoding="latin-1")
810 # Characters in BMP, encoded with Latin-1, with replace error handling
811 given = "\u6f22\u5b57"
812 expect = "%3F%3F" # "??"
813 result = urllib.parse.quote(given, encoding="latin-1",
814 errors="replace")
815 self.assertEqual(expect, result,
816 "using quote(): %r != %r" % (expect, result))
817 # Characters in BMP, Latin-1, with xmlcharref error handling
818 given = "\u6f22\u5b57"
819 expect = "%26%2328450%3B%26%2323383%3B" # "&#28450;&#23383;"
820 result = urllib.parse.quote(given, encoding="latin-1",
821 errors="xmlcharrefreplace")
822 self.assertEqual(expect, result,
823 "using quote(): %r != %r" % (expect, result))
Raymond Hettinger2bdec7b2005-09-10 14:30:09 +0000824
Georg Brandlfaf41492009-05-26 18:31:11 +0000825 def test_quote_plus_with_unicode(self):
826 # Encoding (latin-1) test for quote_plus
827 given = "\xa2\xd8 \xff"
828 expect = "%A2%D8+%FF"
829 result = urllib.parse.quote_plus(given, encoding="latin-1")
830 self.assertEqual(expect, result,
831 "using quote_plus(): %r != %r" % (expect, result))
832 # Errors test for quote_plus
833 given = "ab\u6f22\u5b57 cd"
834 expect = "ab%3F%3F+cd"
835 result = urllib.parse.quote_plus(given, encoding="latin-1",
836 errors="replace")
837 self.assertEqual(expect, result,
838 "using quote_plus(): %r != %r" % (expect, result))
839
Senthil Kumarand496c4c2010-07-30 19:34:36 +0000840
Brett Cannon74bfd702003-04-25 09:39:47 +0000841class UnquotingTests(unittest.TestCase):
842 """Tests for unquote() and unquote_plus()
Tim Petersc2659cf2003-05-12 20:19:37 +0000843
Brett Cannon74bfd702003-04-25 09:39:47 +0000844 See the doc string for quoting_Tests for details on quoting and such.
845
846 """
847
848 def test_unquoting(self):
849 # Make sure unquoting of all ASCII values works
850 escape_list = []
851 for num in range(128):
852 given = hexescape(chr(num))
853 expect = chr(num)
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000854 result = urllib.parse.unquote(given)
Brett Cannon74bfd702003-04-25 09:39:47 +0000855 self.assertEqual(expect, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000856 "using unquote(): %r != %r" % (expect, result))
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000857 result = urllib.parse.unquote_plus(given)
Brett Cannon74bfd702003-04-25 09:39:47 +0000858 self.assertEqual(expect, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000859 "using unquote_plus(): %r != %r" %
Brett Cannon74bfd702003-04-25 09:39:47 +0000860 (expect, result))
861 escape_list.append(given)
862 escape_string = ''.join(escape_list)
863 del escape_list
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000864 result = urllib.parse.unquote(escape_string)
Brett Cannon74bfd702003-04-25 09:39:47 +0000865 self.assertEqual(result.count('%'), 1,
Brett Cannon74bfd702003-04-25 09:39:47 +0000866 "using unquote(): not all characters escaped: "
867 "%s" % result)
Georg Brandl604ef372010-07-31 08:20:02 +0000868 self.assertRaises((TypeError, AttributeError), urllib.parse.unquote, None)
869 self.assertRaises((TypeError, AttributeError), urllib.parse.unquote, ())
Florent Xicluna62829dc2010-08-14 20:51:58 +0000870 with support.check_warnings(('', BytesWarning), quiet=True):
871 self.assertRaises((TypeError, AttributeError), urllib.parse.unquote, b'')
Brett Cannon74bfd702003-04-25 09:39:47 +0000872
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000873 def test_unquoting_badpercent(self):
874 # Test unquoting on bad percent-escapes
875 given = '%xab'
876 expect = given
877 result = urllib.parse.unquote(given)
878 self.assertEqual(expect, result, "using unquote(): %r != %r"
879 % (expect, result))
880 given = '%x'
881 expect = given
882 result = urllib.parse.unquote(given)
883 self.assertEqual(expect, result, "using unquote(): %r != %r"
884 % (expect, result))
885 given = '%'
886 expect = given
887 result = urllib.parse.unquote(given)
888 self.assertEqual(expect, result, "using unquote(): %r != %r"
889 % (expect, result))
890 # unquote_to_bytes
891 given = '%xab'
892 expect = bytes(given, 'ascii')
893 result = urllib.parse.unquote_to_bytes(given)
894 self.assertEqual(expect, result, "using unquote_to_bytes(): %r != %r"
895 % (expect, result))
896 given = '%x'
897 expect = bytes(given, 'ascii')
898 result = urllib.parse.unquote_to_bytes(given)
899 self.assertEqual(expect, result, "using unquote_to_bytes(): %r != %r"
900 % (expect, result))
901 given = '%'
902 expect = bytes(given, 'ascii')
903 result = urllib.parse.unquote_to_bytes(given)
904 self.assertEqual(expect, result, "using unquote_to_bytes(): %r != %r"
905 % (expect, result))
Georg Brandl604ef372010-07-31 08:20:02 +0000906 self.assertRaises((TypeError, AttributeError), urllib.parse.unquote_to_bytes, None)
907 self.assertRaises((TypeError, AttributeError), urllib.parse.unquote_to_bytes, ())
Senthil Kumaran79e17f62010-07-19 18:17:19 +0000908
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000909 def test_unquoting_mixed_case(self):
910 # Test unquoting on mixed-case hex digits in the percent-escapes
911 given = '%Ab%eA'
912 expect = b'\xab\xea'
913 result = urllib.parse.unquote_to_bytes(given)
914 self.assertEqual(expect, result,
915 "using unquote_to_bytes(): %r != %r"
916 % (expect, result))
917
Brett Cannon74bfd702003-04-25 09:39:47 +0000918 def test_unquoting_parts(self):
919 # Make sure unquoting works when have non-quoted characters
920 # interspersed
921 given = 'ab%sd' % hexescape('c')
922 expect = "abcd"
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000923 result = urllib.parse.unquote(given)
Brett Cannon74bfd702003-04-25 09:39:47 +0000924 self.assertEqual(expect, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000925 "using quote(): %r != %r" % (expect, result))
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000926 result = urllib.parse.unquote_plus(given)
Brett Cannon74bfd702003-04-25 09:39:47 +0000927 self.assertEqual(expect, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000928 "using unquote_plus(): %r != %r" % (expect, result))
Tim Petersc2659cf2003-05-12 20:19:37 +0000929
Brett Cannon74bfd702003-04-25 09:39:47 +0000930 def test_unquoting_plus(self):
931 # Test difference between unquote() and unquote_plus()
932 given = "are+there+spaces..."
933 expect = given
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000934 result = urllib.parse.unquote(given)
Brett Cannon74bfd702003-04-25 09:39:47 +0000935 self.assertEqual(expect, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000936 "using unquote(): %r != %r" % (expect, result))
Brett Cannon74bfd702003-04-25 09:39:47 +0000937 expect = given.replace('+', ' ')
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000938 result = urllib.parse.unquote_plus(given)
Brett Cannon74bfd702003-04-25 09:39:47 +0000939 self.assertEqual(expect, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000940 "using unquote_plus(): %r != %r" % (expect, result))
941
942 def test_unquote_to_bytes(self):
943 given = 'br%C3%BCckner_sapporo_20050930.doc'
944 expect = b'br\xc3\xbcckner_sapporo_20050930.doc'
945 result = urllib.parse.unquote_to_bytes(given)
946 self.assertEqual(expect, result,
947 "using unquote_to_bytes(): %r != %r"
948 % (expect, result))
949 # Test on a string with unescaped non-ASCII characters
950 # (Technically an invalid URI; expect those characters to be UTF-8
951 # encoded).
952 result = urllib.parse.unquote_to_bytes("\u6f22%C3%BC")
953 expect = b'\xe6\xbc\xa2\xc3\xbc' # UTF-8 for "\u6f22\u00fc"
954 self.assertEqual(expect, result,
955 "using unquote_to_bytes(): %r != %r"
956 % (expect, result))
957 # Test with a bytes as input
958 given = b'%A2%D8ab%FF'
959 expect = b'\xa2\xd8ab\xff'
960 result = urllib.parse.unquote_to_bytes(given)
961 self.assertEqual(expect, result,
962 "using unquote_to_bytes(): %r != %r"
963 % (expect, result))
964 # Test with a bytes as input, with unescaped non-ASCII bytes
965 # (Technically an invalid URI; expect those bytes to be preserved)
966 given = b'%A2\xd8ab%FF'
967 expect = b'\xa2\xd8ab\xff'
968 result = urllib.parse.unquote_to_bytes(given)
969 self.assertEqual(expect, result,
970 "using unquote_to_bytes(): %r != %r"
971 % (expect, result))
Brett Cannon74bfd702003-04-25 09:39:47 +0000972
Raymond Hettinger4b0f20d2005-10-15 16:41:53 +0000973 def test_unquote_with_unicode(self):
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000974 # Characters in the Latin-1 range, encoded with UTF-8
975 given = 'br%C3%BCckner_sapporo_20050930.doc'
976 expect = 'br\u00fcckner_sapporo_20050930.doc'
977 result = urllib.parse.unquote(given)
978 self.assertEqual(expect, result,
979 "using unquote(): %r != %r" % (expect, result))
980 # Characters in the Latin-1 range, encoded with None (default)
981 result = urllib.parse.unquote(given, encoding=None, errors=None)
982 self.assertEqual(expect, result,
983 "using unquote(): %r != %r" % (expect, result))
984
985 # Characters in the Latin-1 range, encoded with Latin-1
986 result = urllib.parse.unquote('br%FCckner_sapporo_20050930.doc',
987 encoding="latin-1")
988 expect = 'br\u00fcckner_sapporo_20050930.doc'
989 self.assertEqual(expect, result,
990 "using unquote(): %r != %r" % (expect, result))
991
992 # Characters in BMP, encoded with UTF-8
993 given = "%E6%BC%A2%E5%AD%97"
994 expect = "\u6f22\u5b57" # "Kanji"
995 result = urllib.parse.unquote(given)
996 self.assertEqual(expect, result,
997 "using unquote(): %r != %r" % (expect, result))
998
999 # Decode with UTF-8, invalid sequence
1000 given = "%F3%B1"
1001 expect = "\ufffd" # Replacement character
1002 result = urllib.parse.unquote(given)
1003 self.assertEqual(expect, result,
1004 "using unquote(): %r != %r" % (expect, result))
1005
1006 # Decode with UTF-8, invalid sequence, replace errors
1007 result = urllib.parse.unquote(given, errors="replace")
1008 self.assertEqual(expect, result,
1009 "using unquote(): %r != %r" % (expect, result))
1010
1011 # Decode with UTF-8, invalid sequence, ignoring errors
1012 given = "%F3%B1"
1013 expect = ""
1014 result = urllib.parse.unquote(given, errors="ignore")
1015 self.assertEqual(expect, result,
1016 "using unquote(): %r != %r" % (expect, result))
1017
1018 # A mix of non-ASCII and percent-encoded characters, UTF-8
1019 result = urllib.parse.unquote("\u6f22%C3%BC")
1020 expect = '\u6f22\u00fc'
1021 self.assertEqual(expect, result,
1022 "using unquote(): %r != %r" % (expect, result))
1023
1024 # A mix of non-ASCII and percent-encoded characters, Latin-1
1025 # (Note, the string contains non-Latin-1-representable characters)
1026 result = urllib.parse.unquote("\u6f22%FC", encoding="latin-1")
1027 expect = '\u6f22\u00fc'
1028 self.assertEqual(expect, result,
1029 "using unquote(): %r != %r" % (expect, result))
Raymond Hettinger4b0f20d2005-10-15 16:41:53 +00001030
Brett Cannon74bfd702003-04-25 09:39:47 +00001031class urlencode_Tests(unittest.TestCase):
1032 """Tests for urlencode()"""
1033
1034 def help_inputtype(self, given, test_type):
1035 """Helper method for testing different input types.
Tim Petersc2659cf2003-05-12 20:19:37 +00001036
Brett Cannon74bfd702003-04-25 09:39:47 +00001037 'given' must lead to only the pairs:
1038 * 1st, 1
1039 * 2nd, 2
1040 * 3rd, 3
Tim Petersc2659cf2003-05-12 20:19:37 +00001041
Brett Cannon74bfd702003-04-25 09:39:47 +00001042 Test cannot assume anything about order. Docs make no guarantee and
1043 have possible dictionary input.
Tim Petersc2659cf2003-05-12 20:19:37 +00001044
Brett Cannon74bfd702003-04-25 09:39:47 +00001045 """
1046 expect_somewhere = ["1st=1", "2nd=2", "3rd=3"]
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001047 result = urllib.parse.urlencode(given)
Brett Cannon74bfd702003-04-25 09:39:47 +00001048 for expected in expect_somewhere:
Ezio Melottib58e0bd2010-01-23 15:40:09 +00001049 self.assertIn(expected, result,
Brett Cannon74bfd702003-04-25 09:39:47 +00001050 "testing %s: %s not found in %s" %
1051 (test_type, expected, result))
1052 self.assertEqual(result.count('&'), 2,
1053 "testing %s: expected 2 '&'s; got %s" %
1054 (test_type, result.count('&')))
1055 amp_location = result.index('&')
1056 on_amp_left = result[amp_location - 1]
1057 on_amp_right = result[amp_location + 1]
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00001058 self.assertTrue(on_amp_left.isdigit() and on_amp_right.isdigit(),
Brett Cannon74bfd702003-04-25 09:39:47 +00001059 "testing %s: '&' not located in proper place in %s" %
1060 (test_type, result))
1061 self.assertEqual(len(result), (5 * 3) + 2, #5 chars per thing and amps
1062 "testing %s: "
1063 "unexpected number of characters: %s != %s" %
1064 (test_type, len(result), (5 * 3) + 2))
1065
1066 def test_using_mapping(self):
1067 # Test passing in a mapping object as an argument.
1068 self.help_inputtype({"1st":'1', "2nd":'2', "3rd":'3'},
1069 "using dict as input type")
1070
1071 def test_using_sequence(self):
1072 # Test passing in a sequence of two-item sequences as an argument.
1073 self.help_inputtype([('1st', '1'), ('2nd', '2'), ('3rd', '3')],
1074 "using sequence of two-item tuples as input")
1075
1076 def test_quoting(self):
1077 # Make sure keys and values are quoted using quote_plus()
1078 given = {"&":"="}
1079 expect = "%s=%s" % (hexescape('&'), hexescape('='))
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001080 result = urllib.parse.urlencode(given)
Brett Cannon74bfd702003-04-25 09:39:47 +00001081 self.assertEqual(expect, result)
1082 given = {"key name":"A bunch of pluses"}
1083 expect = "key+name=A+bunch+of+pluses"
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001084 result = urllib.parse.urlencode(given)
Brett Cannon74bfd702003-04-25 09:39:47 +00001085 self.assertEqual(expect, result)
1086
1087 def test_doseq(self):
1088 # Test that passing True for 'doseq' parameter works correctly
1089 given = {'sequence':['1', '2', '3']}
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001090 expect = "sequence=%s" % urllib.parse.quote_plus(str(['1', '2', '3']))
1091 result = urllib.parse.urlencode(given)
Brett Cannon74bfd702003-04-25 09:39:47 +00001092 self.assertEqual(expect, result)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001093 result = urllib.parse.urlencode(given, True)
Brett Cannon74bfd702003-04-25 09:39:47 +00001094 for value in given["sequence"]:
1095 expect = "sequence=%s" % value
Ezio Melottib58e0bd2010-01-23 15:40:09 +00001096 self.assertIn(expect, result)
Brett Cannon74bfd702003-04-25 09:39:47 +00001097 self.assertEqual(result.count('&'), 2,
1098 "Expected 2 '&'s, got %s" % result.count('&'))
1099
Jeremy Hylton1ef7c6b2009-03-26 16:57:30 +00001100 def test_empty_sequence(self):
1101 self.assertEqual("", urllib.parse.urlencode({}))
1102 self.assertEqual("", urllib.parse.urlencode([]))
1103
1104 def test_nonstring_values(self):
1105 self.assertEqual("a=1", urllib.parse.urlencode({"a": 1}))
1106 self.assertEqual("a=None", urllib.parse.urlencode({"a": None}))
1107
1108 def test_nonstring_seq_values(self):
1109 self.assertEqual("a=1&a=2", urllib.parse.urlencode({"a": [1, 2]}, True))
1110 self.assertEqual("a=None&a=a",
1111 urllib.parse.urlencode({"a": [None, "a"]}, True))
Georg Brandl2daf6ae2012-02-20 19:54:16 +01001112 data = collections.OrderedDict([("a", 1), ("b", 1)])
Jeremy Hylton1ef7c6b2009-03-26 16:57:30 +00001113 self.assertEqual("a=a&a=b",
Georg Brandl2daf6ae2012-02-20 19:54:16 +01001114 urllib.parse.urlencode({"a": data}, True))
Jeremy Hylton1ef7c6b2009-03-26 16:57:30 +00001115
Senthil Kumarandf022da2010-07-03 17:48:22 +00001116 def test_urlencode_encoding(self):
1117 # ASCII encoding. Expect %3F with errors="replace'
1118 given = (('\u00a0', '\u00c1'),)
1119 expect = '%3F=%3F'
1120 result = urllib.parse.urlencode(given, encoding="ASCII", errors="replace")
1121 self.assertEqual(expect, result)
1122
1123 # Default is UTF-8 encoding.
1124 given = (('\u00a0', '\u00c1'),)
1125 expect = '%C2%A0=%C3%81'
1126 result = urllib.parse.urlencode(given)
1127 self.assertEqual(expect, result)
1128
1129 # Latin-1 encoding.
1130 given = (('\u00a0', '\u00c1'),)
1131 expect = '%A0=%C1'
1132 result = urllib.parse.urlencode(given, encoding="latin-1")
1133 self.assertEqual(expect, result)
1134
1135 def test_urlencode_encoding_doseq(self):
1136 # ASCII Encoding. Expect %3F with errors="replace'
1137 given = (('\u00a0', '\u00c1'),)
1138 expect = '%3F=%3F'
1139 result = urllib.parse.urlencode(given, doseq=True,
1140 encoding="ASCII", errors="replace")
1141 self.assertEqual(expect, result)
1142
1143 # ASCII Encoding. On a sequence of values.
1144 given = (("\u00a0", (1, "\u00c1")),)
1145 expect = '%3F=1&%3F=%3F'
1146 result = urllib.parse.urlencode(given, True,
1147 encoding="ASCII", errors="replace")
1148 self.assertEqual(expect, result)
1149
1150 # Utf-8
1151 given = (("\u00a0", "\u00c1"),)
1152 expect = '%C2%A0=%C3%81'
1153 result = urllib.parse.urlencode(given, True)
1154 self.assertEqual(expect, result)
1155
1156 given = (("\u00a0", (42, "\u00c1")),)
1157 expect = '%C2%A0=42&%C2%A0=%C3%81'
1158 result = urllib.parse.urlencode(given, True)
1159 self.assertEqual(expect, result)
1160
1161 # latin-1
1162 given = (("\u00a0", "\u00c1"),)
1163 expect = '%A0=%C1'
1164 result = urllib.parse.urlencode(given, True, encoding="latin-1")
1165 self.assertEqual(expect, result)
1166
1167 given = (("\u00a0", (42, "\u00c1")),)
1168 expect = '%A0=42&%A0=%C1'
1169 result = urllib.parse.urlencode(given, True, encoding="latin-1")
1170 self.assertEqual(expect, result)
1171
1172 def test_urlencode_bytes(self):
1173 given = ((b'\xa0\x24', b'\xc1\x24'),)
1174 expect = '%A0%24=%C1%24'
1175 result = urllib.parse.urlencode(given)
1176 self.assertEqual(expect, result)
1177 result = urllib.parse.urlencode(given, True)
1178 self.assertEqual(expect, result)
1179
1180 # Sequence of values
1181 given = ((b'\xa0\x24', (42, b'\xc1\x24')),)
1182 expect = '%A0%24=42&%A0%24=%C1%24'
1183 result = urllib.parse.urlencode(given, True)
1184 self.assertEqual(expect, result)
1185
1186 def test_urlencode_encoding_safe_parameter(self):
1187
1188 # Send '$' (\x24) as safe character
1189 # Default utf-8 encoding
1190
1191 given = ((b'\xa0\x24', b'\xc1\x24'),)
1192 result = urllib.parse.urlencode(given, safe=":$")
1193 expect = '%A0$=%C1$'
1194 self.assertEqual(expect, result)
1195
1196 given = ((b'\xa0\x24', b'\xc1\x24'),)
1197 result = urllib.parse.urlencode(given, doseq=True, safe=":$")
1198 expect = '%A0$=%C1$'
1199 self.assertEqual(expect, result)
1200
1201 # Safe parameter in sequence
1202 given = ((b'\xa0\x24', (b'\xc1\x24', 0xd, 42)),)
1203 expect = '%A0$=%C1$&%A0$=13&%A0$=42'
1204 result = urllib.parse.urlencode(given, True, safe=":$")
1205 self.assertEqual(expect, result)
1206
1207 # Test all above in latin-1 encoding
1208
1209 given = ((b'\xa0\x24', b'\xc1\x24'),)
1210 result = urllib.parse.urlencode(given, safe=":$",
1211 encoding="latin-1")
1212 expect = '%A0$=%C1$'
1213 self.assertEqual(expect, result)
1214
1215 given = ((b'\xa0\x24', b'\xc1\x24'),)
1216 expect = '%A0$=%C1$'
1217 result = urllib.parse.urlencode(given, doseq=True, safe=":$",
1218 encoding="latin-1")
1219
1220 given = ((b'\xa0\x24', (b'\xc1\x24', 0xd, 42)),)
1221 expect = '%A0$=%C1$&%A0$=13&%A0$=42'
1222 result = urllib.parse.urlencode(given, True, safe=":$",
1223 encoding="latin-1")
1224 self.assertEqual(expect, result)
1225
class Pathname_Tests(unittest.TestCase):
    """Test pathname2url() and url2pathname()"""

    def test_basic(self):
        # Make sure simple tests pass
        expected_path = os.path.join("parts", "of", "a", "path")
        expected_url = "parts/of/a/path"
        result = urllib.request.pathname2url(expected_path)
        self.assertEqual(expected_url, result,
                         "pathname2url() failed; %s != %s" %
                         (expected_url, result))
        result = urllib.request.url2pathname(expected_url)
        self.assertEqual(expected_path, result,
                         "url2pathname() failed; %s != %s" %
                         (expected_path, result))

    def test_quoting(self):
        # Test automatic quoting and unquoting works for pathname2url() and
        # url2pathname() respectively
        given = os.path.join("needs", "quot=ing", "here")
        expect = "needs/%s/here" % urllib.parse.quote("quot=ing")
        result = urllib.request.pathname2url(given)
        self.assertEqual(expect, result,
                         "pathname2url() failed; %s != %s" %
                         (expect, result))
        # Converting the quoted URL back must give the original path
        expect = given
        result = urllib.request.url2pathname(result)
        self.assertEqual(expect, result,
                         "url2pathname() failed; %s != %s" %
                         (expect, result))
        given = os.path.join("make sure", "using_quote")
        expect = "%s/using_quote" % urllib.parse.quote("make sure")
        result = urllib.request.pathname2url(given)
        self.assertEqual(expect, result,
                         "pathname2url() failed; %s != %s" %
                         (expect, result))
        # A '+' in a URL is left alone, not turned into a space
        given = "make+sure/using_unquote"
        expect = os.path.join("make+sure", "using_unquote")
        result = urllib.request.url2pathname(given)
        self.assertEqual(expect, result,
                         "url2pathname() failed; %s != %s" %
                         (expect, result))

    @unittest.skipUnless(sys.platform == 'win32',
                         'test specific to the Windows behaviour of '
                         'urllib.request.url2pathname()')
    def test_ntpath(self):
        # Several spellings of a drive root all map to the same path
        given = ('/C:/', '///C:/', '/C|//')
        expect = 'C:\\'
        for url in given:
            result = urllib.request.url2pathname(url)
            self.assertEqual(expect, result,
                             'urllib.request.url2pathname() failed; %s != %s' %
                             (expect, result))
        given = '///C|/path'
        expect = 'C:\\path'
        result = urllib.request.url2pathname(given)
        self.assertEqual(expect, result,
                         'urllib.request.url2pathname() failed; %s != %s' %
                         (expect, result))
1285
class Utility_Tests(unittest.TestCase):
    """Exercise assorted utility functions provided by the urllib package."""

    def test_splitpasswd(self):
        """Check that splitpasswd() separates the user from the password.

        Some of these passwords are not sensible, but they are included in
        order to conform to RFC2617 and to address issue4675.
        """
        cases = [
            ('user:ab', ('user', 'ab')),
            ('user:a\nb', ('user', 'a\nb')),
            ('user:a\tb', ('user', 'a\tb')),
            ('user:a\rb', ('user', 'a\rb')),
            ('user:a\fb', ('user', 'a\fb')),
            ('user:a\vb', ('user', 'a\vb')),
            ('user:a:b', ('user', 'a:b')),
            ('user:a b', ('user', 'a b')),
            ('user 2:ab', ('user 2', 'ab')),
            ('user+1:a+b', ('user+1', 'a+b')),
        ]
        for credentials, expected in cases:
            self.assertEqual(expected, urllib.parse.splitpasswd(credentials))

    def test_thishost(self):
        """Check that urllib.request.thishost() hands back a tuple."""
        addresses = urllib.request.thishost()
        self.assertIsInstance(addresses, tuple)
1307
Senthil Kumaran690ce9b2009-05-05 18:41:13 +00001308
class URLopener_Tests(unittest.TestCase):
    """Tests for the open() method of urllib.request.URLopener."""

    def test_quoted_open(self):
        """open() quotes a space in the URL but leaves safe characters alone.

        A throwaway subclass supplies an ``open_spam`` handler that simply
        returns the URL it is given, so the test can inspect what open()
        passed to it.
        """
        class DummyURLopener(urllib.request.URLopener):
            def open_spam(self, url):
                return url

        expected_warning = (
            'DummyURLopener style of invoking requests is deprecated.',
            DeprecationWarning)
        with support.check_warnings(expected_warning):
            quoted = DummyURLopener().open('spam://example/ /')
            self.assertEqual(quoted, '//example/%20/')

            # test the safe characters are not quoted by urlopen
            untouched = DummyURLopener().open(
                "spam://c:|windows%/:=&?~#+!$,;'@()*[]|/path/")
            self.assertEqual(untouched,
                             "//c:|windows%/:=&?~#+!$,;'@()*[]|/path/")
Senthil Kumaran734f0592010-02-20 22:19:04 +00001326
# The FTP wrapper tests below are just commented out.
# I can't really tell why they keep failing on Windows and SPARC.
# Everywhere else they work fine, but on those machines they sometimes
# fail in one of the tests and sometimes in another. I have a Linux
# machine, and the tests pass there.
# If anybody has one of the problematic environments, please help!
# . Facundo
#
1335# def server(evt):
Georg Brandlf78e02b2008-06-10 17:40:04 +00001336# import socket, time
Guido van Rossume7ba4952007-06-06 23:52:48 +00001337# serv = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
1338# serv.settimeout(3)
1339# serv.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
1340# serv.bind(("", 9093))
1341# serv.listen(5)
1342# try:
1343# conn, addr = serv.accept()
1344# conn.send("1 Hola mundo\n")
1345# cantdata = 0
1346# while cantdata < 13:
1347# data = conn.recv(13-cantdata)
1348# cantdata += len(data)
1349# time.sleep(.3)
1350# conn.send("2 No more lines\n")
1351# conn.close()
1352# except socket.timeout:
1353# pass
1354# finally:
1355# serv.close()
1356# evt.set()
1357#
1358# class FTPWrapperTests(unittest.TestCase):
1359#
1360# def setUp(self):
Georg Brandlf78e02b2008-06-10 17:40:04 +00001361# import ftplib, time, threading
Guido van Rossume7ba4952007-06-06 23:52:48 +00001362# ftplib.FTP.port = 9093
1363# self.evt = threading.Event()
1364# threading.Thread(target=server, args=(self.evt,)).start()
1365# time.sleep(.1)
1366#
1367# def tearDown(self):
1368# self.evt.wait()
1369#
1370# def testBasic(self):
1371# # connects
1372# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [])
Georg Brandlf78e02b2008-06-10 17:40:04 +00001373# ftp.close()
Guido van Rossume7ba4952007-06-06 23:52:48 +00001374#
1375# def testTimeoutNone(self):
Georg Brandlf78e02b2008-06-10 17:40:04 +00001376# # global default timeout is ignored
1377# import socket
Serhiy Storchaka25d8aea2014-02-08 14:50:08 +02001378# self.assertIsNone(socket.getdefaulttimeout())
Guido van Rossume7ba4952007-06-06 23:52:48 +00001379# socket.setdefaulttimeout(30)
1380# try:
1381# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [])
1382# finally:
Georg Brandlf78e02b2008-06-10 17:40:04 +00001383# socket.setdefaulttimeout(None)
Guido van Rossume7ba4952007-06-06 23:52:48 +00001384# self.assertEqual(ftp.ftp.sock.gettimeout(), 30)
Georg Brandlf78e02b2008-06-10 17:40:04 +00001385# ftp.close()
Guido van Rossume7ba4952007-06-06 23:52:48 +00001386#
Georg Brandlf78e02b2008-06-10 17:40:04 +00001387# def testTimeoutDefault(self):
1388# # global default timeout is used
1389# import socket
Serhiy Storchaka25d8aea2014-02-08 14:50:08 +02001390# self.assertIsNone(socket.getdefaulttimeout())
Georg Brandlf78e02b2008-06-10 17:40:04 +00001391# socket.setdefaulttimeout(30)
1392# try:
1393# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [])
1394# finally:
1395# socket.setdefaulttimeout(None)
1396# self.assertEqual(ftp.ftp.sock.gettimeout(), 30)
1397# ftp.close()
1398#
1399# def testTimeoutValue(self):
1400# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [],
1401# timeout=30)
1402# self.assertEqual(ftp.ftp.sock.gettimeout(), 30)
1403# ftp.close()
Guido van Rossume7ba4952007-06-06 23:52:48 +00001404
Senthil Kumaran8b081b72013-04-10 20:53:12 -07001405
class RequestTests(unittest.TestCase):
    """Tests of how urllib.request.Request reports its HTTP method."""

    def test_default_values(self):
        """With no method given: GET without data, POST with data."""
        url = "http://www.python.org"
        without_data = urllib.request.Request(url)
        self.assertEqual(without_data.get_method(), 'GET')
        with_data = urllib.request.Request(url, {})
        self.assertEqual(with_data.get_method(), 'POST')

    def test_with_method_arg(self):
        """An explicit or later-assigned method is what get_method() returns."""
        url = "http://www.python.org"

        head_only = urllib.request.Request(url, method='HEAD')
        self.assertEqual(head_only.method, 'HEAD')
        self.assertEqual(head_only.get_method(), 'HEAD')

        # The explicit method is reported even though data is supplied.
        head_with_data = urllib.request.Request(url, {}, method='HEAD')
        self.assertEqual(head_with_data.method, 'HEAD')
        self.assertEqual(head_with_data.get_method(), 'HEAD')

        mutable = urllib.request.Request(url, method='GET')
        self.assertEqual(mutable.get_method(), 'GET')
        # Reassigning the attribute changes what get_method() returns.
        mutable.method = 'HEAD'
        self.assertEqual(mutable.get_method(), 'HEAD')
Skip Montanaro080c9972001-01-28 21:12:22 +00001428
1429
class URL2PathNameTests(unittest.TestCase):
    """Tests for url2pathname() as imported from nturl2path."""

    def test_converting_drive_letter(self):
        """Drive-letter URLs convert whether they use '|' or ':'."""
        cases = (
            ("///C|", 'C:'),
            ("///C:", 'C:'),
            ("///C|/", 'C:\\'),
        )
        for url, expected in cases:
            self.assertEqual(url2pathname(url), expected)

    def test_converting_when_no_drive_letter(self):
        """URLs without a drive letter keep their leading separators."""
        # A raw string cannot end in a backslash, so the trailing one is
        # appended as a separate ordinary literal.
        cases = (
            ("///C/test/", r'\\\C\test' '\\'),
            ("////C/test/", r'\\C\test' '\\'),
        )
        for url, expected in cases:
            self.assertEqual(url2pathname(url), expected)

    def test_simple_compare(self):
        """A full drive-letter URL converts to a backslash-separated path."""
        converted = url2pathname("///C|/foo/bar/spam.foo")
        self.assertEqual(converted, r'C:\foo\bar\spam.foo')

    def test_non_ascii_drive_letter(self):
        """A non-ASCII character in the drive position raises IOError."""
        with self.assertRaises(IOError):
            url2pathname("///\u00e8|/")

    def test_roundtrip_url2pathname(self):
        """pathname2url() followed by url2pathname() gives the path back."""
        paths = (
            'C:',
            r'\\\C\test\\',
            r'C:\foo\bar\spam.foo',
        )
        for original in paths:
            restored = url2pathname(pathname2url(original))
            self.assertEqual(restored, original)
Senthil Kumaran277e9092013-04-10 20:51:19 -07001456
class PathName2URLTests(unittest.TestCase):
    """Tests for pathname2url() as imported from nturl2path."""

    def test_converting_drive_letter(self):
        """A bare drive, with or without a trailing backslash, gives '///C:'."""
        for path in ("C:", "C:\\"):
            self.assertEqual(pathname2url(path), '///C:')

    def test_converting_when_no_drive_letter(self):
        """Paths without a drive letter convert according to their prefix."""
        # A raw string cannot end in a backslash, so the trailing one is
        # appended as a separate ordinary literal.
        cases = (
            (r"\\\folder\test" "\\", '/////folder/test/'),
            (r"\\folder\test" "\\", '////folder/test/'),
            (r"\folder\test" "\\", '/folder/test/'),
        )
        for path, expected in cases:
            self.assertEqual(pathname2url(path), expected)

    def test_simple_compare(self):
        """A full drive-letter path converts to a '///C:/...' URL."""
        converted = pathname2url(r'C:\foo\bar\spam.foo')
        self.assertEqual(converted, "///C:/foo/bar/spam.foo")

    def test_long_drive_letter(self):
        """A drive part longer than one letter raises IOError."""
        with self.assertRaises(IOError):
            pathname2url("XX:\\")

    def test_roundtrip_pathname2url(self):
        """url2pathname() followed by pathname2url() gives the URL back."""
        urls = (
            '///C:',
            '/////folder/test/',
            '///C:/foo/bar/spam.foo',
        )
        for original in urls:
            restored = pathname2url(url2pathname(original))
            self.assertEqual(restored, original)
Brett Cannon74bfd702003-04-25 09:39:47 +00001484
# Run every test case in this module when the file is executed as a script.
if __name__ == '__main__':
    unittest.main()