blob: cbe4327046df310530cbd5997bb3623d80ea77e7 [file] [log] [blame]
"""Regression tests for urllib"""
2
Jeremy Hylton1afc1692008-06-18 20:49:58 +00003import urllib.parse
4import urllib.request
guido@google.coma119df92011-03-29 11:41:02 -07005import urllib.error
Georg Brandl24420152008-05-26 16:32:26 +00006import http.client
Barry Warsaw820c1202008-06-12 04:06:45 +00007import email.message
Jeremy Hylton66dc8c52007-08-04 03:42:26 +00008import io
Brett Cannon74bfd702003-04-25 09:39:47 +00009import unittest
Benjamin Petersonee8712c2008-05-20 21:35:26 +000010from test import support
Brett Cannon74bfd702003-04-25 09:39:47 +000011import os
Senthil Kumaran2d2ea1b2011-04-14 13:16:30 +080012import sys
Georg Brandl5a650a22005-08-26 08:51:34 +000013import tempfile
Jeremy Hylton6102e292000-08-31 15:48:10 +000014
Senthil Kumaranc5c5a142012-01-14 19:09:04 +080015from base64 import b64encode
Georg Brandl2daf6ae2012-02-20 19:54:16 +010016import collections
Senthil Kumaranc5c5a142012-01-14 19:09:04 +080017
def hexescape(char):
    """Return the RFC 2396 percent-escape of a single character.

    The character's code point is written as upper-case hexadecimal,
    zero-padded to at least two digits, and prefixed with '%'.
    """
    return "%" + format(ord(char), "02X")
Jeremy Hylton6102e292000-08-31 15:48:10 +000024
# Shortcut for testing FancyURLopener
_urlopener = None


def urlopen(url, data=None, proxies=None):
    """urlopen(url [, data [, proxies]]) -> open file-like object

    Open *url* through a FancyURLopener.  When *proxies* is given, a fresh
    opener is built for this call only; otherwise one module-level opener is
    created on first use and reused afterwards.  *data* is forwarded to the
    opener's open() call only when it is not None.
    """
    global _urlopener
    if proxies is not None:
        opener = urllib.request.FancyURLopener(proxies=proxies)
    else:
        if not _urlopener:
            # check_warnings() is told to expect the deprecation warning
            # raised while the shared opener is being constructed.
            with support.check_warnings(
                    ('FancyURLopener style of invoking requests is deprecated.',
                     DeprecationWarning)):
                _urlopener = urllib.request.FancyURLopener()
        opener = _urlopener
    if data is None:
        return opener.open(url)
    return opener.open(url, data)
44
Senthil Kumarance260142011-11-01 01:35:17 +080045
class FakeHTTPMixin(object):
    """Mixin that temporarily replaces http.client.HTTPConnection with a fake.

    fakehttp() installs a connection class whose socket replays canned bytes;
    unfakehttp() puts the previously installed class back.
    """

    def fakehttp(self, fakedata):
        """Install a fake HTTPConnection that answers with *fakedata*."""

        class FakeSocket(io.BytesIO):
            # Number of outstanding users of this object: the socket itself
            # plus one for every makefile() call.  The underlying buffer is
            # only really closed once the count drops to zero.
            io_refs = 1

            def sendall(self, data):
                # Keep the outgoing bytes on the connection class so that
                # tests can inspect what was sent.
                FakeHTTPConnection.buf = data

            def makefile(self, *args, **kwds):
                self.io_refs += 1
                return self

            def read(self, amt=None):
                return b"" if self.closed else super().read(amt)

            def readline(self, length=None):
                return b"" if self.closed else super().readline(length)

            def close(self):
                self.io_refs -= 1
                if not self.io_refs:
                    super().close()

        class FakeHTTPConnection(http.client.HTTPConnection):

            # buffer to store data for verification in urlopen tests.
            buf = None

            def connect(self):
                # No real connection is made; hand out the canned socket.
                self.sock = FakeSocket(fakedata)

        # Remember the current class and swap the fake in, in one step.
        self._connection_class, http.client.HTTPConnection = (
            http.client.HTTPConnection, FakeHTTPConnection)

    def unfakehttp(self):
        """Restore the connection class saved by fakehttp()."""
        http.client.HTTPConnection = self._connection_class
86
87
class urlopen_FileTests(unittest.TestCase):
    """Exercise urlopen() against a temporary local file.

    Covering as much of the returned object's behaviour as possible here
    cuts down on how much testing has to rely on connecting to the Net.

    """

    def setUp(self):
        # Write a one-line temp file, then open it through a file: URL
        self.pathname = support.TESTFN
        self.text = ("test_urllib: %s\n" % self.__class__.__name__).encode(
            "ascii")
        with open(support.TESTFN, 'wb') as f:
            f.write(self.text)
        self.returned_obj = urlopen("file:%s" % self.pathname)

    def tearDown(self):
        """Close the opened object and delete the temp file"""
        self.returned_obj.close()
        os.remove(support.TESTFN)

    def test_interface(self):
        # The object handed back by urlopen() must offer these attributes
        required = ("read", "readline", "readlines", "fileno",
                    "close", "info", "geturl", "getcode", "__iter__")
        for attr in required:
            self.assertTrue(hasattr(self.returned_obj, attr),
                            "object returned by urlopen() lacks %s attribute" %
                            attr)

    def test_read(self):
        data = self.returned_obj.read()
        self.assertEqual(self.text, data)

    def test_readline(self):
        first = self.returned_obj.readline()
        self.assertEqual(self.text, first)
        # The file holds a single line, so the next call is past the end
        after_eof = self.returned_obj.readline()
        self.assertEqual(b'', after_eof,
                         "calling readline() after exhausting the file did not"
                         " return an empty string")

    def test_readlines(self):
        lines_list = self.returned_obj.readlines()
        self.assertEqual(len(lines_list), 1,
                         "readlines() returned the wrong number of lines")
        self.assertEqual(lines_list[0], self.text,
                         "readlines() returned improper text")

    def test_fileno(self):
        file_num = self.returned_obj.fileno()
        self.assertIsInstance(file_num, int, "fileno() did not return an int")
        # Read straight from the descriptor, bypassing the file object
        raw = os.read(file_num, len(self.text))
        self.assertEqual(raw, self.text,
                         "Reading on the file descriptor returned by fileno() "
                         "did not return the expected text")

    def test_close(self):
        # close() is called here and then once more by tearDown(), so this
        # also checks that closing twice is harmless
        self.returned_obj.close()

    def test_info(self):
        headers = self.returned_obj.info()
        self.assertIsInstance(headers, email.message.Message)

    def test_geturl(self):
        self.assertEqual(self.returned_obj.geturl(), self.pathname)

    def test_getcode(self):
        self.assertIsNone(self.returned_obj.getcode())

    def test_iter(self):
        # No need to count iterations: the comparison fails as soon as the
        # iterator yields anything other than the single expected line.
        # Iterate in the usual implicit way to test for ticket #4608.
        for line in self.returned_obj:
            self.assertEqual(line, self.text)

    def test_relativelocalfile(self):
        with self.assertRaises(ValueError):
            urllib.request.urlopen('./' + self.pathname)
169
class ProxyTests(unittest.TestCase):
    """Tests for the environment-variable based proxy helpers."""

    def setUp(self):
        # The guard records env var changes so tearDown can undo them
        self.env = support.EnvironmentVarGuard()
        # Snapshot the names first, then drop every proxy related variable
        proxy_vars = [name for name in os.environ if 'proxy' in name.lower()]
        for name in proxy_vars:
            self.env.unset(name)

    def tearDown(self):
        # Restore all proxy related env vars
        self.env.__exit__()
        del self.env

    def test_getproxies_environment_keep_no_proxies(self):
        self.env.set('NO_PROXY', 'localhost')
        # getproxies_environment() keys are lower-cased and have the
        # '_proxy' suffix removed
        no_proxy = urllib.request.getproxies_environment()['no']
        self.assertEqual('localhost', no_proxy)
        # A no_proxy list whose entries are separated by ", "
        self.env.set('NO_PROXY', 'localhost, anotherdomain.com, newdomain.com')
        bypassed = urllib.request.proxy_bypass_environment('anotherdomain.com')
        self.assertTrue(bypassed)
Benjamin Peterson9bc93512008-09-22 22:10:59 +0000193
class urlopen_HttpTests(unittest.TestCase, FakeHTTPMixin):
    """Tests for urlopen(), mostly against a fake http connection."""

    def check_read(self, ver):
        # Shared body of the test_read_* cases: serve a 200 reply carrying
        # the HTTP version *ver* and check what urlopen() hands back.
        self.fakehttp(b"HTTP/" + ver + b" 200 OK\r\n\r\nHello!")
        try:
            response = urlopen("http://python.org/")
            self.assertEqual(response.readline(), b"Hello!")
            self.assertEqual(response.readline(), b"")
            self.assertEqual(response.geturl(), 'http://python.org/')
            self.assertEqual(response.getcode(), 200)
        finally:
            self.unfakehttp()

    def test_url_fragment(self):
        # Issue #11703: geturl() omits fragments in the original URL.
        url = 'http://docs.python.org/library/urllib.html#OK'
        self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello!")
        try:
            response = urllib.request.urlopen(url)
            self.assertEqual(response.geturl(), url)
        finally:
            self.unfakehttp()

    def test_willclose(self):
        self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello!")
        try:
            response = urlopen("http://www.python.org")
            self.assertTrue(response.fp.will_close)
        finally:
            self.unfakehttp()

    def test_read_0_9(self):
        # "0.9" response accepted (but not "simple responses" without
        # a status line)
        self.check_read(b"0.9")

    def test_read_1_0(self):
        self.check_read(b"1.0")

    def test_read_1_1(self):
        self.check_read(b"1.1")

    def test_read_bogus(self):
        # urlopen() should raise OSError for many error codes.
        self.fakehttp(b'''HTTP/1.1 401 Authentication Required
Date: Wed, 02 Jan 2008 03:03:54 GMT
Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
Connection: close
Content-Type: text/html; charset=iso-8859-1
''')
        try:
            with self.assertRaises(OSError):
                urlopen("http://python.org/")
        finally:
            self.unfakehttp()

    def test_invalid_redirect(self):
        # A 302 whose Location is a file:// URL must end in HTTPError.
        self.fakehttp(b'''HTTP/1.1 302 Found
Date: Wed, 02 Jan 2008 03:03:54 GMT
Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
Location: file://guidocomputer.athome.com:/python/license
Connection: close
Content-Type: text/html; charset=iso-8859-1
''')
        try:
            with self.assertRaises(urllib.error.HTTPError):
                urlopen("http://python.org/")
        finally:
            self.unfakehttp()

    def test_empty_socket(self):
        # urlopen() raises OSError if the underlying socket does not send any
        # data. (#1680230)
        self.fakehttp(b'')
        try:
            with self.assertRaises(OSError):
                urlopen("http://something")
        finally:
            self.unfakehttp()

    def test_missing_localfile(self):
        # Test for #10836
        with self.assertRaises(urllib.error.URLError) as caught:
            urlopen('file://localhost/a/file/which/doesnot/exists.py')
        self.assertTrue(caught.exception.filename)
        self.assertTrue(caught.exception.reason)

    def test_file_notexists(self):
        fd, tmp_file = tempfile.mkstemp()
        tmp_fileurl = 'file://localhost/' + tmp_file.replace(os.path.sep, '/')
        try:
            # While the file exists, opening its URL must succeed
            self.assertTrue(os.path.exists(tmp_file))
            with urlopen(tmp_fileurl) as fobj:
                self.assertTrue(fobj)
        finally:
            os.close(fd)
            os.unlink(tmp_file)
        # Once the file is gone, the very same URL must fail
        self.assertFalse(os.path.exists(tmp_file))
        with self.assertRaises(urllib.error.URLError):
            urlopen(tmp_fileurl)

    def test_ftp_nohost(self):
        test_ftp_url = 'ftp:///path'
        with self.assertRaises(urllib.error.URLError) as caught:
            urlopen(test_ftp_url)
        self.assertFalse(caught.exception.filename)
        self.assertTrue(caught.exception.reason)

    def test_ftp_nonexisting(self):
        with self.assertRaises(urllib.error.URLError) as caught:
            urlopen('ftp://localhost/a/file/which/doesnot/exists.py')
        self.assertFalse(caught.exception.filename)
        self.assertTrue(caught.exception.reason)

    def test_userpass_inurl(self):
        self.fakehttp(b"HTTP/1.0 200 OK\r\n\r\nHello!")
        try:
            response = urlopen("http://user:pass@python.org/")
            self.assertEqual(response.readline(), b"Hello!")
            self.assertEqual(response.readline(), b"")
            self.assertEqual(response.geturl(), 'http://user:pass@python.org/')
            self.assertEqual(response.getcode(), 200)
        finally:
            self.unfakehttp()

    def test_userpass_inurl_w_spaces(self):
        self.fakehttp(b"HTTP/1.0 200 OK\r\n\r\nHello!")
        try:
            userpass = "a b:c d"
            url = "http://{}@python.org/".format(userpass)
            # fakehttp() has run, so this is the fake connection class
            fakehttp_wrapper = http.client.HTTPConnection
            encoded_credentials = b64encode(userpass.encode("ASCII"))
            authorization = ("Authorization: Basic %s\r\n" %
                             encoded_credentials.decode("ASCII"))
            response = urlopen(url)
            # The authorization header must be in place
            sent = fakehttp_wrapper.buf.decode("UTF-8")
            self.assertIn(authorization, sent)
            self.assertEqual(response.readline(), b"Hello!")
            self.assertEqual(response.readline(), b"")
            # the spaces are quoted in URL so no match
            self.assertNotEqual(response.geturl(), url)
            self.assertEqual(response.getcode(), 200)
        finally:
            self.unfakehttp()

    def test_URLopener_deprecation(self):
        with support.check_warnings(('', DeprecationWarning)):
            urllib.request.URLopener()
Senthil Kumaran38b968b92012-03-14 13:43:53 -0700342
class urlopen_DataTests(unittest.TestCase):
    """Test urlopen() opening a data URL."""

    def setUp(self):
        # text containing URL special- and unicode-characters
        self.text = "test data URLs :;,%=& \u00f6 \u00c4 "
        # 2x1 pixel RGB PNG image with one black and one white pixel
        self.image = (
            b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\x02\x00\x00\x00'
            b'\x01\x08\x02\x00\x00\x00{@\xe8\xdd\x00\x00\x00\x01sRGB\x00\xae'
            b'\xce\x1c\xe9\x00\x00\x00\x0fIDAT\x08\xd7c```\xf8\xff\xff?\x00'
            b'\x06\x01\x02\xfe\no/\x1e\x00\x00\x00\x00IEND\xaeB`\x82')

        # self.text, percent-encoded as UTF-8
        self.text_url = (
            "data:text/plain;charset=UTF-8,test%20data%20URLs%20%3A%3B%2C%25%3"
            "D%26%20%C3%B6%20%C3%84%20")
        # self.text, encoded as ISO-8859-1 and then base64
        self.text_url_base64 = (
            "data:text/plain;charset=ISO-8859-1;base64,dGVzdCBkYXRhIFVSTHMgOjs"
            "sJT0mIPYgxCA%3D")
        # base64 encoded data URL that contains ignorable spaces,
        # such as "\n", " ", "%0A", and "%20".
        # The payload is self.image; the literal must start with the
        # "data:" scheme and media type or urlopen() rejects it.
        self.image_url = (
            "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAIAAAABCAIAAAB7\n"
            "QOjdAAAAAXNSR0IArs4c6QAAAA9JREFUCNdj%0AYGBg%2BP//PwAGAQL%2BCm8 "
            "vHgAAAABJRU5ErkJggg%3D%3D%0A%20")

        # Open every URL once; each response is closed again after the test
        self.text_url_resp = urllib.request.urlopen(self.text_url)
        self.addCleanup(self.text_url_resp.close)
        self.text_url_base64_resp = urllib.request.urlopen(
            self.text_url_base64)
        self.addCleanup(self.text_url_base64_resp.close)
        self.image_url_resp = urllib.request.urlopen(self.image_url)
        self.addCleanup(self.image_url_resp.close)

    def test_interface(self):
        # Make sure object returned by urlopen() has the specified methods
        for attr in ("read", "readline", "readlines",
                     "close", "info", "geturl", "getcode", "__iter__"):
            self.assertTrue(hasattr(self.text_url_resp, attr),
                            "object returned by urlopen() lacks %s attribute" %
                            attr)

    def test_info(self):
        self.assertIsInstance(self.text_url_resp.info(), email.message.Message)
        self.assertEqual(self.text_url_base64_resp.info().get_params(),
                         [('text/plain', ''), ('charset', 'ISO-8859-1')])
        # content-length must describe the decoded payload
        self.assertEqual(self.image_url_resp.info()['content-length'],
                         str(len(self.image)))
        # A URL with no media type at all gets text/plain, US-ASCII
        self.assertEqual(urllib.request.urlopen("data:,").info().get_params(),
                         [('text/plain', ''), ('charset', 'US-ASCII')])

    def test_geturl(self):
        self.assertEqual(self.text_url_resp.geturl(), self.text_url)
        self.assertEqual(self.text_url_base64_resp.geturl(),
                         self.text_url_base64)
        self.assertEqual(self.image_url_resp.geturl(), self.image_url)

    def test_read_text(self):
        # Decode with whatever charset the response headers announce
        self.assertEqual(self.text_url_resp.read().decode(
            dict(self.text_url_resp.info().get_params())['charset']), self.text)

    def test_read_text_base64(self):
        self.assertEqual(self.text_url_base64_resp.read().decode(
            dict(self.text_url_base64_resp.info().get_params())['charset']),
            self.text)

    def test_read_image(self):
        self.assertEqual(self.image_url_resp.read(), self.image)

    def test_missing_comma(self):
        self.assertRaises(ValueError, urllib.request.urlopen, 'data:text/plain')

    def test_invalid_base64_data(self):
        # missing padding character
        self.assertRaises(ValueError, urllib.request.urlopen, 'data:;base64,Cg=')
415
class urlretrieve_FileTests(unittest.TestCase):
    """Test urllib.urlretrieve() on local files"""

    def setUp(self):
        # Create a list of temporary files. Each item in the list is a file
        # name (absolute path or relative to the current working directory).
        # All files in this list will be deleted in the tearDown method. Note,
        # this only helps to makes sure temporary files get deleted, but it
        # does nothing about trying to close files that may still be open. It
        # is the responsibility of the developer to properly close files even
        # when exceptional conditions occur.
        self.tempFiles = []

        # Create a temporary file.
        self.registerFileForCleanUp(support.TESTFN)
        self.text = b'testing urllib.urlretrieve'
        with open(support.TESTFN, 'wb') as FILE:
            FILE.write(self.text)

    def tearDown(self):
        # Delete the temporary files.  Removal is best effort: a file that
        # was never created, or is already gone, is not an error here.
        for each in self.tempFiles:
            try:
                os.remove(each)
            except OSError:
                pass

    def constructLocalFileUrl(self, filePath):
        """Return a file:// URL for *filePath*.

        The test is skipped (unittest.SkipTest) when the absolute path
        cannot be encoded as UTF-8.
        """
        filePath = os.path.abspath(filePath)
        try:
            filePath.encode("utf-8")
        except UnicodeEncodeError:
            raise unittest.SkipTest("filePath is not encodable to utf8")
        return "file://%s" % urllib.request.pathname2url(filePath)

    def createNewTempFile(self, data=b""):
        """Creates a new temporary file containing the specified data,
        registers the file for deletion during the test fixture tear down, and
        returns the absolute path of the file."""

        newFd, newFilePath = tempfile.mkstemp()
        # Register before writing so the file is removed even if the write
        # below fails.
        self.registerFileForCleanUp(newFilePath)
        with os.fdopen(newFd, "wb") as newFile:
            newFile.write(data)
        return newFilePath

    def registerFileForCleanUp(self, fileName):
        """Remember *fileName* so that tearDown() deletes it."""
        self.tempFiles.append(fileName)

    def test_basic(self):
        # Make sure that a local file just gets its own location returned and
        # a headers value is returned.
        result = urllib.request.urlretrieve("file:%s" % support.TESTFN)
        self.assertEqual(result[0], support.TESTFN)
        self.assertIsInstance(result[1], email.message.Message,
                              "did not get a email.message.Message instance "
                              "as second returned value")

    def test_copy(self):
        # Test that setting the filename argument works.
        second_temp = "%s.2" % support.TESTFN
        self.registerFileForCleanUp(second_temp)
        result = urllib.request.urlretrieve(self.constructLocalFileUrl(
            support.TESTFN), second_temp)
        self.assertEqual(second_temp, result[0])
        self.assertTrue(os.path.exists(second_temp), "copy of the file was not "
                                                     "made")
        with open(second_temp, 'rb') as FILE:
            text = FILE.read()
        self.assertEqual(self.text, text)

    def test_reporthook(self):
        # Make sure that the reporthook works.
        # count_holder is a deliberately shared mutable default: it carries
        # the expected block number from one hook call to the next.
        def hooktester(block_count, block_read_size, file_size, count_holder=[0]):
            self.assertIsInstance(block_count, int)
            self.assertIsInstance(block_read_size, int)
            self.assertIsInstance(file_size, int)
            self.assertEqual(block_count, count_holder[0])
            count_holder[0] = count_holder[0] + 1
        second_temp = "%s.2" % support.TESTFN
        self.registerFileForCleanUp(second_temp)
        urllib.request.urlretrieve(
            self.constructLocalFileUrl(support.TESTFN),
            second_temp, hooktester)

    def test_reporthook_0_bytes(self):
        # Test on zero length file. Should call reporthook only 1 time.
        report = []
        def hooktester(block_count, block_read_size, file_size, _report=report):
            _report.append((block_count, block_read_size, file_size))
        srcFileName = self.createNewTempFile()
        urllib.request.urlretrieve(self.constructLocalFileUrl(srcFileName),
                                   support.TESTFN, hooktester)
        self.assertEqual(len(report), 1)
        self.assertEqual(report[0][2], 0)

    def test_reporthook_5_bytes(self):
        # Test on 5 byte file. Should call reporthook only 2 times (once when
        # the "network connection" is established and once when the block is
        # read).
        report = []
        def hooktester(block_count, block_read_size, file_size, _report=report):
            _report.append((block_count, block_read_size, file_size))
        srcFileName = self.createNewTempFile(b"x" * 5)
        urllib.request.urlretrieve(self.constructLocalFileUrl(srcFileName),
                                   support.TESTFN, hooktester)
        self.assertEqual(len(report), 2)
        self.assertEqual(report[0][2], 5)
        self.assertEqual(report[1][2], 5)

    def test_reporthook_8193_bytes(self):
        # Test on 8193 byte file. Should call reporthook only 3 times (once
        # when the "network connection" is established, once for the next 8192
        # bytes, and once for the last byte).
        report = []
        def hooktester(block_count, block_read_size, file_size, _report=report):
            _report.append((block_count, block_read_size, file_size))
        srcFileName = self.createNewTempFile(b"x" * 8193)
        urllib.request.urlretrieve(self.constructLocalFileUrl(srcFileName),
                                   support.TESTFN, hooktester)
        self.assertEqual(len(report), 3)
        self.assertEqual(report[0][2], 8193)
        self.assertEqual(report[0][1], 8192)
        self.assertEqual(report[1][1], 8192)
        self.assertEqual(report[2][1], 8192)
Skip Montanaro080c9972001-01-28 21:12:22 +0000554
Senthil Kumarance260142011-11-01 01:35:17 +0800555
class urlretrieve_HttpTests(unittest.TestCase, FakeHTTPMixin):
    """Test urllib.urlretrieve() using fake http connections"""

    def test_short_content_raises_ContentTooShortError(self):
        # The reply announces 100 bytes of content but carries far fewer
        self.fakehttp(b'''HTTP/1.1 200 OK
Date: Wed, 02 Jan 2008 03:03:54 GMT
Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
Connection: close
Content-Length: 100
Content-Type: text/html; charset=iso-8859-1

FF
''')

        def _reporthook(par1, par2, par3):
            pass

        try:
            with self.assertRaises(urllib.error.ContentTooShortError):
                urllib.request.urlretrieve('http://example.com/',
                                           reporthook=_reporthook)
        finally:
            self.unfakehttp()

    def test_short_content_raises_ContentTooShortError_without_reporthook(self):
        # Same truncated reply as above, retrieved without a reporthook
        self.fakehttp(b'''HTTP/1.1 200 OK
Date: Wed, 02 Jan 2008 03:03:54 GMT
Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
Connection: close
Content-Length: 100
Content-Type: text/html; charset=iso-8859-1

FF
''')
        try:
            with self.assertRaises(urllib.error.ContentTooShortError):
                urllib.request.urlretrieve('http://example.com/')
        finally:
            self.unfakehttp()
595
596
Brett Cannon74bfd702003-04-25 09:39:47 +0000597class QuotingTests(unittest.TestCase):
598 """Tests for urllib.quote() and urllib.quote_plus()
Tim Petersc2659cf2003-05-12 20:19:37 +0000599
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000600 According to RFC 2396 (Uniform Resource Identifiers), to escape a
601 character you write it as '%' + <2 character US-ASCII hex value>.
602 The Python code of ``'%' + hex(ord(<character>))[2:]`` escapes a
603 character properly. Case does not matter on the hex letters.
Brett Cannon74bfd702003-04-25 09:39:47 +0000604
605 The various character sets specified are:
Tim Petersc2659cf2003-05-12 20:19:37 +0000606
Brett Cannon74bfd702003-04-25 09:39:47 +0000607 Reserved characters : ";/?:@&=+$,"
608 Have special meaning in URIs and must be escaped if not being used for
609 their special meaning
610 Data characters : letters, digits, and "-_.!~*'()"
611 Unreserved and do not need to be escaped; can be, though, if desired
612 Control characters : 0x00 - 0x1F, 0x7F
613 Have no use in URIs so must be escaped
614 space : 0x20
615 Must be escaped
616 Delimiters : '<>#%"'
617 Must be escaped
618 Unwise : "{}|\^[]`"
619 Must be escaped
Tim Petersc2659cf2003-05-12 20:19:37 +0000620
Brett Cannon74bfd702003-04-25 09:39:47 +0000621 """
622
623 def test_never_quote(self):
624 # Make sure quote() does not quote letters, digits, and "_,.-"
625 do_not_quote = '' .join(["ABCDEFGHIJKLMNOPQRSTUVWXYZ",
626 "abcdefghijklmnopqrstuvwxyz",
627 "0123456789",
628 "_.-"])
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000629 result = urllib.parse.quote(do_not_quote)
Brett Cannon74bfd702003-04-25 09:39:47 +0000630 self.assertEqual(do_not_quote, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000631 "using quote(): %r != %r" % (do_not_quote, result))
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000632 result = urllib.parse.quote_plus(do_not_quote)
Brett Cannon74bfd702003-04-25 09:39:47 +0000633 self.assertEqual(do_not_quote, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000634 "using quote_plus(): %r != %r" % (do_not_quote, result))
Brett Cannon74bfd702003-04-25 09:39:47 +0000635
636 def test_default_safe(self):
637 # Test '/' is default value for 'safe' parameter
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000638 self.assertEqual(urllib.parse.quote.__defaults__[0], '/')
Brett Cannon74bfd702003-04-25 09:39:47 +0000639
640 def test_safe(self):
641 # Test setting 'safe' parameter does what it should do
642 quote_by_default = "<>"
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000643 result = urllib.parse.quote(quote_by_default, safe=quote_by_default)
Brett Cannon74bfd702003-04-25 09:39:47 +0000644 self.assertEqual(quote_by_default, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000645 "using quote(): %r != %r" % (quote_by_default, result))
Jeremy Hylton1ef7c6b2009-03-26 16:57:30 +0000646 result = urllib.parse.quote_plus(quote_by_default,
647 safe=quote_by_default)
Brett Cannon74bfd702003-04-25 09:39:47 +0000648 self.assertEqual(quote_by_default, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000649 "using quote_plus(): %r != %r" %
Brett Cannon74bfd702003-04-25 09:39:47 +0000650 (quote_by_default, result))
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000651 # Safe expressed as bytes rather than str
652 result = urllib.parse.quote(quote_by_default, safe=b"<>")
653 self.assertEqual(quote_by_default, result,
654 "using quote(): %r != %r" % (quote_by_default, result))
655 # "Safe" non-ASCII characters should have no effect
656 # (Since URIs are not allowed to have non-ASCII characters)
657 result = urllib.parse.quote("a\xfcb", encoding="latin-1", safe="\xfc")
658 expect = urllib.parse.quote("a\xfcb", encoding="latin-1", safe="")
659 self.assertEqual(expect, result,
660 "using quote(): %r != %r" %
661 (expect, result))
662 # Same as above, but using a bytes rather than str
663 result = urllib.parse.quote("a\xfcb", encoding="latin-1", safe=b"\xfc")
664 expect = urllib.parse.quote("a\xfcb", encoding="latin-1", safe="")
665 self.assertEqual(expect, result,
666 "using quote(): %r != %r" %
667 (expect, result))
Brett Cannon74bfd702003-04-25 09:39:47 +0000668
669 def test_default_quoting(self):
670 # Make sure all characters that should be quoted are by default sans
671 # space (separate test for that).
672 should_quote = [chr(num) for num in range(32)] # For 0x00 - 0x1F
673 should_quote.append('<>#%"{}|\^[]`')
674 should_quote.append(chr(127)) # For 0x7F
675 should_quote = ''.join(should_quote)
676 for char in should_quote:
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000677 result = urllib.parse.quote(char)
Brett Cannon74bfd702003-04-25 09:39:47 +0000678 self.assertEqual(hexescape(char), result,
Jeremy Hylton1ef7c6b2009-03-26 16:57:30 +0000679 "using quote(): "
680 "%s should be escaped to %s, not %s" %
Brett Cannon74bfd702003-04-25 09:39:47 +0000681 (char, hexescape(char), result))
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000682 result = urllib.parse.quote_plus(char)
Brett Cannon74bfd702003-04-25 09:39:47 +0000683 self.assertEqual(hexescape(char), result,
684 "using quote_plus(): "
Tim Petersc2659cf2003-05-12 20:19:37 +0000685 "%s should be escapes to %s, not %s" %
Brett Cannon74bfd702003-04-25 09:39:47 +0000686 (char, hexescape(char), result))
687 del should_quote
688 partial_quote = "ab[]cd"
689 expected = "ab%5B%5Dcd"
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000690 result = urllib.parse.quote(partial_quote)
Brett Cannon74bfd702003-04-25 09:39:47 +0000691 self.assertEqual(expected, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000692 "using quote(): %r != %r" % (expected, result))
Senthil Kumaran305a68e2011-09-13 06:40:27 +0800693 result = urllib.parse.quote_plus(partial_quote)
Brett Cannon74bfd702003-04-25 09:39:47 +0000694 self.assertEqual(expected, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000695 "using quote_plus(): %r != %r" % (expected, result))
Brett Cannon74bfd702003-04-25 09:39:47 +0000696
697 def test_quoting_space(self):
698 # Make sure quote() and quote_plus() handle spaces as specified in
699 # their unique way
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000700 result = urllib.parse.quote(' ')
Brett Cannon74bfd702003-04-25 09:39:47 +0000701 self.assertEqual(result, hexescape(' '),
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000702 "using quote(): %r != %r" % (result, hexescape(' ')))
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000703 result = urllib.parse.quote_plus(' ')
Brett Cannon74bfd702003-04-25 09:39:47 +0000704 self.assertEqual(result, '+',
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000705 "using quote_plus(): %r != +" % result)
Brett Cannon74bfd702003-04-25 09:39:47 +0000706 given = "a b cd e f"
707 expect = given.replace(' ', hexescape(' '))
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000708 result = urllib.parse.quote(given)
Brett Cannon74bfd702003-04-25 09:39:47 +0000709 self.assertEqual(expect, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000710 "using quote(): %r != %r" % (expect, result))
Brett Cannon74bfd702003-04-25 09:39:47 +0000711 expect = given.replace(' ', '+')
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000712 result = urllib.parse.quote_plus(given)
Brett Cannon74bfd702003-04-25 09:39:47 +0000713 self.assertEqual(expect, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000714 "using quote_plus(): %r != %r" % (expect, result))
Brett Cannon74bfd702003-04-25 09:39:47 +0000715
Raymond Hettinger2bdec7b2005-09-10 14:30:09 +0000716 def test_quoting_plus(self):
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000717 self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma'),
Raymond Hettinger2bdec7b2005-09-10 14:30:09 +0000718 'alpha%2Bbeta+gamma')
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000719 self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma', '+'),
Raymond Hettinger2bdec7b2005-09-10 14:30:09 +0000720 'alpha+beta+gamma')
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000721 # Test with bytes
722 self.assertEqual(urllib.parse.quote_plus(b'alpha+beta gamma'),
723 'alpha%2Bbeta+gamma')
724 # Test with safe bytes
725 self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma', b'+'),
726 'alpha+beta+gamma')
727
728 def test_quote_bytes(self):
729 # Bytes should quote directly to percent-encoded values
730 given = b"\xa2\xd8ab\xff"
731 expect = "%A2%D8ab%FF"
732 result = urllib.parse.quote(given)
733 self.assertEqual(expect, result,
734 "using quote(): %r != %r" % (expect, result))
735 # Encoding argument should raise type error on bytes input
736 self.assertRaises(TypeError, urllib.parse.quote, given,
737 encoding="latin-1")
738 # quote_from_bytes should work the same
739 result = urllib.parse.quote_from_bytes(given)
740 self.assertEqual(expect, result,
741 "using quote_from_bytes(): %r != %r"
742 % (expect, result))
743
744 def test_quote_with_unicode(self):
745 # Characters in Latin-1 range, encoded by default in UTF-8
746 given = "\xa2\xd8ab\xff"
747 expect = "%C2%A2%C3%98ab%C3%BF"
748 result = urllib.parse.quote(given)
749 self.assertEqual(expect, result,
750 "using quote(): %r != %r" % (expect, result))
751 # Characters in Latin-1 range, encoded by with None (default)
752 result = urllib.parse.quote(given, encoding=None, errors=None)
753 self.assertEqual(expect, result,
754 "using quote(): %r != %r" % (expect, result))
755 # Characters in Latin-1 range, encoded with Latin-1
756 given = "\xa2\xd8ab\xff"
757 expect = "%A2%D8ab%FF"
758 result = urllib.parse.quote(given, encoding="latin-1")
759 self.assertEqual(expect, result,
760 "using quote(): %r != %r" % (expect, result))
761 # Characters in BMP, encoded by default in UTF-8
762 given = "\u6f22\u5b57" # "Kanji"
763 expect = "%E6%BC%A2%E5%AD%97"
764 result = urllib.parse.quote(given)
765 self.assertEqual(expect, result,
766 "using quote(): %r != %r" % (expect, result))
767 # Characters in BMP, encoded with Latin-1
768 given = "\u6f22\u5b57"
769 self.assertRaises(UnicodeEncodeError, urllib.parse.quote, given,
770 encoding="latin-1")
771 # Characters in BMP, encoded with Latin-1, with replace error handling
772 given = "\u6f22\u5b57"
773 expect = "%3F%3F" # "??"
774 result = urllib.parse.quote(given, encoding="latin-1",
775 errors="replace")
776 self.assertEqual(expect, result,
777 "using quote(): %r != %r" % (expect, result))
778 # Characters in BMP, Latin-1, with xmlcharref error handling
779 given = "\u6f22\u5b57"
780 expect = "%26%2328450%3B%26%2323383%3B" # "&#28450;&#23383;"
781 result = urllib.parse.quote(given, encoding="latin-1",
782 errors="xmlcharrefreplace")
783 self.assertEqual(expect, result,
784 "using quote(): %r != %r" % (expect, result))
Raymond Hettinger2bdec7b2005-09-10 14:30:09 +0000785
Georg Brandlfaf41492009-05-26 18:31:11 +0000786 def test_quote_plus_with_unicode(self):
787 # Encoding (latin-1) test for quote_plus
788 given = "\xa2\xd8 \xff"
789 expect = "%A2%D8+%FF"
790 result = urllib.parse.quote_plus(given, encoding="latin-1")
791 self.assertEqual(expect, result,
792 "using quote_plus(): %r != %r" % (expect, result))
793 # Errors test for quote_plus
794 given = "ab\u6f22\u5b57 cd"
795 expect = "ab%3F%3F+cd"
796 result = urllib.parse.quote_plus(given, encoding="latin-1",
797 errors="replace")
798 self.assertEqual(expect, result,
799 "using quote_plus(): %r != %r" % (expect, result))
800
Senthil Kumarand496c4c2010-07-30 19:34:36 +0000801
class UnquotingTests(unittest.TestCase):
    """Tests for unquote(), unquote_plus() and unquote_to_bytes()

    See the doc string of the quoting tests for details on quoting and such.

    """

    def _check(self, label, expect, result):
        # Shared comparison; failure reads "using <label>(): <expect> != <result>".
        self.assertEqual(expect, result,
                         "using %s(): %r != %r" % (label, expect, result))

    def test_unquoting(self):
        # Make sure unquoting of all ASCII values works
        escape_list = []
        for num in range(128):
            given = hexescape(chr(num))
            expect = chr(num)
            self._check("unquote", expect, urllib.parse.unquote(given))
            self._check("unquote_plus", expect,
                        urllib.parse.unquote_plus(given))
            escape_list.append(given)
        escape_string = ''.join(escape_list)
        result = urllib.parse.unquote(escape_string)
        # Exactly one '%' must remain: the one decoded from '%25'.
        self.assertEqual(result.count('%'), 1,
                         "using unquote(): not all characters unescaped: "
                         "%s" % result)
        # Non-string input is rejected.
        self.assertRaises((TypeError, AttributeError),
                          urllib.parse.unquote, None)
        self.assertRaises((TypeError, AttributeError),
                          urllib.parse.unquote, ())
        with support.check_warnings(('', BytesWarning), quiet=True):
            self.assertRaises((TypeError, AttributeError),
                              urllib.parse.unquote, b'')

    def test_unquoting_badpercent(self):
        # Test unquoting on bad percent-escapes: they are left untouched.
        bad_escapes = ('%xab', '%x', '%')
        for given in bad_escapes:
            self._check("unquote", given, urllib.parse.unquote(given))
        # unquote_to_bytes
        for given in bad_escapes:
            self._check("unquote_to_bytes", bytes(given, 'ascii'),
                        urllib.parse.unquote_to_bytes(given))
        self.assertRaises((TypeError, AttributeError),
                          urllib.parse.unquote_to_bytes, None)
        self.assertRaises((TypeError, AttributeError),
                          urllib.parse.unquote_to_bytes, ())

    def test_unquoting_mixed_case(self):
        # Test unquoting on mixed-case hex digits in the percent-escapes
        self._check("unquote_to_bytes", b'\xab\xea',
                    urllib.parse.unquote_to_bytes('%Ab%eA'))

    def test_unquoting_parts(self):
        # Make sure unquoting works when non-quoted characters are
        # interspersed
        given = 'ab%sd' % hexescape('c')
        expect = "abcd"
        self._check("unquote", expect, urllib.parse.unquote(given))
        self._check("unquote_plus", expect, urllib.parse.unquote_plus(given))

    def test_unquoting_plus(self):
        # Test difference between unquote() and unquote_plus()
        given = "are+there+spaces..."
        # unquote() leaves '+' alone ...
        self._check("unquote", given, urllib.parse.unquote(given))
        # ... while unquote_plus() turns it into a space.
        self._check("unquote_plus", given.replace('+', ' '),
                    urllib.parse.unquote_plus(given))

    def test_unquote_to_bytes(self):
        # Each entry: (input, expected bytes).
        cases = (
            ('br%C3%BCckner_sapporo_20050930.doc',
             b'br\xc3\xbcckner_sapporo_20050930.doc'),
            # Test on a string with unescaped non-ASCII characters
            # (Technically an invalid URI; expect those characters to be
            # UTF-8 encoded): UTF-8 for "\u6f22\u00fc".
            ("\u6f22%C3%BC", b'\xe6\xbc\xa2\xc3\xbc'),
            # Test with a bytes as input
            (b'%A2%D8ab%FF', b'\xa2\xd8ab\xff'),
            # Test with a bytes as input, with unescaped non-ASCII bytes
            # (Technically an invalid URI; expect those bytes to be
            # preserved)
            (b'%A2\xd8ab%FF', b'\xa2\xd8ab\xff'),
        )
        for given, expect in cases:
            self._check("unquote_to_bytes", expect,
                        urllib.parse.unquote_to_bytes(given))

    def test_unquote_with_unicode(self):
        # Each entry: (input, keyword arguments for unquote(), expected).
        cases = (
            # Characters in the Latin-1 range, encoded with UTF-8
            ('br%C3%BCckner_sapporo_20050930.doc', {},
             'br\u00fcckner_sapporo_20050930.doc'),
            # Characters in the Latin-1 range, encoded with None (default)
            ('br%C3%BCckner_sapporo_20050930.doc',
             {"encoding": None, "errors": None},
             'br\u00fcckner_sapporo_20050930.doc'),
            # Characters in the Latin-1 range, encoded with Latin-1
            ('br%FCckner_sapporo_20050930.doc', {"encoding": "latin-1"},
             'br\u00fcckner_sapporo_20050930.doc'),
            # Characters in BMP, encoded with UTF-8 ("Kanji")
            ("%E6%BC%A2%E5%AD%97", {}, "\u6f22\u5b57"),
            # Decode with UTF-8, invalid sequence: replacement character
            ("%F3%B1", {}, "\ufffd"),
            # Decode with UTF-8, invalid sequence, replace errors
            ("%F3%B1", {"errors": "replace"}, "\ufffd"),
            # Decode with UTF-8, invalid sequence, ignoring errors
            ("%F3%B1", {"errors": "ignore"}, ""),
            # A mix of non-ASCII and percent-encoded characters, UTF-8
            ("\u6f22%C3%BC", {}, '\u6f22\u00fc'),
            # A mix of non-ASCII and percent-encoded characters, Latin-1
            # (Note, the string contains non-Latin-1-representable
            # characters)
            ("\u6f22%FC", {"encoding": "latin-1"}, '\u6f22\u00fc'),
        )
        for given, options, expect in cases:
            self._check("unquote", expect,
                        urllib.parse.unquote(given, **options))
Raymond Hettinger4b0f20d2005-10-15 16:41:53 +0000991
class urlencode_Tests(unittest.TestCase):
    """Tests for urlencode()"""

    def help_inputtype(self, given, test_type):
        """Helper method for testing different input types.

        'given' must lead to only the pairs:
            * 1st, 1
            * 2nd, 2
            * 3rd, 3

        Test cannot assume anything about order.  Docs make no guarantee and
        have possible dictionary input.

        'test_type' is a short description used in failure messages.

        """
        expect_somewhere = ["1st=1", "2nd=2", "3rd=3"]
        result = urllib.parse.urlencode(given)
        for expected in expect_somewhere:
            self.assertIn(expected, result,
                          "testing %s: %s not found in %s" %
                          (test_type, expected, result))
        self.assertEqual(result.count('&'), 2,
                         "testing %s: expected 2 '&'s; got %s" %
                         (test_type, result.count('&')))
        # The first '&' must sit between a value digit and a key digit.
        amp_location = result.index('&')
        on_amp_left = result[amp_location - 1]
        on_amp_right = result[amp_location + 1]
        self.assertTrue(on_amp_left.isdigit() and on_amp_right.isdigit(),
                        "testing %s: '&' not located in proper place in %s" %
                        (test_type, result))
        # 5 chars per pair and 2 ampersands
        self.assertEqual(len(result), (5 * 3) + 2,
                         "testing %s: "
                         "unexpected number of characters: %s != %s" %
                         (test_type, len(result), (5 * 3) + 2))

    def test_using_mapping(self):
        # Test passing in a mapping object as an argument.
        self.help_inputtype({"1st": '1', "2nd": '2', "3rd": '3'},
                            "using dict as input type")

    def test_using_sequence(self):
        # Test passing in a sequence of two-item sequences as an argument.
        self.help_inputtype([('1st', '1'), ('2nd', '2'), ('3rd', '3')],
                            "using sequence of two-item tuples as input")

    def test_quoting(self):
        # Make sure keys and values are quoted using quote_plus()
        given = {"&": "="}
        expect = "%s=%s" % (hexescape('&'), hexescape('='))
        result = urllib.parse.urlencode(given)
        self.assertEqual(expect, result)
        given = {"key name": "A bunch of pluses"}
        expect = "key+name=A+bunch+of+pluses"
        result = urllib.parse.urlencode(given)
        self.assertEqual(expect, result)

    def test_doseq(self):
        # Test that passing True for 'doseq' parameter works correctly
        given = {'sequence': ['1', '2', '3']}
        # Without doseq the list is stringified and quoted as one value.
        expect = "sequence=%s" % urllib.parse.quote_plus(str(['1', '2', '3']))
        result = urllib.parse.urlencode(given)
        self.assertEqual(expect, result)
        # With doseq every element gets its own key=value pair.
        result = urllib.parse.urlencode(given, True)
        for value in given["sequence"]:
            expect = "sequence=%s" % value
            self.assertIn(expect, result)
        self.assertEqual(result.count('&'), 2,
                         "Expected 2 '&'s, got %s" % result.count('&'))

    def test_empty_sequence(self):
        self.assertEqual("", urllib.parse.urlencode({}))
        self.assertEqual("", urllib.parse.urlencode([]))

    def test_nonstring_values(self):
        self.assertEqual("a=1", urllib.parse.urlencode({"a": 1}))
        self.assertEqual("a=None", urllib.parse.urlencode({"a": None}))

    def test_nonstring_seq_values(self):
        self.assertEqual("a=1&a=2",
                         urllib.parse.urlencode({"a": [1, 2]}, True))
        self.assertEqual("a=None&a=a",
                         urllib.parse.urlencode({"a": [None, "a"]}, True))
        # An ordered mapping used as the value: the expected output lists
        # its keys ("a", "b") in order.
        data = collections.OrderedDict([("a", 1), ("b", 1)])
        self.assertEqual("a=a&a=b",
                         urllib.parse.urlencode({"a": data}, True))

    def test_urlencode_encoding(self):
        # ASCII encoding. Expect %3F with errors="replace"
        given = (('\u00a0', '\u00c1'),)
        expect = '%3F=%3F'
        result = urllib.parse.urlencode(given, encoding="ASCII",
                                        errors="replace")
        self.assertEqual(expect, result)

        # Default is UTF-8 encoding.
        given = (('\u00a0', '\u00c1'),)
        expect = '%C2%A0=%C3%81'
        result = urllib.parse.urlencode(given)
        self.assertEqual(expect, result)

        # Latin-1 encoding.
        given = (('\u00a0', '\u00c1'),)
        expect = '%A0=%C1'
        result = urllib.parse.urlencode(given, encoding="latin-1")
        self.assertEqual(expect, result)

    def test_urlencode_encoding_doseq(self):
        # ASCII Encoding. Expect %3F with errors="replace"
        given = (('\u00a0', '\u00c1'),)
        expect = '%3F=%3F'
        result = urllib.parse.urlencode(given, doseq=True,
                                        encoding="ASCII", errors="replace")
        self.assertEqual(expect, result)

        # ASCII Encoding. On a sequence of values.
        given = (("\u00a0", (1, "\u00c1")),)
        expect = '%3F=1&%3F=%3F'
        result = urllib.parse.urlencode(given, True,
                                        encoding="ASCII", errors="replace")
        self.assertEqual(expect, result)

        # Utf-8
        given = (("\u00a0", "\u00c1"),)
        expect = '%C2%A0=%C3%81'
        result = urllib.parse.urlencode(given, True)
        self.assertEqual(expect, result)

        given = (("\u00a0", (42, "\u00c1")),)
        expect = '%C2%A0=42&%C2%A0=%C3%81'
        result = urllib.parse.urlencode(given, True)
        self.assertEqual(expect, result)

        # latin-1
        given = (("\u00a0", "\u00c1"),)
        expect = '%A0=%C1'
        result = urllib.parse.urlencode(given, True, encoding="latin-1")
        self.assertEqual(expect, result)

        given = (("\u00a0", (42, "\u00c1")),)
        expect = '%A0=42&%A0=%C1'
        result = urllib.parse.urlencode(given, True, encoding="latin-1")
        self.assertEqual(expect, result)

    def test_urlencode_bytes(self):
        given = ((b'\xa0\x24', b'\xc1\x24'),)
        expect = '%A0%24=%C1%24'
        result = urllib.parse.urlencode(given)
        self.assertEqual(expect, result)
        result = urllib.parse.urlencode(given, True)
        self.assertEqual(expect, result)

        # Sequence of values
        given = ((b'\xa0\x24', (42, b'\xc1\x24')),)
        expect = '%A0%24=42&%A0%24=%C1%24'
        result = urllib.parse.urlencode(given, True)
        self.assertEqual(expect, result)

    def test_urlencode_encoding_safe_parameter(self):
        # Send '$' (\x24) as safe character: it must survive unescaped in
        # keys and values alike.
        pair = ((b'\xa0\x24', b'\xc1\x24'),)
        pair_expect = '%A0$=%C1$'
        # Safe parameter in sequence
        multi = ((b'\xa0\x24', (b'\xc1\x24', 0xd, 42)),)
        multi_expect = '%A0$=%C1$&%A0$=13&%A0$=42'

        # First with the default encoding, then all of it again with an
        # explicit latin-1 encoding.
        for extra in ({}, {"encoding": "latin-1"}):
            result = urllib.parse.urlencode(pair, safe=":$", **extra)
            self.assertEqual(pair_expect, result)

            # The latin-1 variant of this case used to compute the result
            # without ever checking it.
            result = urllib.parse.urlencode(pair, doseq=True, safe=":$",
                                            **extra)
            self.assertEqual(pair_expect, result)

            result = urllib.parse.urlencode(multi, True, safe=":$", **extra)
            self.assertEqual(multi_expect, result)
1186
class Pathname_Tests(unittest.TestCase):
    """Test pathname2url() and url2pathname()"""

    def test_basic(self):
        # Make sure simple tests pass
        expected_path = os.path.join("parts", "of", "a", "path")
        expected_url = "parts/of/a/path"
        result = urllib.request.pathname2url(expected_path)
        self.assertEqual(expected_url, result,
                         "pathname2url() failed; %s != %s" %
                         (expected_url, result))
        result = urllib.request.url2pathname(expected_url)
        self.assertEqual(expected_path, result,
                         "url2pathname() failed; %s != %s" %
                         (expected_path, result))

    def test_quoting(self):
        # Test automatic quoting and unquoting works for pathname2url() and
        # url2pathname() respectively
        given = os.path.join("needs", "quot=ing", "here")
        expect = "needs/%s/here" % urllib.parse.quote("quot=ing")
        result = urllib.request.pathname2url(given)
        self.assertEqual(expect, result,
                         "pathname2url() failed; %s != %s" %
                         (expect, result))
        # Round trip: the URL just produced must map back to the path.
        expect = given
        result = urllib.request.url2pathname(result)
        self.assertEqual(expect, result,
                         "url2pathname() failed; %s != %s" %
                         (expect, result))
        given = os.path.join("make sure", "using_quote")
        expect = "%s/using_quote" % urllib.parse.quote("make sure")
        result = urllib.request.pathname2url(given)
        self.assertEqual(expect, result,
                         "pathname2url() failed; %s != %s" %
                         (expect, result))
        # A '+' in a URL path stays a '+'.
        given = "make+sure/using_unquote"
        expect = os.path.join("make+sure", "using_unquote")
        result = urllib.request.url2pathname(given)
        self.assertEqual(expect, result,
                         "url2pathname() failed; %s != %s" %
                         (expect, result))

    @unittest.skipUnless(sys.platform == 'win32',
                         'test specific to the urllib.request.url2pathname '
                         'function.')
    def test_ntpath(self):
        # Several spellings of the drive root must all map to 'C:\'.
        given = ('/C:/', '///C:/', '/C|//')
        expect = 'C:\\'
        for url in given:
            result = urllib.request.url2pathname(url)
            self.assertEqual(expect, result,
                             'urllib.request.url2pathname() failed; %s != %s' %
                             (expect, result))
        given = '///C|/path'
        expect = 'C:\\path'
        result = urllib.request.url2pathname(given)
        self.assertEqual(expect, result,
                         'urllib.request.url2pathname() failed; %s != %s' %
                         (expect, result))
1246
class Utility_Tests(unittest.TestCase):
    """Testcase to test the various utility functions in the urllib."""

    def test_splitpasswd(self):
        """Some of the password examples are not sensible, but they are
        included to conform to RFC2617 and to address issue4675.
        """
        # Each entry: (expected (user, password) pair, input string).
        cases = (
            (('user', 'ab'), 'user:ab'),
            (('user', 'a\nb'), 'user:a\nb'),
            (('user', 'a\tb'), 'user:a\tb'),
            (('user', 'a\rb'), 'user:a\rb'),
            (('user', 'a\fb'), 'user:a\fb'),
            (('user', 'a\vb'), 'user:a\vb'),
            (('user', 'a:b'), 'user:a:b'),
            (('user', 'a b'), 'user:a b'),
            (('user 2', 'ab'), 'user 2:ab'),
            (('user+1', 'a+b'), 'user+1:a+b'),
        )
        for expected, given in cases:
            self.assertEqual(expected, urllib.parse.splitpasswd(given))

    def test_thishost(self):
        """Test the urllib.request.thishost utility function returns a tuple"""
        host = urllib.request.thishost()
        self.assertIsInstance(host, tuple)
1268
Senthil Kumaran690ce9b2009-05-05 18:41:13 +00001269
class URLopener_Tests(unittest.TestCase):
    """Testcase to test the open method of URLopener class."""

    def test_quoted_open(self):
        # NOTE(review): the expected warning text starts with this class's
        # name, so presumably the two must stay in sync - confirm before
        # renaming.
        class DummyURLopener(urllib.request.URLopener):
            def open_spam(self, url):
                # Hand back whatever open() passes in so it can be inspected.
                return url

        expected_warning = (
            'DummyURLopener style of invoking requests is deprecated.',
            DeprecationWarning)
        # Each entry: (URL given to open(), value expected from open_spam()).
        cases = (
            # A space in the URL gets quoted.
            ('spam://example/ /', '//example/%20/'),
            # test the safe characters are not quoted by urlopen
            ("spam://c:|windows%/:=&?~#+!$,;'@()*[]|/path/",
             "//c:|windows%/:=&?~#+!$,;'@()*[]|/path/"),
        )
        with support.check_warnings(expected_warning):
            for url, expected in cases:
                self.assertEqual(DummyURLopener().open(url), expected)
Senthil Kumaran734f0592010-02-20 22:19:04 +00001287
# Just commented them out.
# Can't really tell why they keep failing on Windows and SPARC.
# Everywhere else they work OK, but on those machines they sometimes
# fail in one of the tests, sometimes in another.  I have a Linux box, and
# the tests pass there.
# If anybody has one of the problematic environments, please help!
# . Facundo
1295#
1296# def server(evt):
Georg Brandlf78e02b2008-06-10 17:40:04 +00001297# import socket, time
Guido van Rossume7ba4952007-06-06 23:52:48 +00001298# serv = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
1299# serv.settimeout(3)
1300# serv.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
1301# serv.bind(("", 9093))
1302# serv.listen(5)
1303# try:
1304# conn, addr = serv.accept()
1305# conn.send("1 Hola mundo\n")
1306# cantdata = 0
1307# while cantdata < 13:
1308# data = conn.recv(13-cantdata)
1309# cantdata += len(data)
1310# time.sleep(.3)
1311# conn.send("2 No more lines\n")
1312# conn.close()
1313# except socket.timeout:
1314# pass
1315# finally:
1316# serv.close()
1317# evt.set()
1318#
1319# class FTPWrapperTests(unittest.TestCase):
1320#
1321# def setUp(self):
Georg Brandlf78e02b2008-06-10 17:40:04 +00001322# import ftplib, time, threading
Guido van Rossume7ba4952007-06-06 23:52:48 +00001323# ftplib.FTP.port = 9093
1324# self.evt = threading.Event()
1325# threading.Thread(target=server, args=(self.evt,)).start()
1326# time.sleep(.1)
1327#
1328# def tearDown(self):
1329# self.evt.wait()
1330#
1331# def testBasic(self):
1332# # connects
1333# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [])
Georg Brandlf78e02b2008-06-10 17:40:04 +00001334# ftp.close()
Guido van Rossume7ba4952007-06-06 23:52:48 +00001335#
1336# def testTimeoutNone(self):
Georg Brandlf78e02b2008-06-10 17:40:04 +00001337# # global default timeout is ignored
1338# import socket
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00001339# self.assertTrue(socket.getdefaulttimeout() is None)
Guido van Rossume7ba4952007-06-06 23:52:48 +00001340# socket.setdefaulttimeout(30)
1341# try:
1342# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [])
1343# finally:
Georg Brandlf78e02b2008-06-10 17:40:04 +00001344# socket.setdefaulttimeout(None)
Guido van Rossume7ba4952007-06-06 23:52:48 +00001345# self.assertEqual(ftp.ftp.sock.gettimeout(), 30)
Georg Brandlf78e02b2008-06-10 17:40:04 +00001346# ftp.close()
Guido van Rossume7ba4952007-06-06 23:52:48 +00001347#
Georg Brandlf78e02b2008-06-10 17:40:04 +00001348# def testTimeoutDefault(self):
1349# # global default timeout is used
1350# import socket
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00001351# self.assertTrue(socket.getdefaulttimeout() is None)
Georg Brandlf78e02b2008-06-10 17:40:04 +00001352# socket.setdefaulttimeout(30)
1353# try:
1354# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [])
1355# finally:
1356# socket.setdefaulttimeout(None)
1357# self.assertEqual(ftp.ftp.sock.gettimeout(), 30)
1358# ftp.close()
1359#
1360# def testTimeoutValue(self):
1361# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [],
1362# timeout=30)
1363# self.assertEqual(ftp.ftp.sock.gettimeout(), 30)
1364# ftp.close()
Guido van Rossume7ba4952007-06-06 23:52:48 +00001365
class RequestTests(unittest.TestCase):
    """Checks of the HTTP method reported by urllib.request.Request."""

    def test_default_values(self):
        # No data argument -> GET; a data argument (here {}) -> POST.
        url = "http://www.python.org"
        without_data = urllib.request.Request(url)
        self.assertEqual(without_data.get_method(), 'GET')
        with_data = urllib.request.Request(url, {})
        self.assertEqual(with_data.get_method(), 'POST')

    def test_with_method_arg(self):
        url = "http://www.python.org"

        # An explicit method='HEAD' is reported both by the attribute and by
        # get_method(), whether or not a data argument is passed as well.
        for positional in ((url,), (url, {})):
            head_request = urllib.request.Request(*positional, method='HEAD')
            self.assertEqual(head_request.method, 'HEAD')
            self.assertEqual(head_request.get_method(), 'HEAD')

        # Reassigning .method after construction changes what get_method()
        # returns.
        reassigned = urllib.request.Request(url, method='GET')
        self.assertEqual(reassigned.get_method(), 'GET')
        reassigned.method = 'HEAD'
        self.assertEqual(reassigned.get_method(), 'HEAD')
Skip Montanaro080c9972001-01-28 21:12:22 +00001388
1389
def test_main():
    """Hand every enabled TestCase class of this module to support.run_unittest."""
    enabled_cases = (
        urlopen_FileTests,
        urlopen_HttpTests,
        urlopen_DataTests,
        urlretrieve_FileTests,
        urlretrieve_HttpTests,
        ProxyTests,
        QuotingTests,
        UnquotingTests,
        urlencode_Tests,
        Pathname_Tests,
        Utility_Tests,
        URLopener_Tests,
        # FTPWrapperTests is left out: it only exists as commented-out code.
        RequestTests,
    )
    support.run_unittest(*enabled_cases)
Brett Cannon74bfd702003-04-25 09:39:47 +00001407
1408
1409
# Script entry point: run the whole suite when this file is executed directly.
if __name__ == "__main__":
    test_main()