blob: 1e30fa622c2e60d77ed55ef07488b4171ee20b60 [file] [log] [blame]
"""Regression tests for urllib"""
2
Jeremy Hylton1afc1692008-06-18 20:49:58 +00003import urllib.parse
4import urllib.request
guido@google.coma119df92011-03-29 11:41:02 -07005import urllib.error
Georg Brandl24420152008-05-26 16:32:26 +00006import http.client
Barry Warsaw820c1202008-06-12 04:06:45 +00007import email.message
Jeremy Hylton66dc8c52007-08-04 03:42:26 +00008import io
Brett Cannon74bfd702003-04-25 09:39:47 +00009import unittest
Benjamin Petersonee8712c2008-05-20 21:35:26 +000010from test import support
Brett Cannon74bfd702003-04-25 09:39:47 +000011import os
Senthil Kumaran2d2ea1b2011-04-14 13:16:30 +080012import sys
Georg Brandl5a650a22005-08-26 08:51:34 +000013import tempfile
Jeremy Hylton6102e292000-08-31 15:48:10 +000014
Senthil Kumaranc5c5a142012-01-14 19:09:04 +080015from base64 import b64encode
Georg Brandl2daf6ae2012-02-20 19:54:16 +010016import collections
Senthil Kumaranc5c5a142012-01-14 19:09:04 +080017
def hexescape(char):
    """Return *char* percent-escaped as RFC 2396 specifies.

    The character's code point is written in upper-case hexadecimal,
    zero-padded to at least two digits, and prefixed with ``%``.
    """
    return "%{:02X}".format(ord(char))
Jeremy Hylton6102e292000-08-31 15:48:10 +000024
# Module-level helper that lets the tests exercise FancyURLopener.
# The default (proxy-less) opener is created lazily and then shared.
_urlopener = None


def urlopen(url, data=None, proxies=None):
    """urlopen(url [, data]) -> open file-like object"""
    global _urlopener
    if proxies is None:
        # Build the shared opener on first use and keep reusing it afterwards.
        if not _urlopener:
            _urlopener = urllib.request.FancyURLopener()
        opener = _urlopener
    else:
        # An explicit proxy mapping gets a one-off opener that is not cached.
        opener = urllib.request.FancyURLopener(proxies=proxies)
    # Only forward *data* when the caller actually supplied it.
    open_args = (url,) if data is None else (url, data)
    return opener.open(*open_args)
41
Senthil Kumarance260142011-11-01 01:35:17 +080042
class FakeHTTPMixin:
    """Mixin that replaces http.client.HTTPConnection with a canned fake."""

    def fakehttp(self, fakedata):
        """Patch http.client.HTTPConnection so connections serve *fakedata*.

        The genuine class is remembered on the instance; unfakehttp() puts
        it back.
        """

        class FakeSocket(io.BytesIO):
            # Count of live handles on this object: the "socket" itself plus
            # one for every makefile() call.  The underlying buffer is only
            # closed once the count drops to zero.
            io_refs = 1

            def sendall(self, data):
                # Keep the outgoing request bytes where the tests can see them.
                FakeHTTPConnection.buf = data

            def makefile(self, *args, **kwds):
                self.io_refs += 1
                return self

            def read(self, amt=None):
                return b"" if self.closed else super().read(amt)

            def readline(self, length=None):
                return b"" if self.closed else super().readline(length)

            def close(self):
                self.io_refs -= 1
                if self.io_refs == 0:
                    super().close()

        class FakeHTTPConnection(http.client.HTTPConnection):

            # buffer to store data for verification in urlopen tests.
            buf = None

            def connect(self):
                # No network access: the "socket" just replays fakedata.
                self.sock = FakeSocket(fakedata)

        self._connection_class = http.client.HTTPConnection
        http.client.HTTPConnection = FakeHTTPConnection

    def unfakehttp(self):
        """Restore the connection class saved by fakehttp()."""
        http.client.HTTPConnection = self._connection_class
83
84
class urlopen_FileTests(unittest.TestCase):
    """Test urlopen() opening a temporary file.

    Try to test as much functionality as possible so as to cut down on reliance
    on connecting to the Net for testing.

    """

    def setUp(self):
        # Create a temp file to use for testing
        self.text = bytes("test_urllib: %s\n" % self.__class__.__name__,
                          "ascii")
        self.pathname = support.TESTFN
        with open(self.pathname, 'wb') as f:
            # Register the removal as soon as the file exists: cleanups run
            # even when the rest of setUp() raises, whereas tearDown() does
            # not, so a failing write or urlopen() no longer leaves the file
            # behind.
            self.addCleanup(os.remove, self.pathname)
            f.write(self.text)
        self.returned_obj = urlopen("file:%s" % self.pathname)

    def tearDown(self):
        """Shut down the open object"""
        # The temp file itself is removed afterwards by the cleanup that
        # setUp() registered (cleanups run after tearDown()).
        self.returned_obj.close()

    def test_interface(self):
        # Make sure object returned by urlopen() has the specified methods
        for attr in ("read", "readline", "readlines", "fileno",
                     "close", "info", "geturl", "getcode", "__iter__"):
            self.assertTrue(hasattr(self.returned_obj, attr),
                            "object returned by urlopen() lacks %s attribute" %
                            attr)

    def test_read(self):
        self.assertEqual(self.text, self.returned_obj.read())

    def test_readline(self):
        self.assertEqual(self.text, self.returned_obj.readline())
        self.assertEqual(b'', self.returned_obj.readline(),
                         "calling readline() after exhausting the file did not"
                         " return an empty string")

    def test_readlines(self):
        lines_list = self.returned_obj.readlines()
        self.assertEqual(len(lines_list), 1,
                         "readlines() returned the wrong number of lines")
        self.assertEqual(lines_list[0], self.text,
                         "readlines() returned improper text")

    def test_fileno(self):
        file_num = self.returned_obj.fileno()
        self.assertIsInstance(file_num, int, "fileno() did not return an int")
        self.assertEqual(os.read(file_num, len(self.text)), self.text,
                         "Reading on the file descriptor returned by fileno() "
                         "did not return the expected text")

    def test_close(self):
        # Test close() by calling it here and then having it be called again
        # by the tearDown() method for the test
        self.returned_obj.close()

    def test_info(self):
        self.assertIsInstance(self.returned_obj.info(), email.message.Message)

    def test_geturl(self):
        self.assertEqual(self.returned_obj.geturl(), self.pathname)

    def test_getcode(self):
        self.assertIsNone(self.returned_obj.getcode())

    def test_iter(self):
        # Test iterator
        # Don't need to count number of iterations since test would fail the
        # instant it returned anything beyond the first line from the
        # comparison.
        # Use the iterator in the usual implicit way to test for ticket #4608.
        for line in self.returned_obj:
            self.assertEqual(line, self.text)

    def test_relativelocalfile(self):
        # A relative path without a scheme is not a valid URL.
        self.assertRaises(ValueError, urllib.request.urlopen,
                          './' + self.pathname)
166
class ProxyTests(unittest.TestCase):
    """Tests for proxy settings picked up from environment variables."""

    def setUp(self):
        # The guard records every change so that tearDown() can undo it.
        self.env = support.EnvironmentVarGuard()
        # Start every test from an environment with no proxy related
        # variables.  The names are collected first so that os.environ is
        # not modified while it is being iterated.
        proxy_names = [name for name in os.environ if 'proxy' in name.lower()]
        for name in proxy_names:
            self.env.unset(name)

    def tearDown(self):
        # Leaving the guard restores all proxy related env vars.
        self.env.__exit__()
        del self.env

    def test_getproxies_environment_keep_no_proxies(self):
        self.env.set('NO_PROXY', 'localhost')
        found = urllib.request.getproxies_environment()
        # getproxies_environment() keys are lower-cased and have the
        # '_proxy' suffix stripped.
        self.assertEqual('localhost', found['no'])
        # A NO_PROXY list whose entries are separated by ", ".
        self.env.set('NO_PROXY', 'localhost, anotherdomain.com, newdomain.com')
        bypassed = urllib.request.proxy_bypass_environment('anotherdomain.com')
        self.assertTrue(bypassed)
Benjamin Peterson9bc93512008-09-22 22:10:59 +0000190
class urlopen_HttpTests(unittest.TestCase, FakeHTTPMixin):
    """Test urlopen() opening a fake http connection."""

    def _assert_hello_response(self, url):
        # Shared checks for a faked "200 OK" reply whose body is b"Hello!".
        fp = urlopen(url)
        self.assertEqual(fp.readline(), b"Hello!")
        self.assertEqual(fp.readline(), b"")
        self.assertEqual(fp.geturl(), url)
        self.assertEqual(fp.getcode(), 200)

    def check_read(self, ver):
        """Read a faked response that announces HTTP version *ver* (bytes)."""
        self.fakehttp(b"HTTP/" + ver + b" 200 OK\r\n\r\nHello!")
        try:
            self._assert_hello_response("http://python.org/")
        finally:
            self.unfakehttp()

    def test_url_fragment(self):
        # Issue #11703: geturl() omits fragments in the original URL.
        fragment_url = 'http://docs.python.org/library/urllib.html#OK'
        self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello!")
        try:
            response = urllib.request.urlopen(fragment_url)
            self.assertEqual(response.geturl(), fragment_url)
        finally:
            self.unfakehttp()

    def test_willclose(self):
        self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello!")
        try:
            response = urlopen("http://www.python.org")
            self.assertTrue(response.fp.will_close)
        finally:
            self.unfakehttp()

    def test_read_0_9(self):
        # "0.9" response accepted (but not "simple responses" without
        # a status line)
        self.check_read(b"0.9")

    def test_read_1_0(self):
        self.check_read(b"1.0")

    def test_read_1_1(self):
        self.check_read(b"1.1")

    def test_read_bogus(self):
        # urlopen() should raise IOError for many error codes.
        self.fakehttp(b'''HTTP/1.1 401 Authentication Required
Date: Wed, 02 Jan 2008 03:03:54 GMT
Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
Connection: close
Content-Type: text/html; charset=iso-8859-1
''')
        try:
            with self.assertRaises(IOError):
                urlopen("http://python.org/")
        finally:
            self.unfakehttp()

    def test_invalid_redirect(self):
        # A 302 whose Location points at a file:// URL must be rejected
        # with an HTTPError.
        self.fakehttp(b'''HTTP/1.1 302 Found
Date: Wed, 02 Jan 2008 03:03:54 GMT
Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
Location: file://guidocomputer.athome.com:/python/license
Connection: close
Content-Type: text/html; charset=iso-8859-1
''')
        try:
            with self.assertRaises(urllib.error.HTTPError):
                urlopen("http://python.org/")
        finally:
            self.unfakehttp()

    def test_empty_socket(self):
        # urlopen() raises IOError if the underlying socket does not send any
        # data. (#1680230)
        self.fakehttp(b'')
        try:
            with self.assertRaises(IOError):
                urlopen("http://something")
        finally:
            self.unfakehttp()

    def test_missing_localfile(self):
        # Test for #10836
        with self.assertRaises(urllib.error.URLError) as caught:
            urlopen('file://localhost/a/file/which/doesnot/exists.py')
        error = caught.exception
        self.assertTrue(error.filename)
        self.assertTrue(error.reason)

    def test_file_notexists(self):
        fd, tmp_file = tempfile.mkstemp()
        url_path = tmp_file.replace(os.path.sep, '/')
        tmp_fileurl = 'file://localhost/' + url_path
        try:
            # While the file exists the URL can be opened ...
            self.assertTrue(os.path.exists(tmp_file))
            with urlopen(tmp_fileurl) as fobj:
                self.assertTrue(fobj)
        finally:
            os.close(fd)
            os.unlink(tmp_file)
        # ... and once it is gone the same URL must fail.
        self.assertFalse(os.path.exists(tmp_file))
        with self.assertRaises(urllib.error.URLError):
            urlopen(tmp_fileurl)

    def test_ftp_nohost(self):
        hostless_url = 'ftp:///path'
        with self.assertRaises(urllib.error.URLError) as caught:
            urlopen(hostless_url)
        error = caught.exception
        self.assertFalse(error.filename)
        self.assertTrue(error.reason)

    def test_ftp_nonexisting(self):
        with self.assertRaises(urllib.error.URLError) as caught:
            urlopen('ftp://localhost/a/file/which/doesnot/exists.py')
        error = caught.exception
        self.assertFalse(error.filename)
        self.assertTrue(error.reason)

    def test_userpass_inurl(self):
        self.fakehttp(b"HTTP/1.0 200 OK\r\n\r\nHello!")
        try:
            self._assert_hello_response("http://user:pass@python.org/")
        finally:
            self.unfakehttp()

    def test_userpass_inurl_w_spaces(self):
        self.fakehttp(b"HTTP/1.0 200 OK\r\n\r\nHello!")
        try:
            userpass = "a b:c d"
            url = "http://{}@python.org/".format(userpass)
            # fakehttp() has already swapped in the fake connection class;
            # its ``buf`` attribute receives the bytes of the request.
            fake_connection = http.client.HTTPConnection
            encoded = b64encode(userpass.encode("ASCII")).decode("ASCII")
            expected_header = "Authorization: Basic %s\r\n" % encoded
            fp = urlopen(url)
            # The authorization header must be in place
            sent_request = fake_connection.buf.decode("UTF-8")
            self.assertIn(expected_header, sent_request)
            self.assertEqual(fp.readline(), b"Hello!")
            self.assertEqual(fp.readline(), b"")
            # the spaces are quoted in URL so no match
            self.assertNotEqual(fp.geturl(), url)
            self.assertEqual(fp.getcode(), 200)
        finally:
            self.unfakehttp()

    def test_URLopener_deprecation(self):
        # Instantiating URLopener must emit a DeprecationWarning.
        with support.check_warnings(('', DeprecationWarning)):
            urllib.request.URLopener()
Senthil Kumaran38b968b92012-03-14 13:43:53 -0700339
class urlopen_DataTests(unittest.TestCase):
    """Test urlopen() opening a data URL."""

    def _open(self, url):
        # Open *url* and make sure the response is closed after the test.
        response = urllib.request.urlopen(url)
        self.addCleanup(response.close)
        return response

    def setUp(self):
        # text containing URL special- and unicode-characters
        self.text = "test data URLs :;,%=& \u00f6 \u00c4 "
        # 2x1 pixel RGB PNG image with one black and one white pixel
        self.image = (
            b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\x02\x00\x00\x00'
            b'\x01\x08\x02\x00\x00\x00{@\xe8\xdd\x00\x00\x00\x01sRGB\x00\xae'
            b'\xce\x1c\xe9\x00\x00\x00\x0fIDAT\x08\xd7c```\xf8\xff\xff?\x00'
            b'\x06\x01\x02\xfe\no/\x1e\x00\x00\x00\x00IEND\xaeB`\x82')

        self.text_url = (
            "data:text/plain;charset=UTF-8,test%20data%20URLs%20%3A%3B%2C%25%3"
            "D%26%20%C3%B6%20%C3%84%20")
        self.text_url_base64 = (
            "data:text/plain;charset=ISO-8859-1;base64,dGVzdCBkYXRhIFVSTHMgOjs"
            "sJT0mIPYgxCA%3D")
        # base64 encoded data URL that contains ignorable spaces,
        # such as "\n", " ", "%0A", and "%20".
        # The first line carries the "data:" scheme, the media type and the
        # base64 encoding of the first 30 bytes of self.image; without it the
        # string is not a data URL at all and urlopen() rejects it.
        self.image_url = (
            "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAIAAAABCAIAAAB7\n"
            "QOjdAAAAAXNSR0IArs4c6QAAAA9JREFUCNdj%0AYGBg%2BP//PwAGAQL%2BCm8 "
            "vHgAAAABJRU5ErkJggg%3D%3D%0A%20")

        self.text_url_resp = self._open(self.text_url)
        self.text_url_base64_resp = self._open(self.text_url_base64)
        self.image_url_resp = self._open(self.image_url)

    def test_interface(self):
        # Make sure object returned by urlopen() has the specified methods
        for attr in ("read", "readline", "readlines",
                     "close", "info", "geturl", "getcode", "__iter__"):
            self.assertTrue(hasattr(self.text_url_resp, attr),
                            "object returned by urlopen() lacks %s attribute" %
                            attr)

    def test_info(self):
        self.assertIsInstance(self.text_url_resp.info(), email.message.Message)
        self.assertEqual(self.text_url_base64_resp.info().get_params(),
                         [('text/plain', ''), ('charset', 'ISO-8859-1')])
        self.assertEqual(self.image_url_resp.info()['content-length'],
                         str(len(self.image)))
        # A data URL without a media type falls back to text/plain, US-ASCII.
        self.assertEqual(self._open("data:,").info().get_params(),
                         [('text/plain', ''), ('charset', 'US-ASCII')])

    def test_geturl(self):
        self.assertEqual(self.text_url_resp.geturl(), self.text_url)
        self.assertEqual(self.text_url_base64_resp.geturl(),
                         self.text_url_base64)
        self.assertEqual(self.image_url_resp.geturl(), self.image_url)

    def test_read_text(self):
        charset = dict(self.text_url_resp.info().get_params())['charset']
        self.assertEqual(self.text_url_resp.read().decode(charset), self.text)

    def test_read_text_base64(self):
        charset = dict(
            self.text_url_base64_resp.info().get_params())['charset']
        self.assertEqual(self.text_url_base64_resp.read().decode(charset),
                         self.text)

    def test_read_image(self):
        self.assertEqual(self.image_url_resp.read(), self.image)

    def test_missing_comma(self):
        self.assertRaises(ValueError, urllib.request.urlopen,
                          'data:text/plain')

    def test_invalid_base64_data(self):
        # missing padding character
        self.assertRaises(ValueError, urllib.request.urlopen,
                          'data:;base64,Cg=')
412
class urlretrieve_FileTests(unittest.TestCase):
    """Test urllib.urlretrieve() on local files"""

    def setUp(self):
        # Create a list of temporary files. Each item in the list is a file
        # name (absolute path or relative to the current working directory).
        # All files in this list will be deleted in the tearDown method. Note,
        # this only helps to make sure temporary files get deleted, but it
        # does nothing about trying to close files that may still be open. It
        # is the responsibility of the developer to properly close files even
        # when exceptional conditions occur.
        self.tempFiles = []

        # Create a temporary file.
        self.registerFileForCleanUp(support.TESTFN)
        self.text = b'testing urllib.urlretrieve'
        with open(support.TESTFN, 'wb') as FILE:
            FILE.write(self.text)

    def tearDown(self):
        # Delete the temporary files.  Removal is best effort: a registered
        # file may never have been created, or may already be gone.
        for each in self.tempFiles:
            try:
                os.remove(each)
            except OSError:
                pass

    def constructLocalFileUrl(self, filePath):
        """Return a file:// URL for *filePath*.

        Raises unittest.SkipTest when the absolute path cannot be encoded
        as UTF-8.
        """
        filePath = os.path.abspath(filePath)
        try:
            filePath.encode("utf-8")
        except UnicodeEncodeError:
            raise unittest.SkipTest("filePath is not encodable to utf8")
        return "file://%s" % urllib.request.pathname2url(filePath)

    def createNewTempFile(self, data=b""):
        """Creates a new temporary file containing the specified data,
        registers the file for deletion during the test fixture tear down, and
        returns the absolute path of the file."""

        newFd, newFilePath = tempfile.mkstemp()
        self.registerFileForCleanUp(newFilePath)
        # The file object takes ownership of the descriptor and closes it
        # when the block is left, even if the write fails.
        with os.fdopen(newFd, "wb") as newFile:
            newFile.write(data)
        return newFilePath

    def registerFileForCleanUp(self, fileName):
        """Remember *fileName* so that tearDown() removes it."""
        self.tempFiles.append(fileName)

    def test_basic(self):
        # Make sure that a local file just gets its own location returned and
        # a headers value is returned.
        result = urllib.request.urlretrieve("file:%s" % support.TESTFN)
        self.assertEqual(result[0], support.TESTFN)
        self.assertIsInstance(result[1], email.message.Message,
                              "did not get a email.message.Message instance "
                              "as second returned value")

    def test_copy(self):
        # Test that setting the filename argument works.
        second_temp = "%s.2" % support.TESTFN
        self.registerFileForCleanUp(second_temp)
        result = urllib.request.urlretrieve(self.constructLocalFileUrl(
            support.TESTFN), second_temp)
        self.assertEqual(second_temp, result[0])
        self.assertTrue(os.path.exists(second_temp), "copy of the file was not "
                                                     "made")
        with open(second_temp, 'rb') as FILE:
            text = FILE.read()
        self.assertEqual(self.text, text)

    def test_reporthook(self):
        # Make sure that the reporthook works.
        # One-element list so the nested function can update the counter.
        count_holder = [0]

        def hooktester(block_count, block_read_size, file_size):
            self.assertIsInstance(block_count, int)
            self.assertIsInstance(block_read_size, int)
            self.assertIsInstance(file_size, int)
            # Block numbers must arrive consecutively, starting at zero.
            self.assertEqual(block_count, count_holder[0])
            count_holder[0] = count_holder[0] + 1

        second_temp = "%s.2" % support.TESTFN
        self.registerFileForCleanUp(second_temp)
        urllib.request.urlretrieve(
            self.constructLocalFileUrl(support.TESTFN),
            second_temp, hooktester)

    def test_reporthook_0_bytes(self):
        # Test on zero length file. Should call reporthook only 1 time.
        report = []

        def hooktester(block_count, block_read_size, file_size):
            report.append((block_count, block_read_size, file_size))

        srcFileName = self.createNewTempFile()
        urllib.request.urlretrieve(self.constructLocalFileUrl(srcFileName),
                                   support.TESTFN, hooktester)
        self.assertEqual(len(report), 1)
        self.assertEqual(report[0][2], 0)

    def test_reporthook_5_bytes(self):
        # Test on 5 byte file. Should call reporthook only 2 times (once when
        # the "network connection" is established and once when the block is
        # read).
        report = []

        def hooktester(block_count, block_read_size, file_size):
            report.append((block_count, block_read_size, file_size))

        srcFileName = self.createNewTempFile(b"x" * 5)
        urllib.request.urlretrieve(self.constructLocalFileUrl(srcFileName),
                                   support.TESTFN, hooktester)
        self.assertEqual(len(report), 2)
        self.assertEqual(report[0][2], 5)
        self.assertEqual(report[1][2], 5)

    def test_reporthook_8193_bytes(self):
        # Test on 8193 byte file. Should call reporthook only 3 times (once
        # when the "network connection" is established, once for the next 8192
        # bytes, and once for the last byte).
        report = []

        def hooktester(block_count, block_read_size, file_size):
            report.append((block_count, block_read_size, file_size))

        srcFileName = self.createNewTempFile(b"x" * 8193)
        urllib.request.urlretrieve(self.constructLocalFileUrl(srcFileName),
                                   support.TESTFN, hooktester)
        self.assertEqual(len(report), 3)
        self.assertEqual(report[0][2], 8193)
        self.assertEqual(report[0][1], 8192)
        self.assertEqual(report[1][1], 8192)
        self.assertEqual(report[2][1], 8192)
Skip Montanaro080c9972001-01-28 21:12:22 +0000551
Senthil Kumarance260142011-11-01 01:35:17 +0800552
class urlretrieve_HttpTests(unittest.TestCase, FakeHTTPMixin):
    """Test urllib.urlretrieve() using fake http connections"""

    def test_short_content_raises_ContentTooShortError(self):
        # urlretrieve() is called without a filename, so it downloads into a
        # temporary file of its own; urlcleanup() removes that file again.
        self.addCleanup(urllib.request.urlcleanup)

        # The reply announces 100 bytes of content but delivers only "FF\n".
        self.fakehttp(b'''HTTP/1.1 200 OK
Date: Wed, 02 Jan 2008 03:03:54 GMT
Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
Connection: close
Content-Length: 100
Content-Type: text/html; charset=iso-8859-1

FF
''')

        def _reporthook(par1, par2, par3):
            pass

        with self.assertRaises(urllib.error.ContentTooShortError):
            try:
                urllib.request.urlretrieve('http://example.com/',
                                           reporthook=_reporthook)
            finally:
                self.unfakehttp()

    def test_short_content_raises_ContentTooShortError_without_reporthook(self):
        # Same truncated download as above, but without a reporthook.
        self.addCleanup(urllib.request.urlcleanup)

        self.fakehttp(b'''HTTP/1.1 200 OK
Date: Wed, 02 Jan 2008 03:03:54 GMT
Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
Connection: close
Content-Length: 100
Content-Type: text/html; charset=iso-8859-1

FF
''')
        with self.assertRaises(urllib.error.ContentTooShortError):
            try:
                urllib.request.urlretrieve('http://example.com/')
            finally:
                self.unfakehttp()
592
593
Brett Cannon74bfd702003-04-25 09:39:47 +0000594class QuotingTests(unittest.TestCase):
595 """Tests for urllib.quote() and urllib.quote_plus()
Tim Petersc2659cf2003-05-12 20:19:37 +0000596
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000597 According to RFC 2396 (Uniform Resource Identifiers), to escape a
598 character you write it as '%' + <2 character US-ASCII hex value>.
599 The Python code of ``'%' + hex(ord(<character>))[2:]`` escapes a
600 character properly. Case does not matter on the hex letters.
Brett Cannon74bfd702003-04-25 09:39:47 +0000601
602 The various character sets specified are:
Tim Petersc2659cf2003-05-12 20:19:37 +0000603
Brett Cannon74bfd702003-04-25 09:39:47 +0000604 Reserved characters : ";/?:@&=+$,"
605 Have special meaning in URIs and must be escaped if not being used for
606 their special meaning
607 Data characters : letters, digits, and "-_.!~*'()"
608 Unreserved and do not need to be escaped; can be, though, if desired
609 Control characters : 0x00 - 0x1F, 0x7F
610 Have no use in URIs so must be escaped
611 space : 0x20
612 Must be escaped
613 Delimiters : '<>#%"'
614 Must be escaped
615 Unwise : "{}|\^[]`"
616 Must be escaped
Tim Petersc2659cf2003-05-12 20:19:37 +0000617
Brett Cannon74bfd702003-04-25 09:39:47 +0000618 """
619
620 def test_never_quote(self):
621 # Make sure quote() does not quote letters, digits, and "_,.-"
622 do_not_quote = '' .join(["ABCDEFGHIJKLMNOPQRSTUVWXYZ",
623 "abcdefghijklmnopqrstuvwxyz",
624 "0123456789",
625 "_.-"])
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000626 result = urllib.parse.quote(do_not_quote)
Brett Cannon74bfd702003-04-25 09:39:47 +0000627 self.assertEqual(do_not_quote, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000628 "using quote(): %r != %r" % (do_not_quote, result))
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000629 result = urllib.parse.quote_plus(do_not_quote)
Brett Cannon74bfd702003-04-25 09:39:47 +0000630 self.assertEqual(do_not_quote, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000631 "using quote_plus(): %r != %r" % (do_not_quote, result))
Brett Cannon74bfd702003-04-25 09:39:47 +0000632
633 def test_default_safe(self):
634 # Test '/' is default value for 'safe' parameter
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000635 self.assertEqual(urllib.parse.quote.__defaults__[0], '/')
Brett Cannon74bfd702003-04-25 09:39:47 +0000636
637 def test_safe(self):
638 # Test setting 'safe' parameter does what it should do
639 quote_by_default = "<>"
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000640 result = urllib.parse.quote(quote_by_default, safe=quote_by_default)
Brett Cannon74bfd702003-04-25 09:39:47 +0000641 self.assertEqual(quote_by_default, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000642 "using quote(): %r != %r" % (quote_by_default, result))
Jeremy Hylton1ef7c6b2009-03-26 16:57:30 +0000643 result = urllib.parse.quote_plus(quote_by_default,
644 safe=quote_by_default)
Brett Cannon74bfd702003-04-25 09:39:47 +0000645 self.assertEqual(quote_by_default, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000646 "using quote_plus(): %r != %r" %
Brett Cannon74bfd702003-04-25 09:39:47 +0000647 (quote_by_default, result))
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000648 # Safe expressed as bytes rather than str
649 result = urllib.parse.quote(quote_by_default, safe=b"<>")
650 self.assertEqual(quote_by_default, result,
651 "using quote(): %r != %r" % (quote_by_default, result))
652 # "Safe" non-ASCII characters should have no effect
653 # (Since URIs are not allowed to have non-ASCII characters)
654 result = urllib.parse.quote("a\xfcb", encoding="latin-1", safe="\xfc")
655 expect = urllib.parse.quote("a\xfcb", encoding="latin-1", safe="")
656 self.assertEqual(expect, result,
657 "using quote(): %r != %r" %
658 (expect, result))
659 # Same as above, but using a bytes rather than str
660 result = urllib.parse.quote("a\xfcb", encoding="latin-1", safe=b"\xfc")
661 expect = urllib.parse.quote("a\xfcb", encoding="latin-1", safe="")
662 self.assertEqual(expect, result,
663 "using quote(): %r != %r" %
664 (expect, result))
Brett Cannon74bfd702003-04-25 09:39:47 +0000665
def test_default_quoting(self):
    # Make sure all characters that should be quoted are quoted by default.
    # Space is excluded because quote() and quote_plus() treat it
    # differently; it has a separate test.
    should_quote = ''.join([
        ''.join(chr(num) for num in range(32)),     # For 0x00 - 0x1F
        # Raw string: '\^' is not a valid escape sequence, so a plain
        # literal triggers an invalid-escape warning on newer Pythons.
        r'<>#%"{}|\^[]`',                           # Delimiters and "unwise"
        chr(127),                                   # For 0x7F
    ])
    for char in should_quote:
        escaped = hexescape(char)
        result = urllib.parse.quote(char)
        self.assertEqual(escaped, result,
                         "using quote(): "
                         "%s should be escaped to %s, not %s" %
                         (char, escaped, result))
        result = urllib.parse.quote_plus(char)
        self.assertEqual(escaped, result,
                         "using quote_plus(): "
                         "%s should be escaped to %s, not %s" %
                         (char, escaped, result))
    # Only the characters that need it are escaped inside a longer string.
    partial_quote = "ab[]cd"
    expected = "ab%5B%5Dcd"
    result = urllib.parse.quote(partial_quote)
    self.assertEqual(expected, result,
                     "using quote(): %r != %r" % (expected, result))
    result = urllib.parse.quote_plus(partial_quote)
    self.assertEqual(expected, result,
                     "using quote_plus(): %r != %r" % (expected, result))
Brett Cannon74bfd702003-04-25 09:39:47 +0000693
def test_quoting_space(self):
    # quote() percent-escapes a space while quote_plus() turns it into '+'.
    escaped_space = hexescape(' ')
    quoted = urllib.parse.quote(' ')
    self.assertEqual(quoted, escaped_space,
                     "using quote(): %r != %r" % (quoted, escaped_space))
    quoted = urllib.parse.quote_plus(' ')
    self.assertEqual(quoted, '+',
                     "using quote_plus(): %r != +" % quoted)
    # The same rules apply to spaces embedded in longer text.
    text = "a b cd e f"
    for label, quoter, replacement in (
            ("quote", urllib.parse.quote, escaped_space),
            ("quote_plus", urllib.parse.quote_plus, '+')):
        wanted = text.replace(' ', replacement)
        quoted = quoter(text)
        self.assertEqual(wanted, quoted,
                         "using %s(): %r != %r" % (label, wanted, quoted))
Brett Cannon74bfd702003-04-25 09:39:47 +0000712
Raymond Hettinger2bdec7b2005-09-10 14:30:09 +0000713 def test_quoting_plus(self):
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000714 self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma'),
Raymond Hettinger2bdec7b2005-09-10 14:30:09 +0000715 'alpha%2Bbeta+gamma')
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000716 self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma', '+'),
Raymond Hettinger2bdec7b2005-09-10 14:30:09 +0000717 'alpha+beta+gamma')
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000718 # Test with bytes
719 self.assertEqual(urllib.parse.quote_plus(b'alpha+beta gamma'),
720 'alpha%2Bbeta+gamma')
721 # Test with safe bytes
722 self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma', b'+'),
723 'alpha+beta+gamma')
724
725 def test_quote_bytes(self):
726 # Bytes should quote directly to percent-encoded values
727 given = b"\xa2\xd8ab\xff"
728 expect = "%A2%D8ab%FF"
729 result = urllib.parse.quote(given)
730 self.assertEqual(expect, result,
731 "using quote(): %r != %r" % (expect, result))
732 # Encoding argument should raise type error on bytes input
733 self.assertRaises(TypeError, urllib.parse.quote, given,
734 encoding="latin-1")
735 # quote_from_bytes should work the same
736 result = urllib.parse.quote_from_bytes(given)
737 self.assertEqual(expect, result,
738 "using quote_from_bytes(): %r != %r"
739 % (expect, result))
740
741 def test_quote_with_unicode(self):
742 # Characters in Latin-1 range, encoded by default in UTF-8
743 given = "\xa2\xd8ab\xff"
744 expect = "%C2%A2%C3%98ab%C3%BF"
745 result = urllib.parse.quote(given)
746 self.assertEqual(expect, result,
747 "using quote(): %r != %r" % (expect, result))
748 # Characters in Latin-1 range, encoded by with None (default)
749 result = urllib.parse.quote(given, encoding=None, errors=None)
750 self.assertEqual(expect, result,
751 "using quote(): %r != %r" % (expect, result))
752 # Characters in Latin-1 range, encoded with Latin-1
753 given = "\xa2\xd8ab\xff"
754 expect = "%A2%D8ab%FF"
755 result = urllib.parse.quote(given, encoding="latin-1")
756 self.assertEqual(expect, result,
757 "using quote(): %r != %r" % (expect, result))
758 # Characters in BMP, encoded by default in UTF-8
759 given = "\u6f22\u5b57" # "Kanji"
760 expect = "%E6%BC%A2%E5%AD%97"
761 result = urllib.parse.quote(given)
762 self.assertEqual(expect, result,
763 "using quote(): %r != %r" % (expect, result))
764 # Characters in BMP, encoded with Latin-1
765 given = "\u6f22\u5b57"
766 self.assertRaises(UnicodeEncodeError, urllib.parse.quote, given,
767 encoding="latin-1")
768 # Characters in BMP, encoded with Latin-1, with replace error handling
769 given = "\u6f22\u5b57"
770 expect = "%3F%3F" # "??"
771 result = urllib.parse.quote(given, encoding="latin-1",
772 errors="replace")
773 self.assertEqual(expect, result,
774 "using quote(): %r != %r" % (expect, result))
775 # Characters in BMP, Latin-1, with xmlcharref error handling
776 given = "\u6f22\u5b57"
777 expect = "%26%2328450%3B%26%2323383%3B" # "&#28450;&#23383;"
778 result = urllib.parse.quote(given, encoding="latin-1",
779 errors="xmlcharrefreplace")
780 self.assertEqual(expect, result,
781 "using quote(): %r != %r" % (expect, result))
Raymond Hettinger2bdec7b2005-09-10 14:30:09 +0000782
Georg Brandlfaf41492009-05-26 18:31:11 +0000783 def test_quote_plus_with_unicode(self):
784 # Encoding (latin-1) test for quote_plus
785 given = "\xa2\xd8 \xff"
786 expect = "%A2%D8+%FF"
787 result = urllib.parse.quote_plus(given, encoding="latin-1")
788 self.assertEqual(expect, result,
789 "using quote_plus(): %r != %r" % (expect, result))
790 # Errors test for quote_plus
791 given = "ab\u6f22\u5b57 cd"
792 expect = "ab%3F%3F+cd"
793 result = urllib.parse.quote_plus(given, encoding="latin-1",
794 errors="replace")
795 self.assertEqual(expect, result,
796 "using quote_plus(): %r != %r" % (expect, result))
797
Senthil Kumarand496c4c2010-07-30 19:34:36 +0000798
Brett Cannon74bfd702003-04-25 09:39:47 +0000799class UnquotingTests(unittest.TestCase):
800 """Tests for unquote() and unquote_plus()
Tim Petersc2659cf2003-05-12 20:19:37 +0000801
Brett Cannon74bfd702003-04-25 09:39:47 +0000802 See the doc string for quoting_Tests for details on quoting and such.
803
804 """
805
806 def test_unquoting(self):
807 # Make sure unquoting of all ASCII values works
808 escape_list = []
809 for num in range(128):
810 given = hexescape(chr(num))
811 expect = chr(num)
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000812 result = urllib.parse.unquote(given)
Brett Cannon74bfd702003-04-25 09:39:47 +0000813 self.assertEqual(expect, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000814 "using unquote(): %r != %r" % (expect, result))
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000815 result = urllib.parse.unquote_plus(given)
Brett Cannon74bfd702003-04-25 09:39:47 +0000816 self.assertEqual(expect, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000817 "using unquote_plus(): %r != %r" %
Brett Cannon74bfd702003-04-25 09:39:47 +0000818 (expect, result))
819 escape_list.append(given)
820 escape_string = ''.join(escape_list)
821 del escape_list
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000822 result = urllib.parse.unquote(escape_string)
Brett Cannon74bfd702003-04-25 09:39:47 +0000823 self.assertEqual(result.count('%'), 1,
Brett Cannon74bfd702003-04-25 09:39:47 +0000824 "using unquote(): not all characters escaped: "
825 "%s" % result)
Georg Brandl604ef372010-07-31 08:20:02 +0000826 self.assertRaises((TypeError, AttributeError), urllib.parse.unquote, None)
827 self.assertRaises((TypeError, AttributeError), urllib.parse.unquote, ())
Florent Xicluna62829dc2010-08-14 20:51:58 +0000828 with support.check_warnings(('', BytesWarning), quiet=True):
829 self.assertRaises((TypeError, AttributeError), urllib.parse.unquote, b'')
Brett Cannon74bfd702003-04-25 09:39:47 +0000830
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000831 def test_unquoting_badpercent(self):
832 # Test unquoting on bad percent-escapes
833 given = '%xab'
834 expect = given
835 result = urllib.parse.unquote(given)
836 self.assertEqual(expect, result, "using unquote(): %r != %r"
837 % (expect, result))
838 given = '%x'
839 expect = given
840 result = urllib.parse.unquote(given)
841 self.assertEqual(expect, result, "using unquote(): %r != %r"
842 % (expect, result))
843 given = '%'
844 expect = given
845 result = urllib.parse.unquote(given)
846 self.assertEqual(expect, result, "using unquote(): %r != %r"
847 % (expect, result))
848 # unquote_to_bytes
849 given = '%xab'
850 expect = bytes(given, 'ascii')
851 result = urllib.parse.unquote_to_bytes(given)
852 self.assertEqual(expect, result, "using unquote_to_bytes(): %r != %r"
853 % (expect, result))
854 given = '%x'
855 expect = bytes(given, 'ascii')
856 result = urllib.parse.unquote_to_bytes(given)
857 self.assertEqual(expect, result, "using unquote_to_bytes(): %r != %r"
858 % (expect, result))
859 given = '%'
860 expect = bytes(given, 'ascii')
861 result = urllib.parse.unquote_to_bytes(given)
862 self.assertEqual(expect, result, "using unquote_to_bytes(): %r != %r"
863 % (expect, result))
Georg Brandl604ef372010-07-31 08:20:02 +0000864 self.assertRaises((TypeError, AttributeError), urllib.parse.unquote_to_bytes, None)
865 self.assertRaises((TypeError, AttributeError), urllib.parse.unquote_to_bytes, ())
Senthil Kumaran79e17f62010-07-19 18:17:19 +0000866
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000867 def test_unquoting_mixed_case(self):
868 # Test unquoting on mixed-case hex digits in the percent-escapes
869 given = '%Ab%eA'
870 expect = b'\xab\xea'
871 result = urllib.parse.unquote_to_bytes(given)
872 self.assertEqual(expect, result,
873 "using unquote_to_bytes(): %r != %r"
874 % (expect, result))
875
Brett Cannon74bfd702003-04-25 09:39:47 +0000876 def test_unquoting_parts(self):
877 # Make sure unquoting works when have non-quoted characters
878 # interspersed
879 given = 'ab%sd' % hexescape('c')
880 expect = "abcd"
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000881 result = urllib.parse.unquote(given)
Brett Cannon74bfd702003-04-25 09:39:47 +0000882 self.assertEqual(expect, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000883 "using quote(): %r != %r" % (expect, result))
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000884 result = urllib.parse.unquote_plus(given)
Brett Cannon74bfd702003-04-25 09:39:47 +0000885 self.assertEqual(expect, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000886 "using unquote_plus(): %r != %r" % (expect, result))
Tim Petersc2659cf2003-05-12 20:19:37 +0000887
Brett Cannon74bfd702003-04-25 09:39:47 +0000888 def test_unquoting_plus(self):
889 # Test difference between unquote() and unquote_plus()
890 given = "are+there+spaces..."
891 expect = given
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000892 result = urllib.parse.unquote(given)
Brett Cannon74bfd702003-04-25 09:39:47 +0000893 self.assertEqual(expect, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000894 "using unquote(): %r != %r" % (expect, result))
Brett Cannon74bfd702003-04-25 09:39:47 +0000895 expect = given.replace('+', ' ')
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000896 result = urllib.parse.unquote_plus(given)
Brett Cannon74bfd702003-04-25 09:39:47 +0000897 self.assertEqual(expect, result,
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000898 "using unquote_plus(): %r != %r" % (expect, result))
899
900 def test_unquote_to_bytes(self):
901 given = 'br%C3%BCckner_sapporo_20050930.doc'
902 expect = b'br\xc3\xbcckner_sapporo_20050930.doc'
903 result = urllib.parse.unquote_to_bytes(given)
904 self.assertEqual(expect, result,
905 "using unquote_to_bytes(): %r != %r"
906 % (expect, result))
907 # Test on a string with unescaped non-ASCII characters
908 # (Technically an invalid URI; expect those characters to be UTF-8
909 # encoded).
910 result = urllib.parse.unquote_to_bytes("\u6f22%C3%BC")
911 expect = b'\xe6\xbc\xa2\xc3\xbc' # UTF-8 for "\u6f22\u00fc"
912 self.assertEqual(expect, result,
913 "using unquote_to_bytes(): %r != %r"
914 % (expect, result))
915 # Test with a bytes as input
916 given = b'%A2%D8ab%FF'
917 expect = b'\xa2\xd8ab\xff'
918 result = urllib.parse.unquote_to_bytes(given)
919 self.assertEqual(expect, result,
920 "using unquote_to_bytes(): %r != %r"
921 % (expect, result))
922 # Test with a bytes as input, with unescaped non-ASCII bytes
923 # (Technically an invalid URI; expect those bytes to be preserved)
924 given = b'%A2\xd8ab%FF'
925 expect = b'\xa2\xd8ab\xff'
926 result = urllib.parse.unquote_to_bytes(given)
927 self.assertEqual(expect, result,
928 "using unquote_to_bytes(): %r != %r"
929 % (expect, result))
Brett Cannon74bfd702003-04-25 09:39:47 +0000930
Raymond Hettinger4b0f20d2005-10-15 16:41:53 +0000931 def test_unquote_with_unicode(self):
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000932 # Characters in the Latin-1 range, encoded with UTF-8
933 given = 'br%C3%BCckner_sapporo_20050930.doc'
934 expect = 'br\u00fcckner_sapporo_20050930.doc'
935 result = urllib.parse.unquote(given)
936 self.assertEqual(expect, result,
937 "using unquote(): %r != %r" % (expect, result))
938 # Characters in the Latin-1 range, encoded with None (default)
939 result = urllib.parse.unquote(given, encoding=None, errors=None)
940 self.assertEqual(expect, result,
941 "using unquote(): %r != %r" % (expect, result))
942
943 # Characters in the Latin-1 range, encoded with Latin-1
944 result = urllib.parse.unquote('br%FCckner_sapporo_20050930.doc',
945 encoding="latin-1")
946 expect = 'br\u00fcckner_sapporo_20050930.doc'
947 self.assertEqual(expect, result,
948 "using unquote(): %r != %r" % (expect, result))
949
950 # Characters in BMP, encoded with UTF-8
951 given = "%E6%BC%A2%E5%AD%97"
952 expect = "\u6f22\u5b57" # "Kanji"
953 result = urllib.parse.unquote(given)
954 self.assertEqual(expect, result,
955 "using unquote(): %r != %r" % (expect, result))
956
957 # Decode with UTF-8, invalid sequence
958 given = "%F3%B1"
959 expect = "\ufffd" # Replacement character
960 result = urllib.parse.unquote(given)
961 self.assertEqual(expect, result,
962 "using unquote(): %r != %r" % (expect, result))
963
964 # Decode with UTF-8, invalid sequence, replace errors
965 result = urllib.parse.unquote(given, errors="replace")
966 self.assertEqual(expect, result,
967 "using unquote(): %r != %r" % (expect, result))
968
969 # Decode with UTF-8, invalid sequence, ignoring errors
970 given = "%F3%B1"
971 expect = ""
972 result = urllib.parse.unquote(given, errors="ignore")
973 self.assertEqual(expect, result,
974 "using unquote(): %r != %r" % (expect, result))
975
976 # A mix of non-ASCII and percent-encoded characters, UTF-8
977 result = urllib.parse.unquote("\u6f22%C3%BC")
978 expect = '\u6f22\u00fc'
979 self.assertEqual(expect, result,
980 "using unquote(): %r != %r" % (expect, result))
981
982 # A mix of non-ASCII and percent-encoded characters, Latin-1
983 # (Note, the string contains non-Latin-1-representable characters)
984 result = urllib.parse.unquote("\u6f22%FC", encoding="latin-1")
985 expect = '\u6f22\u00fc'
986 self.assertEqual(expect, result,
987 "using unquote(): %r != %r" % (expect, result))
Raymond Hettinger4b0f20d2005-10-15 16:41:53 +0000988
Brett Cannon74bfd702003-04-25 09:39:47 +0000989class urlencode_Tests(unittest.TestCase):
990 """Tests for urlencode()"""
991
992 def help_inputtype(self, given, test_type):
993 """Helper method for testing different input types.
Tim Petersc2659cf2003-05-12 20:19:37 +0000994
Brett Cannon74bfd702003-04-25 09:39:47 +0000995 'given' must lead to only the pairs:
996 * 1st, 1
997 * 2nd, 2
998 * 3rd, 3
Tim Petersc2659cf2003-05-12 20:19:37 +0000999
Brett Cannon74bfd702003-04-25 09:39:47 +00001000 Test cannot assume anything about order. Docs make no guarantee and
1001 have possible dictionary input.
Tim Petersc2659cf2003-05-12 20:19:37 +00001002
Brett Cannon74bfd702003-04-25 09:39:47 +00001003 """
1004 expect_somewhere = ["1st=1", "2nd=2", "3rd=3"]
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001005 result = urllib.parse.urlencode(given)
Brett Cannon74bfd702003-04-25 09:39:47 +00001006 for expected in expect_somewhere:
Ezio Melottib58e0bd2010-01-23 15:40:09 +00001007 self.assertIn(expected, result,
Brett Cannon74bfd702003-04-25 09:39:47 +00001008 "testing %s: %s not found in %s" %
1009 (test_type, expected, result))
1010 self.assertEqual(result.count('&'), 2,
1011 "testing %s: expected 2 '&'s; got %s" %
1012 (test_type, result.count('&')))
1013 amp_location = result.index('&')
1014 on_amp_left = result[amp_location - 1]
1015 on_amp_right = result[amp_location + 1]
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00001016 self.assertTrue(on_amp_left.isdigit() and on_amp_right.isdigit(),
Brett Cannon74bfd702003-04-25 09:39:47 +00001017 "testing %s: '&' not located in proper place in %s" %
1018 (test_type, result))
1019 self.assertEqual(len(result), (5 * 3) + 2, #5 chars per thing and amps
1020 "testing %s: "
1021 "unexpected number of characters: %s != %s" %
1022 (test_type, len(result), (5 * 3) + 2))
1023
1024 def test_using_mapping(self):
1025 # Test passing in a mapping object as an argument.
1026 self.help_inputtype({"1st":'1', "2nd":'2', "3rd":'3'},
1027 "using dict as input type")
1028
1029 def test_using_sequence(self):
1030 # Test passing in a sequence of two-item sequences as an argument.
1031 self.help_inputtype([('1st', '1'), ('2nd', '2'), ('3rd', '3')],
1032 "using sequence of two-item tuples as input")
1033
1034 def test_quoting(self):
1035 # Make sure keys and values are quoted using quote_plus()
1036 given = {"&":"="}
1037 expect = "%s=%s" % (hexescape('&'), hexescape('='))
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001038 result = urllib.parse.urlencode(given)
Brett Cannon74bfd702003-04-25 09:39:47 +00001039 self.assertEqual(expect, result)
1040 given = {"key name":"A bunch of pluses"}
1041 expect = "key+name=A+bunch+of+pluses"
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001042 result = urllib.parse.urlencode(given)
Brett Cannon74bfd702003-04-25 09:39:47 +00001043 self.assertEqual(expect, result)
1044
1045 def test_doseq(self):
1046 # Test that passing True for 'doseq' parameter works correctly
1047 given = {'sequence':['1', '2', '3']}
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001048 expect = "sequence=%s" % urllib.parse.quote_plus(str(['1', '2', '3']))
1049 result = urllib.parse.urlencode(given)
Brett Cannon74bfd702003-04-25 09:39:47 +00001050 self.assertEqual(expect, result)
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001051 result = urllib.parse.urlencode(given, True)
Brett Cannon74bfd702003-04-25 09:39:47 +00001052 for value in given["sequence"]:
1053 expect = "sequence=%s" % value
Ezio Melottib58e0bd2010-01-23 15:40:09 +00001054 self.assertIn(expect, result)
Brett Cannon74bfd702003-04-25 09:39:47 +00001055 self.assertEqual(result.count('&'), 2,
1056 "Expected 2 '&'s, got %s" % result.count('&'))
1057
Jeremy Hylton1ef7c6b2009-03-26 16:57:30 +00001058 def test_empty_sequence(self):
1059 self.assertEqual("", urllib.parse.urlencode({}))
1060 self.assertEqual("", urllib.parse.urlencode([]))
1061
1062 def test_nonstring_values(self):
1063 self.assertEqual("a=1", urllib.parse.urlencode({"a": 1}))
1064 self.assertEqual("a=None", urllib.parse.urlencode({"a": None}))
1065
1066 def test_nonstring_seq_values(self):
1067 self.assertEqual("a=1&a=2", urllib.parse.urlencode({"a": [1, 2]}, True))
1068 self.assertEqual("a=None&a=a",
1069 urllib.parse.urlencode({"a": [None, "a"]}, True))
Georg Brandl2daf6ae2012-02-20 19:54:16 +01001070 data = collections.OrderedDict([("a", 1), ("b", 1)])
Jeremy Hylton1ef7c6b2009-03-26 16:57:30 +00001071 self.assertEqual("a=a&a=b",
Georg Brandl2daf6ae2012-02-20 19:54:16 +01001072 urllib.parse.urlencode({"a": data}, True))
Jeremy Hylton1ef7c6b2009-03-26 16:57:30 +00001073
Senthil Kumarandf022da2010-07-03 17:48:22 +00001074 def test_urlencode_encoding(self):
1075 # ASCII encoding. Expect %3F with errors="replace'
1076 given = (('\u00a0', '\u00c1'),)
1077 expect = '%3F=%3F'
1078 result = urllib.parse.urlencode(given, encoding="ASCII", errors="replace")
1079 self.assertEqual(expect, result)
1080
1081 # Default is UTF-8 encoding.
1082 given = (('\u00a0', '\u00c1'),)
1083 expect = '%C2%A0=%C3%81'
1084 result = urllib.parse.urlencode(given)
1085 self.assertEqual(expect, result)
1086
1087 # Latin-1 encoding.
1088 given = (('\u00a0', '\u00c1'),)
1089 expect = '%A0=%C1'
1090 result = urllib.parse.urlencode(given, encoding="latin-1")
1091 self.assertEqual(expect, result)
1092
1093 def test_urlencode_encoding_doseq(self):
1094 # ASCII Encoding. Expect %3F with errors="replace'
1095 given = (('\u00a0', '\u00c1'),)
1096 expect = '%3F=%3F'
1097 result = urllib.parse.urlencode(given, doseq=True,
1098 encoding="ASCII", errors="replace")
1099 self.assertEqual(expect, result)
1100
1101 # ASCII Encoding. On a sequence of values.
1102 given = (("\u00a0", (1, "\u00c1")),)
1103 expect = '%3F=1&%3F=%3F'
1104 result = urllib.parse.urlencode(given, True,
1105 encoding="ASCII", errors="replace")
1106 self.assertEqual(expect, result)
1107
1108 # Utf-8
1109 given = (("\u00a0", "\u00c1"),)
1110 expect = '%C2%A0=%C3%81'
1111 result = urllib.parse.urlencode(given, True)
1112 self.assertEqual(expect, result)
1113
1114 given = (("\u00a0", (42, "\u00c1")),)
1115 expect = '%C2%A0=42&%C2%A0=%C3%81'
1116 result = urllib.parse.urlencode(given, True)
1117 self.assertEqual(expect, result)
1118
1119 # latin-1
1120 given = (("\u00a0", "\u00c1"),)
1121 expect = '%A0=%C1'
1122 result = urllib.parse.urlencode(given, True, encoding="latin-1")
1123 self.assertEqual(expect, result)
1124
1125 given = (("\u00a0", (42, "\u00c1")),)
1126 expect = '%A0=42&%A0=%C1'
1127 result = urllib.parse.urlencode(given, True, encoding="latin-1")
1128 self.assertEqual(expect, result)
1129
1130 def test_urlencode_bytes(self):
1131 given = ((b'\xa0\x24', b'\xc1\x24'),)
1132 expect = '%A0%24=%C1%24'
1133 result = urllib.parse.urlencode(given)
1134 self.assertEqual(expect, result)
1135 result = urllib.parse.urlencode(given, True)
1136 self.assertEqual(expect, result)
1137
1138 # Sequence of values
1139 given = ((b'\xa0\x24', (42, b'\xc1\x24')),)
1140 expect = '%A0%24=42&%A0%24=%C1%24'
1141 result = urllib.parse.urlencode(given, True)
1142 self.assertEqual(expect, result)
1143
1144 def test_urlencode_encoding_safe_parameter(self):
1145
1146 # Send '$' (\x24) as safe character
1147 # Default utf-8 encoding
1148
1149 given = ((b'\xa0\x24', b'\xc1\x24'),)
1150 result = urllib.parse.urlencode(given, safe=":$")
1151 expect = '%A0$=%C1$'
1152 self.assertEqual(expect, result)
1153
1154 given = ((b'\xa0\x24', b'\xc1\x24'),)
1155 result = urllib.parse.urlencode(given, doseq=True, safe=":$")
1156 expect = '%A0$=%C1$'
1157 self.assertEqual(expect, result)
1158
1159 # Safe parameter in sequence
1160 given = ((b'\xa0\x24', (b'\xc1\x24', 0xd, 42)),)
1161 expect = '%A0$=%C1$&%A0$=13&%A0$=42'
1162 result = urllib.parse.urlencode(given, True, safe=":$")
1163 self.assertEqual(expect, result)
1164
1165 # Test all above in latin-1 encoding
1166
1167 given = ((b'\xa0\x24', b'\xc1\x24'),)
1168 result = urllib.parse.urlencode(given, safe=":$",
1169 encoding="latin-1")
1170 expect = '%A0$=%C1$'
1171 self.assertEqual(expect, result)
1172
1173 given = ((b'\xa0\x24', b'\xc1\x24'),)
1174 expect = '%A0$=%C1$'
1175 result = urllib.parse.urlencode(given, doseq=True, safe=":$",
1176 encoding="latin-1")
1177
1178 given = ((b'\xa0\x24', (b'\xc1\x24', 0xd, 42)),)
1179 expect = '%A0$=%C1$&%A0$=13&%A0$=42'
1180 result = urllib.parse.urlencode(given, True, safe=":$",
1181 encoding="latin-1")
1182 self.assertEqual(expect, result)
1183
class Pathname_Tests(unittest.TestCase):
    """Test pathname2url() and url2pathname()"""

    def test_basic(self):
        # Make sure a simple relative path converts in both directions
        expected_path = os.path.join("parts", "of", "a", "path")
        expected_url = "parts/of/a/path"
        result = urllib.request.pathname2url(expected_path)
        self.assertEqual(expected_url, result,
                         "pathname2url() failed; %s != %s" %
                         (expected_url, result))
        result = urllib.request.url2pathname(expected_url)
        self.assertEqual(expected_path, result,
                         "url2pathname() failed; %s != %s" %
                         (expected_path, result))

    def test_quoting(self):
        # Test automatic quoting and unquoting works for pathname2url() and
        # url2pathname() respectively
        given = os.path.join("needs", "quot=ing", "here")
        expect = "needs/%s/here" % urllib.parse.quote("quot=ing")
        result = urllib.request.pathname2url(given)
        self.assertEqual(expect, result,
                         "pathname2url() failed; %s != %s" %
                         (expect, result))
        # Converting the URL back must give the original path.
        expect = given
        result = urllib.request.url2pathname(result)
        self.assertEqual(expect, result,
                         "url2pathname() failed; %s != %s" %
                         (expect, result))
        # Spaces are escaped with quote(), not quote_plus().
        given = os.path.join("make sure", "using_quote")
        expect = "%s/using_quote" % urllib.parse.quote("make sure")
        result = urllib.request.pathname2url(given)
        self.assertEqual(expect, result,
                         "pathname2url() failed; %s != %s" %
                         (expect, result))
        # A '+' in the URL stays a '+': unquote(), not unquote_plus().
        given = "make+sure/using_unquote"
        expect = os.path.join("make+sure", "using_unquote")
        result = urllib.request.url2pathname(given)
        self.assertEqual(expect, result,
                         "url2pathname() failed; %s != %s" %
                         (expect, result))

    @unittest.skipUnless(sys.platform == 'win32',
                         'test specific to the urllib.url2path function.')
    def test_ntpath(self):
        # Several spellings of a drive root must all map to 'C:\'.
        given = ('/C:/', '///C:/', '/C|//')
        expect = 'C:\\'
        for url in given:
            result = urllib.request.url2pathname(url)
            self.assertEqual(expect, result,
                             'urllib.request.url2pathname() failed; %s != %s' %
                             (expect, result))
        given = '///C|/path'
        expect = 'C:\\path'
        result = urllib.request.url2pathname(given)
        self.assertEqual(expect, result,
                         'urllib.request.url2pathname() failed; %s != %s' %
                         (expect, result))
1243
class Utility_Tests(unittest.TestCase):
    """Testcase to test the various utility functions in the urllib."""

    def test_splitpasswd(self):
        """Some of the password examples are not sensible, but they were
        added to conform to RFC2617 and to address issue4675.
        """
        # (input, expected (user, password) pair)
        cases = (
            ('user:ab', ('user', 'ab')),
            ('user:a\nb', ('user', 'a\nb')),
            ('user:a\tb', ('user', 'a\tb')),
            ('user:a\rb', ('user', 'a\rb')),
            ('user:a\fb', ('user', 'a\fb')),
            ('user:a\vb', ('user', 'a\vb')),
            ('user:a:b', ('user', 'a:b')),
            ('user:a b', ('user', 'a b')),
            ('user 2:ab', ('user 2', 'ab')),
            ('user+1:a+b', ('user+1', 'a+b')),
        )
        for credentials, parts in cases:
            self.assertEqual(parts, urllib.parse.splitpasswd(credentials))

    def test_thishost(self):
        """Test the urllib.request.thishost utility function returns a tuple"""
        host_addresses = urllib.request.thishost()
        self.assertIsInstance(host_addresses, tuple)
1265
Senthil Kumaran690ce9b2009-05-05 18:41:13 +00001266
class URLopener_Tests(unittest.TestCase):
    """Testcase to test the open method of URLopener class."""

    def test_quoted_open(self):
        # Opener for the made-up "spam" scheme whose handler simply hands
        # back the URL it was given, exposing how open() quoted it.
        class DummyURLopener(urllib.request.URLopener):
            def open_spam(self, url):
                return url

        # A space in the URL is percent-escaped before dispatch.
        opened = DummyURLopener().open('spam://example/ /')
        self.assertEqual(opened, '//example/%20/')

        # test the safe characters are not quoted by urlopen
        safe_url = "spam://c:|windows%/:=&?~#+!$,;'@()*[]|/path/"
        opened = DummyURLopener().open(safe_url)
        self.assertEqual(opened,
                         "//c:|windows%/:=&?~#+!$,;'@()*[]|/path/")
1282
# The tests below are commented out.
# It is unclear why they keep failing on Windows and SPARC.
# Everywhere else they work fine, but on those machines they sometimes
# fail in one test and sometimes in another.  On my Linux machine the
# tests pass.
# If anybody has one of the problematic environments, please help!
# . Facundo
1290#
1291# def server(evt):
Georg Brandlf78e02b2008-06-10 17:40:04 +00001292# import socket, time
Guido van Rossume7ba4952007-06-06 23:52:48 +00001293# serv = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
1294# serv.settimeout(3)
1295# serv.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
1296# serv.bind(("", 9093))
1297# serv.listen(5)
1298# try:
1299# conn, addr = serv.accept()
1300# conn.send("1 Hola mundo\n")
1301# cantdata = 0
1302# while cantdata < 13:
1303# data = conn.recv(13-cantdata)
1304# cantdata += len(data)
1305# time.sleep(.3)
1306# conn.send("2 No more lines\n")
1307# conn.close()
1308# except socket.timeout:
1309# pass
1310# finally:
1311# serv.close()
1312# evt.set()
1313#
1314# class FTPWrapperTests(unittest.TestCase):
1315#
1316# def setUp(self):
Georg Brandlf78e02b2008-06-10 17:40:04 +00001317# import ftplib, time, threading
Guido van Rossume7ba4952007-06-06 23:52:48 +00001318# ftplib.FTP.port = 9093
1319# self.evt = threading.Event()
1320# threading.Thread(target=server, args=(self.evt,)).start()
1321# time.sleep(.1)
1322#
1323# def tearDown(self):
1324# self.evt.wait()
1325#
1326# def testBasic(self):
1327# # connects
1328# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [])
Georg Brandlf78e02b2008-06-10 17:40:04 +00001329# ftp.close()
Guido van Rossume7ba4952007-06-06 23:52:48 +00001330#
1331# def testTimeoutNone(self):
Georg Brandlf78e02b2008-06-10 17:40:04 +00001332# # global default timeout is ignored
1333# import socket
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00001334# self.assertTrue(socket.getdefaulttimeout() is None)
Guido van Rossume7ba4952007-06-06 23:52:48 +00001335# socket.setdefaulttimeout(30)
1336# try:
1337# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [])
1338# finally:
Georg Brandlf78e02b2008-06-10 17:40:04 +00001339# socket.setdefaulttimeout(None)
Guido van Rossume7ba4952007-06-06 23:52:48 +00001340# self.assertEqual(ftp.ftp.sock.gettimeout(), 30)
Georg Brandlf78e02b2008-06-10 17:40:04 +00001341# ftp.close()
Guido van Rossume7ba4952007-06-06 23:52:48 +00001342#
Georg Brandlf78e02b2008-06-10 17:40:04 +00001343# def testTimeoutDefault(self):
1344# # global default timeout is used
1345# import socket
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00001346# self.assertTrue(socket.getdefaulttimeout() is None)
Georg Brandlf78e02b2008-06-10 17:40:04 +00001347# socket.setdefaulttimeout(30)
1348# try:
1349# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [])
1350# finally:
1351# socket.setdefaulttimeout(None)
1352# self.assertEqual(ftp.ftp.sock.gettimeout(), 30)
1353# ftp.close()
1354#
1355# def testTimeoutValue(self):
1356# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [],
1357# timeout=30)
1358# self.assertEqual(ftp.ftp.sock.gettimeout(), 30)
1359# ftp.close()
Guido van Rossume7ba4952007-06-06 23:52:48 +00001360
class RequestTests(unittest.TestCase):
    """Unit tests for urllib.request.Request."""

    def test_default_values(self):
        """With no explicit method, get_method() depends on the data argument."""
        url = "http://www.python.org"
        # No data argument at all.
        bare = urllib.request.Request(url)
        self.assertEqual(bare.get_method(), 'GET')
        # A data argument, even an empty dict, switches the answer to POST.
        with_data = urllib.request.Request(url, {})
        self.assertEqual(with_data.get_method(), 'POST')

    def test_with_method_arg(self):
        """An explicit method wins, whether given at construction or set later."""
        url = "http://www.python.org"
        make_request = urllib.request.Request

        # method='HEAD' is honoured both without and with a data argument.
        for positional in ((url,), (url, {})):
            head = make_request(*positional, method='HEAD')
            self.assertEqual(head.method, 'HEAD')
            self.assertEqual(head.get_method(), 'HEAD')

        # Reassigning the attribute after construction changes get_method().
        mutable = make_request(url, method='GET')
        self.assertEqual(mutable.get_method(), 'GET')
        mutable.method = 'HEAD'
        self.assertEqual(mutable.get_method(), 'HEAD')
Skip Montanaro080c9972001-01-28 21:12:22 +00001383
1384
def test_main():
    """Hand the test case classes listed below to support.run_unittest()."""
    test_classes = (
        urlopen_FileTests,
        urlopen_HttpTests,
        urlopen_DataTests,
        urlretrieve_FileTests,
        urlretrieve_HttpTests,
        ProxyTests,
        QuotingTests,
        UnquotingTests,
        urlencode_Tests,
        Pathname_Tests,
        Utility_Tests,
        URLopener_Tests,
        #FTPWrapperTests,
        RequestTests,
    )
    support.run_unittest(*test_classes)
Brett Cannon74bfd702003-04-25 09:39:47 +00001402
1403
1404
# Call test_main() only when this file is executed directly, not on import.
if __name__ == '__main__':
    test_main()