blob: 94f640b923c95b8b77f5e3706db1210423ec3cf0 [file] [log] [blame]
"""Regression tests for urllib"""
2
Jeremy Hylton1afc1692008-06-18 20:49:58 +00003import urllib.parse
4import urllib.request
guido@google.coma119df92011-03-29 11:41:02 -07005import urllib.error
Georg Brandl24420152008-05-26 16:32:26 +00006import http.client
Barry Warsaw820c1202008-06-12 04:06:45 +00007import email.message
Jeremy Hylton66dc8c52007-08-04 03:42:26 +00008import io
Brett Cannon74bfd702003-04-25 09:39:47 +00009import unittest
Benjamin Petersonee8712c2008-05-20 21:35:26 +000010from test import support
Brett Cannon74bfd702003-04-25 09:39:47 +000011import os
Senthil Kumaran2d2ea1b2011-04-14 13:16:30 +080012import sys
Georg Brandl5a650a22005-08-26 08:51:34 +000013import tempfile
Senthil Kumaran277e9092013-04-10 20:51:19 -070014from nturl2path import url2pathname, pathname2url
Jeremy Hylton6102e292000-08-31 15:48:10 +000015
Senthil Kumaranc5c5a142012-01-14 19:09:04 +080016from base64 import b64encode
Georg Brandl2daf6ae2012-02-20 19:54:16 +010017import collections
Senthil Kumaranc5c5a142012-01-14 19:09:04 +080018
Senthil Kumaran8b081b72013-04-10 20:53:12 -070019
def hexescape(char):
    """Escape char as RFC 2396 specifies.

    Return '%' followed by the upper-case hexadecimal value of ord(char),
    zero-padded to at least two digits.
    """
    return "%%%02X" % ord(char)
Jeremy Hylton6102e292000-08-31 15:48:10 +000026
# Shortcut for testing FancyURLopener
_urlopener = None


def urlopen(url, data=None, proxies=None):
    """urlopen(url [, data]) -> open file-like object

    Open *url* through a urllib.request.FancyURLopener.  When *proxies* is
    given, a fresh opener is built for this call only.  Otherwise a single
    opener is created on first use and cached in the module-level
    ``_urlopener`` so later calls reuse it.
    """
    global _urlopener
    if proxies is not None:
        opener = urllib.request.FancyURLopener(proxies=proxies)
    elif not _urlopener:
        # The DeprecationWarning raised while building the shared opener is
        # captured by support.check_warnings().
        with support.check_warnings(
                ('FancyURLopener style of invoking requests is deprecated.',
                 DeprecationWarning)):
            opener = urllib.request.FancyURLopener()
            _urlopener = opener
    else:
        opener = _urlopener
    if data is None:
        return opener.open(url)
    return opener.open(url, data)
48
Senthil Kumarance260142011-11-01 01:35:17 +080049
class FakeHTTPMixin(object):
    """Mixin that swaps http.client.HTTPConnection for a canned-response fake.

    Call fakehttp() with the raw bytes the "server" should answer with, and
    always pair it with unfakehttp() (typically in a ``finally`` clause) so
    the real connection class is put back.
    """

    def fakehttp(self, fakedata):
        """Install a fake HTTPConnection whose socket replays *fakedata*."""

        class FakeSocket(io.BytesIO):
            # Reference count shared between the "socket" and the file
            # object returned by makefile(); the buffer is really closed
            # only once every holder has called close().
            io_refs = 1

            def sendall(self, data):
                # Keep the last chunk written so tests can inspect the
                # request that was sent.
                FakeHTTPConnection.buf = data

            def makefile(self, *args, **kwds):
                self.io_refs += 1
                return self

            def read(self, amt=None):
                if self.closed:
                    return b""
                return super().read(amt)

            def readline(self, length=None):
                if self.closed:
                    return b""
                return super().readline(length)

            def close(self):
                self.io_refs -= 1
                if self.io_refs == 0:
                    super().close()

        class FakeHTTPConnection(http.client.HTTPConnection):

            # buffer to store data for verification in urlopen tests.
            buf = None

            def connect(self):
                self.sock = FakeSocket(fakedata)

        # Remember the class being replaced so unfakehttp() can restore it.
        self._connection_class = http.client.HTTPConnection
        http.client.HTTPConnection = FakeHTTPConnection

    def unfakehttp(self):
        """Restore the connection class saved by the last fakehttp() call."""
        http.client.HTTPConnection = self._connection_class
90
91
class urlopen_FileTests(unittest.TestCase):
    """Test urlopen() opening a temporary file.

    Try to test as much functionality as possible so as to cut down on reliance
    on connecting to the Net for testing.

    """

    def setUp(self):
        # Create a temp file to use for testing
        self.text = bytes("test_urllib: %s\n" % self.__class__.__name__,
                          "ascii")
        with open(support.TESTFN, 'wb') as f:
            f.write(self.text)
        # Registered as a cleanup (rather than done in tearDown) so the file
        # is removed even when urlopen() below raises and tearDown is
        # therefore never invoked.
        self.addCleanup(os.remove, support.TESTFN)
        self.pathname = support.TESTFN
        self.returned_obj = urlopen("file:%s" % self.pathname)

    def tearDown(self):
        """Shut down the open object"""
        self.returned_obj.close()

    def test_interface(self):
        # Make sure object returned by urlopen() has the specified methods
        for attr in ("read", "readline", "readlines", "fileno",
                     "close", "info", "geturl", "getcode", "__iter__"):
            self.assertTrue(hasattr(self.returned_obj, attr),
                            "object returned by urlopen() lacks %s attribute" %
                            attr)

    def test_read(self):
        self.assertEqual(self.text, self.returned_obj.read())

    def test_readline(self):
        self.assertEqual(self.text, self.returned_obj.readline())
        self.assertEqual(b'', self.returned_obj.readline(),
                         "calling readline() after exhausting the file did not"
                         " return an empty string")

    def test_readlines(self):
        lines_list = self.returned_obj.readlines()
        self.assertEqual(len(lines_list), 1,
                         "readlines() returned the wrong number of lines")
        self.assertEqual(lines_list[0], self.text,
                         "readlines() returned improper text")

    def test_fileno(self):
        file_num = self.returned_obj.fileno()
        self.assertIsInstance(file_num, int, "fileno() did not return an int")
        self.assertEqual(os.read(file_num, len(self.text)), self.text,
                         "Reading on the file descriptor returned by fileno() "
                         "did not return the expected text")

    def test_close(self):
        # Test close() by calling it here and then having it be called again
        # by the tearDown() method for the test
        self.returned_obj.close()

    def test_info(self):
        self.assertIsInstance(self.returned_obj.info(), email.message.Message)

    def test_geturl(self):
        self.assertEqual(self.returned_obj.geturl(), self.pathname)

    def test_getcode(self):
        self.assertIsNone(self.returned_obj.getcode())

    def test_iter(self):
        # Test iterator
        # Don't need to count number of iterations since test would fail the
        # instant it returned anything beyond the first line from the
        # comparison.
        # Use the iterator in the usual implicit way to test for ticket #4608.
        for line in self.returned_obj:
            self.assertEqual(line, self.text)

    def test_relativelocalfile(self):
        # A relative path without a scheme is not a valid URL.
        self.assertRaises(ValueError, urllib.request.urlopen,
                          './' + self.pathname)
173
class ProxyTests(unittest.TestCase):
    """Test proxy discovery from environment variables."""

    def setUp(self):
        # Records changes to env vars
        self.env = support.EnvironmentVarGuard()
        # Delete all proxy related env vars.  Iterate over a copy of the
        # keys because unsetting mutates os.environ.
        for k in list(os.environ):
            if 'proxy' in k.lower():
                self.env.unset(k)

    def tearDown(self):
        # Restore all proxy related env vars
        self.env.__exit__()
        del self.env

    def test_getproxies_environment_keep_no_proxies(self):
        self.env.set('NO_PROXY', 'localhost')
        proxies = urllib.request.getproxies_environment()
        # getproxies_environment() uses lower-cased, truncated keys (the
        # '_proxy' suffix is dropped)
        self.assertEqual('localhost', proxies['no'])
        # List of no_proxies with space.
        self.env.set('NO_PROXY', 'localhost, anotherdomain.com, newdomain.com')
        self.assertTrue(
            urllib.request.proxy_bypass_environment('anotherdomain.com'))
Benjamin Peterson9bc93512008-09-22 22:10:59 +0000197
class urlopen_HttpTests(unittest.TestCase, FakeHTTPMixin):
    """Test urlopen() opening a fake http connection."""

    def check_read(self, ver):
        # *ver* is the HTTP version as bytes, e.g. b"1.1".
        self.fakehttp(b"HTTP/" + ver + b" 200 OK\r\n\r\nHello!")
        try:
            fp = urlopen("http://python.org/")
            self.assertEqual(fp.readline(), b"Hello!")
            self.assertEqual(fp.readline(), b"")
            self.assertEqual(fp.geturl(), 'http://python.org/')
            self.assertEqual(fp.getcode(), 200)
        finally:
            self.unfakehttp()

    def test_url_fragment(self):
        # Issue #11703: geturl() omits fragments in the original URL.
        url = 'http://docs.python.org/library/urllib.html#OK'
        self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello!")
        try:
            fp = urllib.request.urlopen(url)
            self.assertEqual(fp.geturl(), url)
        finally:
            self.unfakehttp()

    def test_willclose(self):
        self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello!")
        try:
            resp = urlopen("http://www.python.org")
            self.assertTrue(resp.fp.will_close)
        finally:
            self.unfakehttp()

    def test_read_0_9(self):
        # "0.9" response accepted (but not "simple responses" without
        # a status line)
        self.check_read(b"0.9")

    def test_read_1_0(self):
        self.check_read(b"1.0")

    def test_read_1_1(self):
        self.check_read(b"1.1")

    def test_read_bogus(self):
        # urlopen() should raise OSError for many error codes.
        self.fakehttp(b'''HTTP/1.1 401 Authentication Required
Date: Wed, 02 Jan 2008 03:03:54 GMT
Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
Connection: close
Content-Type: text/html; charset=iso-8859-1
''')
        try:
            self.assertRaises(OSError, urlopen, "http://python.org/")
        finally:
            self.unfakehttp()

    def test_invalid_redirect(self):
        # urlopen() should raise HTTPError when a redirect points at a
        # file: URL.
        self.fakehttp(b'''HTTP/1.1 302 Found
Date: Wed, 02 Jan 2008 03:03:54 GMT
Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
Location: file://guidocomputer.athome.com:/python/license
Connection: close
Content-Type: text/html; charset=iso-8859-1
''')
        try:
            self.assertRaises(urllib.error.HTTPError, urlopen,
                              "http://python.org/")
        finally:
            self.unfakehttp()

    def test_empty_socket(self):
        # urlopen() raises OSError if the underlying socket does not send any
        # data. (#1680230)
        self.fakehttp(b'')
        try:
            self.assertRaises(OSError, urlopen, "http://something")
        finally:
            self.unfakehttp()

    def test_missing_localfile(self):
        # Test for #10836
        with self.assertRaises(urllib.error.URLError) as e:
            urlopen('file://localhost/a/file/which/doesnot/exists.py')
        self.assertTrue(e.exception.filename)
        self.assertTrue(e.exception.reason)

    def test_file_notexists(self):
        fd, tmp_file = tempfile.mkstemp()
        tmp_fileurl = 'file://localhost/' + tmp_file.replace(os.path.sep, '/')
        try:
            self.assertTrue(os.path.exists(tmp_file))
            with urlopen(tmp_fileurl) as fobj:
                self.assertTrue(fobj)
        finally:
            os.close(fd)
            os.unlink(tmp_file)
        # The same URL must fail once the file is gone.
        self.assertFalse(os.path.exists(tmp_file))
        with self.assertRaises(urllib.error.URLError):
            urlopen(tmp_fileurl)

    def test_ftp_nohost(self):
        test_ftp_url = 'ftp:///path'
        with self.assertRaises(urllib.error.URLError) as e:
            urlopen(test_ftp_url)
        self.assertFalse(e.exception.filename)
        self.assertTrue(e.exception.reason)

    def test_ftp_nonexisting(self):
        with self.assertRaises(urllib.error.URLError) as e:
            urlopen('ftp://localhost/a/file/which/doesnot/exists.py')
        self.assertFalse(e.exception.filename)
        self.assertTrue(e.exception.reason)

    def test_userpass_inurl(self):
        self.fakehttp(b"HTTP/1.0 200 OK\r\n\r\nHello!")
        try:
            fp = urlopen("http://user:pass@python.org/")
            self.assertEqual(fp.readline(), b"Hello!")
            self.assertEqual(fp.readline(), b"")
            self.assertEqual(fp.geturl(), 'http://user:pass@python.org/')
            self.assertEqual(fp.getcode(), 200)
        finally:
            self.unfakehttp()

    def test_userpass_inurl_w_spaces(self):
        self.fakehttp(b"HTTP/1.0 200 OK\r\n\r\nHello!")
        try:
            userpass = "a b:c d"
            url = "http://{}@python.org/".format(userpass)
            # fakehttp() has already patched http.client.HTTPConnection, so
            # this is the fake class whose ``buf`` records the request.
            fakehttp_wrapper = http.client.HTTPConnection
            authorization = ("Authorization: Basic %s\r\n" %
                             b64encode(userpass.encode("ASCII")).decode("ASCII"))
            fp = urlopen(url)
            # The authorization header must be in place
            self.assertIn(authorization, fakehttp_wrapper.buf.decode("UTF-8"))
            self.assertEqual(fp.readline(), b"Hello!")
            self.assertEqual(fp.readline(), b"")
            # the spaces are quoted in URL so no match
            self.assertNotEqual(fp.geturl(), url)
            self.assertEqual(fp.getcode(), 200)
        finally:
            self.unfakehttp()

    def test_URLopener_deprecation(self):
        with support.check_warnings(('', DeprecationWarning)):
            urllib.request.URLopener()
Senthil Kumaran38b968b92012-03-14 13:43:53 -0700346
class urlopen_DataTests(unittest.TestCase):
    """Test urlopen() opening a data URL."""

    def setUp(self):
        # text containing URL special- and unicode-characters
        self.text = "test data URLs :;,%=& \u00f6 \u00c4 "
        # 2x1 pixel RGB PNG image with one black and one white pixel
        self.image = (
            b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\x02\x00\x00\x00'
            b'\x01\x08\x02\x00\x00\x00{@\xe8\xdd\x00\x00\x00\x01sRGB\x00\xae'
            b'\xce\x1c\xe9\x00\x00\x00\x0fIDAT\x08\xd7c```\xf8\xff\xff?\x00'
            b'\x06\x01\x02\xfe\no/\x1e\x00\x00\x00\x00IEND\xaeB`\x82')

        self.text_url = (
            "data:text/plain;charset=UTF-8,test%20data%20URLs%20%3A%3B%2C%25%3"
            "D%26%20%C3%B6%20%C3%84%20")
        self.text_url_base64 = (
            "data:text/plain;charset=ISO-8859-1;base64,dGVzdCBkYXRhIFVSTHMgOjs"
            "sJT0mIPYgxCA%3D")
        # base64 encoded data URL that contains ignorable spaces,
        # such as "\n", " ", "%0A", and "%20".
        # The first line carries the media type and the base64 encoding of
        # the first 30 bytes of self.image.
        self.image_url = (
            "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAIAAAABCAIAAAB7\n"
            "QOjdAAAAAXNSR0IArs4c6QAAAA9JREFUCNdj%0AYGBg%2BP//PwAGAQL%2BCm8 "
            "vHgAAAABJRU5ErkJggg%3D%3D%0A%20")

        self.text_url_resp = urllib.request.urlopen(self.text_url)
        self.text_url_base64_resp = urllib.request.urlopen(
            self.text_url_base64)
        self.image_url_resp = urllib.request.urlopen(self.image_url)

    def test_interface(self):
        # Make sure object returned by urlopen() has the specified methods
        for attr in ("read", "readline", "readlines",
                     "close", "info", "geturl", "getcode", "__iter__"):
            self.assertTrue(hasattr(self.text_url_resp, attr),
                            "object returned by urlopen() lacks %s attribute" %
                            attr)

    def test_info(self):
        self.assertIsInstance(self.text_url_resp.info(), email.message.Message)
        self.assertEqual(self.text_url_base64_resp.info().get_params(),
                         [('text/plain', ''), ('charset', 'ISO-8859-1')])
        self.assertEqual(self.image_url_resp.info()['content-length'],
                         str(len(self.image)))
        # A data URL without a media type gets the default one.
        self.assertEqual(urllib.request.urlopen("data:,").info().get_params(),
                         [('text/plain', ''), ('charset', 'US-ASCII')])

    def test_geturl(self):
        self.assertEqual(self.text_url_resp.geturl(), self.text_url)
        self.assertEqual(self.text_url_base64_resp.geturl(),
                         self.text_url_base64)
        self.assertEqual(self.image_url_resp.geturl(), self.image_url)

    def test_read_text(self):
        self.assertEqual(self.text_url_resp.read().decode(
            dict(self.text_url_resp.info().get_params())['charset']), self.text)

    def test_read_text_base64(self):
        self.assertEqual(self.text_url_base64_resp.read().decode(
            dict(self.text_url_base64_resp.info().get_params())['charset']),
            self.text)

    def test_read_image(self):
        self.assertEqual(self.image_url_resp.read(), self.image)

    def test_missing_comma(self):
        self.assertRaises(ValueError, urllib.request.urlopen,
                          'data:text/plain')

    def test_invalid_base64_data(self):
        # missing padding character
        self.assertRaises(ValueError, urllib.request.urlopen,
                          'data:;base64,Cg=')
419
class urlretrieve_FileTests(unittest.TestCase):
    """Test urllib.urlretrieve() on local files"""

    def setUp(self):
        # Create a list of temporary files. Each item in the list is a file
        # name (absolute path or relative to the current working directory).
        # All files in this list will be deleted in the tearDown method. Note,
        # this only helps to make sure temporary files get deleted, but it
        # does nothing about trying to close files that may still be open. It
        # is the responsibility of the developer to properly close files even
        # when exceptional conditions occur.
        self.tempFiles = []

        # Create a temporary file.
        self.registerFileForCleanUp(support.TESTFN)
        self.text = b'testing urllib.urlretrieve'
        with open(support.TESTFN, 'wb') as f:
            f.write(self.text)

    def tearDown(self):
        # Delete the temporary files.  Best effort: a registered file may
        # never have been created, so filesystem errors are ignored.
        for each in self.tempFiles:
            try:
                os.remove(each)
            except OSError:
                pass

    def constructLocalFileUrl(self, filePath):
        """Return a file:// URL for *filePath*, skipping the test if the
        absolute path cannot be encoded as UTF-8."""
        filePath = os.path.abspath(filePath)
        try:
            filePath.encode("utf-8")
        except UnicodeEncodeError:
            raise unittest.SkipTest("filePath is not encodable to utf8")
        return "file://%s" % urllib.request.pathname2url(filePath)

    def createNewTempFile(self, data=b""):
        """Creates a new temporary file containing the specified data,
        registers the file for deletion during the test fixture tear down, and
        returns the absolute path of the file."""

        newFd, newFilePath = tempfile.mkstemp()
        self.registerFileForCleanUp(newFilePath)
        with os.fdopen(newFd, "wb") as newFile:
            newFile.write(data)
        return newFilePath

    def registerFileForCleanUp(self, fileName):
        self.tempFiles.append(fileName)

    def test_basic(self):
        # Make sure that a local file just gets its own location returned and
        # a headers value is returned.
        result = urllib.request.urlretrieve("file:%s" % support.TESTFN)
        self.assertEqual(result[0], support.TESTFN)
        self.assertIsInstance(result[1], email.message.Message,
                              "did not get a email.message.Message instance "
                              "as second returned value")

    def test_copy(self):
        # Test that setting the filename argument works.
        second_temp = "%s.2" % support.TESTFN
        self.registerFileForCleanUp(second_temp)
        result = urllib.request.urlretrieve(self.constructLocalFileUrl(
            support.TESTFN), second_temp)
        self.assertEqual(second_temp, result[0])
        self.assertTrue(os.path.exists(second_temp), "copy of the file was not "
                                                     "made")
        with open(second_temp, 'rb') as f:
            text = f.read()
        self.assertEqual(self.text, text)

    def test_reporthook(self):
        # Make sure that the reporthook works.
        # One-element list so the hook can update the expected block number.
        count_holder = [0]

        def hooktester(block_count, block_read_size, file_size):
            self.assertIsInstance(block_count, int)
            self.assertIsInstance(block_read_size, int)
            self.assertIsInstance(file_size, int)
            self.assertEqual(block_count, count_holder[0])
            count_holder[0] = count_holder[0] + 1

        second_temp = "%s.2" % support.TESTFN
        self.registerFileForCleanUp(second_temp)
        urllib.request.urlretrieve(
            self.constructLocalFileUrl(support.TESTFN),
            second_temp, hooktester)

    def test_reporthook_0_bytes(self):
        # Test on zero length file. Should call reporthook only 1 time.
        report = []

        def hooktester(block_count, block_read_size, file_size):
            report.append((block_count, block_read_size, file_size))

        srcFileName = self.createNewTempFile()
        urllib.request.urlretrieve(self.constructLocalFileUrl(srcFileName),
                                   support.TESTFN, hooktester)
        self.assertEqual(len(report), 1)
        self.assertEqual(report[0][2], 0)

    def test_reporthook_5_bytes(self):
        # Test on 5 byte file. Should call reporthook only 2 times (once when
        # the "network connection" is established and once when the block is
        # read).
        report = []

        def hooktester(block_count, block_read_size, file_size):
            report.append((block_count, block_read_size, file_size))

        srcFileName = self.createNewTempFile(b"x" * 5)
        urllib.request.urlretrieve(self.constructLocalFileUrl(srcFileName),
                                   support.TESTFN, hooktester)
        self.assertEqual(len(report), 2)
        self.assertEqual(report[0][2], 5)
        self.assertEqual(report[1][2], 5)

    def test_reporthook_8193_bytes(self):
        # Test on 8193 byte file. Should call reporthook only 3 times (once
        # when the "network connection" is established, once for the next 8192
        # bytes, and once for the last byte).
        report = []

        def hooktester(block_count, block_read_size, file_size):
            report.append((block_count, block_read_size, file_size))

        srcFileName = self.createNewTempFile(b"x" * 8193)
        urllib.request.urlretrieve(self.constructLocalFileUrl(srcFileName),
                                   support.TESTFN, hooktester)
        self.assertEqual(len(report), 3)
        self.assertEqual(report[0][2], 8193)
        self.assertEqual(report[0][1], 8192)
        self.assertEqual(report[1][1], 8192)
        self.assertEqual(report[2][1], 8192)
Skip Montanaro080c9972001-01-28 21:12:22 +0000558
Senthil Kumarance260142011-11-01 01:35:17 +0800559
class urlretrieve_HttpTests(unittest.TestCase, FakeHTTPMixin):
    """Test urllib.urlretrieve() using fake http connections"""

    def test_short_content_raises_ContentTooShortError(self):
        # urlretrieve() is called without a filename, so it downloads into a
        # temporary file; urlcleanup() removes such leftovers.
        self.addCleanup(urllib.request.urlcleanup)

        # The response announces 100 bytes but delivers far fewer.
        self.fakehttp(b'''HTTP/1.1 200 OK
Date: Wed, 02 Jan 2008 03:03:54 GMT
Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
Connection: close
Content-Length: 100
Content-Type: text/html; charset=iso-8859-1

FF
''')

        def _reporthook(par1, par2, par3):
            pass

        with self.assertRaises(urllib.error.ContentTooShortError):
            try:
                urllib.request.urlretrieve('http://example.com/',
                                           reporthook=_reporthook)
            finally:
                self.unfakehttp()

    def test_short_content_raises_ContentTooShortError_without_reporthook(self):
        # urlretrieve() is called without a filename, so it downloads into a
        # temporary file; urlcleanup() removes such leftovers.
        self.addCleanup(urllib.request.urlcleanup)

        # The response announces 100 bytes but delivers far fewer.
        self.fakehttp(b'''HTTP/1.1 200 OK
Date: Wed, 02 Jan 2008 03:03:54 GMT
Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
Connection: close
Content-Length: 100
Content-Type: text/html; charset=iso-8859-1

FF
''')
        with self.assertRaises(urllib.error.ContentTooShortError):
            try:
                urllib.request.urlretrieve('http://example.com/')
            finally:
                self.unfakehttp()
599
600
class QuotingTests(unittest.TestCase):
    r"""Tests for urllib.parse.quote() and urllib.parse.quote_plus()

    According to RFC 2396 (Uniform Resource Identifiers), to escape a
    character you write it as '%' + <2 character US-ASCII hex value>.
    The Python code of ``'%' + hex(ord(<character>))[2:]`` escapes a
    character properly. Case does not matter on the hex letters.

    The various character sets specified are:

    Reserved characters : ";/?:@&=+$,"
        Have special meaning in URIs and must be escaped if not being used for
        their special meaning
    Data characters : letters, digits, and "-_.!~*'()"
        Unreserved and do not need to be escaped; can be, though, if desired
    Control characters : 0x00 - 0x1F, 0x7F
        Have no use in URIs so must be escaped
    space : 0x20
        Must be escaped
    Delimiters : '<>#%"'
        Must be escaped
    Unwise : "{}|\^[]`"
        Must be escaped

    """

    def test_never_quote(self):
        # Make sure quote() does not quote letters, digits, and "_.-"
        do_not_quote = ''.join(["ABCDEFGHIJKLMNOPQRSTUVWXYZ",
                                "abcdefghijklmnopqrstuvwxyz",
                                "0123456789",
                                "_.-"])
        result = urllib.parse.quote(do_not_quote)
        self.assertEqual(do_not_quote, result,
                         "using quote(): %r != %r" % (do_not_quote, result))
        result = urllib.parse.quote_plus(do_not_quote)
        self.assertEqual(do_not_quote, result,
                         "using quote_plus(): %r != %r" %
                         (do_not_quote, result))

    def test_default_safe(self):
        # Test '/' is default value for 'safe' parameter
        self.assertEqual(urllib.parse.quote.__defaults__[0], '/')

    def test_safe(self):
        # Test setting 'safe' parameter does what it should do
        quote_by_default = "<>"
        result = urllib.parse.quote(quote_by_default, safe=quote_by_default)
        self.assertEqual(quote_by_default, result,
                         "using quote(): %r != %r" %
                         (quote_by_default, result))
        result = urllib.parse.quote_plus(quote_by_default,
                                         safe=quote_by_default)
        self.assertEqual(quote_by_default, result,
                         "using quote_plus(): %r != %r" %
                         (quote_by_default, result))
        # Safe expressed as bytes rather than str
        result = urllib.parse.quote(quote_by_default, safe=b"<>")
        self.assertEqual(quote_by_default, result,
                         "using quote(): %r != %r" %
                         (quote_by_default, result))
        # "Safe" non-ASCII characters should have no effect
        # (Since URIs are not allowed to have non-ASCII characters)
        result = urllib.parse.quote("a\xfcb", encoding="latin-1", safe="\xfc")
        expect = urllib.parse.quote("a\xfcb", encoding="latin-1", safe="")
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" %
                         (expect, result))
        # Same as above, but using a bytes rather than str
        result = urllib.parse.quote("a\xfcb", encoding="latin-1",
                                    safe=b"\xfc")
        expect = urllib.parse.quote("a\xfcb", encoding="latin-1", safe="")
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" %
                         (expect, result))

    def test_default_quoting(self):
        # Make sure all characters that should be quoted are by default sans
        # space (separate test for that).
        should_quote = [chr(num) for num in range(32)]  # For 0x00 - 0x1F
        # The backslash is written as '\\' so the literal does not rely on
        # an unrecognized escape sequence; the resulting string is the same.
        should_quote.append('<>#%"{}|\\^[]`')
        should_quote.append(chr(127))  # For 0x7F
        should_quote = ''.join(should_quote)
        for char in should_quote:
            # %r keeps control characters readable in failure output.
            result = urllib.parse.quote(char)
            self.assertEqual(hexescape(char), result,
                             "using quote(): "
                             "%r should be escaped to %s, not %s" %
                             (char, hexescape(char), result))
            result = urllib.parse.quote_plus(char)
            self.assertEqual(hexescape(char), result,
                             "using quote_plus(): "
                             "%r should be escaped to %s, not %s" %
                             (char, hexescape(char), result))
        del should_quote
        partial_quote = "ab[]cd"
        expected = "ab%5B%5Dcd"
        result = urllib.parse.quote(partial_quote)
        self.assertEqual(expected, result,
                         "using quote(): %r != %r" % (expected, result))
        result = urllib.parse.quote_plus(partial_quote)
        self.assertEqual(expected, result,
                         "using quote_plus(): %r != %r" % (expected, result))

    def test_quoting_space(self):
        # Make sure quote() and quote_plus() handle spaces as specified in
        # their unique way
        result = urllib.parse.quote(' ')
        self.assertEqual(result, hexescape(' '),
                         "using quote(): %r != %r" % (result, hexescape(' ')))
        result = urllib.parse.quote_plus(' ')
        self.assertEqual(result, '+',
                         "using quote_plus(): %r != +" % result)
        given = "a b cd e f"
        expect = given.replace(' ', hexescape(' '))
        result = urllib.parse.quote(given)
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        expect = given.replace(' ', '+')
        result = urllib.parse.quote_plus(given)
        self.assertEqual(expect, result,
                         "using quote_plus(): %r != %r" % (expect, result))

    def test_quoting_plus(self):
        self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma'),
                         'alpha%2Bbeta+gamma')
        self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma', '+'),
                         'alpha+beta+gamma')
        # Test with bytes
        self.assertEqual(urllib.parse.quote_plus(b'alpha+beta gamma'),
                         'alpha%2Bbeta+gamma')
        # Test with safe bytes
        self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma', b'+'),
                         'alpha+beta+gamma')

    def test_quote_bytes(self):
        # Bytes should quote directly to percent-encoded values
        given = b"\xa2\xd8ab\xff"
        expect = "%A2%D8ab%FF"
        result = urllib.parse.quote(given)
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        # Encoding argument should raise type error on bytes input
        self.assertRaises(TypeError, urllib.parse.quote, given,
                          encoding="latin-1")
        # quote_from_bytes should work the same
        result = urllib.parse.quote_from_bytes(given)
        self.assertEqual(expect, result,
                         "using quote_from_bytes(): %r != %r"
                         % (expect, result))

    def test_quote_with_unicode(self):
        # Characters in Latin-1 range, encoded by default in UTF-8
        given = "\xa2\xd8ab\xff"
        expect = "%C2%A2%C3%98ab%C3%BF"
        result = urllib.parse.quote(given)
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        # Characters in Latin-1 range, encoded with None (default)
        result = urllib.parse.quote(given, encoding=None, errors=None)
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        # Characters in Latin-1 range, encoded with Latin-1
        given = "\xa2\xd8ab\xff"
        expect = "%A2%D8ab%FF"
        result = urllib.parse.quote(given, encoding="latin-1")
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        # Characters in BMP, encoded by default in UTF-8
        given = "\u6f22\u5b57"              # "Kanji"
        expect = "%E6%BC%A2%E5%AD%97"
        result = urllib.parse.quote(given)
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        # Characters in BMP, encoded with Latin-1
        given = "\u6f22\u5b57"
        self.assertRaises(UnicodeEncodeError, urllib.parse.quote, given,
                          encoding="latin-1")
        # Characters in BMP, encoded with Latin-1, with replace error handling
        given = "\u6f22\u5b57"
        expect = "%3F%3F"                   # "??"
        result = urllib.parse.quote(given, encoding="latin-1",
                                    errors="replace")
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        # Characters in BMP, Latin-1, with xmlcharref error handling
        given = "\u6f22\u5b57"
        expect = "%26%2328450%3B%26%2323383%3B"     # "&#28450;&#23383;"
        result = urllib.parse.quote(given, encoding="latin-1",
                                    errors="xmlcharrefreplace")
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))

    def test_quote_plus_with_unicode(self):
        # Encoding (latin-1) test for quote_plus
        given = "\xa2\xd8 \xff"
        expect = "%A2%D8+%FF"
        result = urllib.parse.quote_plus(given, encoding="latin-1")
        self.assertEqual(expect, result,
                         "using quote_plus(): %r != %r" % (expect, result))
        # Errors test for quote_plus
        given = "ab\u6f22\u5b57 cd"
        expect = "ab%3F%3F+cd"
        result = urllib.parse.quote_plus(given, encoding="latin-1",
                                         errors="replace")
        self.assertEqual(expect, result,
                         "using quote_plus(): %r != %r" % (expect, result))
804
Senthil Kumarand496c4c2010-07-30 19:34:36 +0000805
class UnquotingTests(unittest.TestCase):
    """Tests for unquote() and unquote_plus()

    See the doc string for QuotingTests for details on quoting and such.

    """

    def test_unquoting(self):
        # Make sure unquoting of all ASCII values works
        escape_list = []
        for num in range(128):
            given = hexescape(chr(num))
            expect = chr(num)
            result = urllib.parse.unquote(given)
            self.assertEqual(expect, result,
                             "using unquote(): %r != %r" % (expect, result))
            result = urllib.parse.unquote_plus(given)
            self.assertEqual(expect, result,
                             "using unquote_plus(): %r != %r" %
                             (expect, result))
            escape_list.append(given)
        escape_string = ''.join(escape_list)
        del escape_list
        result = urllib.parse.unquote(escape_string)
        # The only '%' left should be the one decoded from the escape of
        # '%' itself; any more means some escape was not decoded.
        self.assertEqual(result.count('%'), 1,
                         "using unquote(): not all characters unescaped: "
                         "%s" % result)
        self.assertRaises((TypeError, AttributeError),
                          urllib.parse.unquote, None)
        self.assertRaises((TypeError, AttributeError),
                          urllib.parse.unquote, ())
        with support.check_warnings(('', BytesWarning), quiet=True):
            self.assertRaises((TypeError, AttributeError),
                              urllib.parse.unquote, b'')

    def test_unquoting_badpercent(self):
        # Test unquoting on bad percent-escapes: they are passed through
        # unchanged.
        bad_escapes = ('%xab', '%x', '%')
        for given in bad_escapes:
            expect = given
            result = urllib.parse.unquote(given)
            self.assertEqual(expect, result, "using unquote(): %r != %r"
                             % (expect, result))
        # unquote_to_bytes
        for given in bad_escapes:
            expect = bytes(given, 'ascii')
            result = urllib.parse.unquote_to_bytes(given)
            self.assertEqual(expect, result,
                             "using unquote_to_bytes(): %r != %r"
                             % (expect, result))
        self.assertRaises((TypeError, AttributeError),
                          urllib.parse.unquote_to_bytes, None)
        self.assertRaises((TypeError, AttributeError),
                          urllib.parse.unquote_to_bytes, ())

    def test_unquoting_mixed_case(self):
        # Test unquoting on mixed-case hex digits in the percent-escapes
        given = '%Ab%eA'
        expect = b'\xab\xea'
        result = urllib.parse.unquote_to_bytes(given)
        self.assertEqual(expect, result,
                         "using unquote_to_bytes(): %r != %r"
                         % (expect, result))

    def test_unquoting_parts(self):
        # Make sure unquoting works when have non-quoted characters
        # interspersed
        given = 'ab%sd' % hexescape('c')
        expect = "abcd"
        result = urllib.parse.unquote(given)
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))
        result = urllib.parse.unquote_plus(given)
        self.assertEqual(expect, result,
                         "using unquote_plus(): %r != %r" % (expect, result))

    def test_unquoting_plus(self):
        # Test difference between unquote() and unquote_plus()
        given = "are+there+spaces..."
        expect = given
        result = urllib.parse.unquote(given)
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))
        expect = given.replace('+', ' ')
        result = urllib.parse.unquote_plus(given)
        self.assertEqual(expect, result,
                         "using unquote_plus(): %r != %r" % (expect, result))

    def test_unquote_to_bytes(self):
        given = 'br%C3%BCckner_sapporo_20050930.doc'
        expect = b'br\xc3\xbcckner_sapporo_20050930.doc'
        result = urllib.parse.unquote_to_bytes(given)
        self.assertEqual(expect, result,
                         "using unquote_to_bytes(): %r != %r"
                         % (expect, result))
        # Test on a string with unescaped non-ASCII characters
        # (Technically an invalid URI; expect those characters to be UTF-8
        # encoded).
        result = urllib.parse.unquote_to_bytes("\u6f22%C3%BC")
        expect = b'\xe6\xbc\xa2\xc3\xbc'    # UTF-8 for "\u6f22\u00fc"
        self.assertEqual(expect, result,
                         "using unquote_to_bytes(): %r != %r"
                         % (expect, result))
        # Test with a bytes as input
        given = b'%A2%D8ab%FF'
        expect = b'\xa2\xd8ab\xff'
        result = urllib.parse.unquote_to_bytes(given)
        self.assertEqual(expect, result,
                         "using unquote_to_bytes(): %r != %r"
                         % (expect, result))
        # Test with a bytes as input, with unescaped non-ASCII bytes
        # (Technically an invalid URI; expect those bytes to be preserved)
        given = b'%A2\xd8ab%FF'
        expect = b'\xa2\xd8ab\xff'
        result = urllib.parse.unquote_to_bytes(given)
        self.assertEqual(expect, result,
                         "using unquote_to_bytes(): %r != %r"
                         % (expect, result))

    def test_unquote_with_unicode(self):
        # Characters in the Latin-1 range, encoded with UTF-8
        given = 'br%C3%BCckner_sapporo_20050930.doc'
        expect = 'br\u00fcckner_sapporo_20050930.doc'
        result = urllib.parse.unquote(given)
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))
        # Characters in the Latin-1 range, encoded with None (default)
        result = urllib.parse.unquote(given, encoding=None, errors=None)
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))

        # Characters in the Latin-1 range, encoded with Latin-1
        result = urllib.parse.unquote('br%FCckner_sapporo_20050930.doc',
                                      encoding="latin-1")
        expect = 'br\u00fcckner_sapporo_20050930.doc'
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))

        # Characters in BMP, encoded with UTF-8
        given = "%E6%BC%A2%E5%AD%97"
        expect = "\u6f22\u5b57"             # "Kanji"
        result = urllib.parse.unquote(given)
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))

        # Decode with UTF-8, invalid sequence
        given = "%F3%B1"
        expect = "\ufffd"                   # Replacement character
        result = urllib.parse.unquote(given)
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))

        # Decode with UTF-8, invalid sequence, replace errors
        result = urllib.parse.unquote(given, errors="replace")
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))

        # Decode with UTF-8, invalid sequence, ignoring errors
        given = "%F3%B1"
        expect = ""
        result = urllib.parse.unquote(given, errors="ignore")
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))

        # A mix of non-ASCII and percent-encoded characters, UTF-8
        result = urllib.parse.unquote("\u6f22%C3%BC")
        expect = '\u6f22\u00fc'
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))

        # A mix of non-ASCII and percent-encoded characters, Latin-1
        # (Note, the string contains non-Latin-1-representable characters)
        result = urllib.parse.unquote("\u6f22%FC", encoding="latin-1")
        expect = '\u6f22\u00fc'
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))
Raymond Hettinger4b0f20d2005-10-15 16:41:53 +0000995
class urlencode_Tests(unittest.TestCase):
    """Tests for urlencode()"""

    def help_inputtype(self, given, test_type):
        """Helper method for testing different input types.

        'given' must lead to only the pairs:
            * 1st, 1
            * 2nd, 2
            * 3rd, 3

        Test cannot assume anything about order.  Docs make no guarantee and
        have possible dictionary input.

        'test_type' is a short description of the input kind; it is only
        used in failure messages.

        """
        expect_somewhere = ["1st=1", "2nd=2", "3rd=3"]
        result = urllib.parse.urlencode(given)
        for expected in expect_somewhere:
            self.assertIn(expected, result,
                          "testing %s: %s not found in %s" %
                          (test_type, expected, result))
        self.assertEqual(result.count('&'), 2,
                         "testing %s: expected 2 '&'s; got %s" %
                         (test_type, result.count('&')))
        # Each '&' must sit between a value (a digit) and the next key
        # (which also starts with a digit here).
        amp_location = result.index('&')
        on_amp_left = result[amp_location - 1]
        on_amp_right = result[amp_location + 1]
        self.assertTrue(on_amp_left.isdigit() and on_amp_right.isdigit(),
                        "testing %s: '&' not located in proper place in %s" %
                        (test_type, result))
        self.assertEqual(len(result), (5 * 3) + 2,  # 5 chars per thing and amps
                         "testing %s: "
                         "unexpected number of characters: %s != %s" %
                         (test_type, len(result), (5 * 3) + 2))

    def test_using_mapping(self):
        # Test passing in a mapping object as an argument.
        self.help_inputtype({"1st": '1', "2nd": '2', "3rd": '3'},
                            "using dict as input type")

    def test_using_sequence(self):
        # Test passing in a sequence of two-item sequences as an argument.
        self.help_inputtype([('1st', '1'), ('2nd', '2'), ('3rd', '3')],
                            "using sequence of two-item tuples as input")

    def test_quoting(self):
        # Make sure keys and values are quoted using quote_plus()
        given = {"&": "="}
        expect = "%s=%s" % (hexescape('&'), hexescape('='))
        result = urllib.parse.urlencode(given)
        self.assertEqual(expect, result)
        given = {"key name": "A bunch of pluses"}
        expect = "key+name=A+bunch+of+pluses"
        result = urllib.parse.urlencode(given)
        self.assertEqual(expect, result)

    def test_doseq(self):
        # Test that passing True for 'doseq' parameter works correctly
        given = {'sequence': ['1', '2', '3']}
        # Without doseq the list is stringified and quoted as one value.
        expect = "sequence=%s" % urllib.parse.quote_plus(str(['1', '2', '3']))
        result = urllib.parse.urlencode(given)
        self.assertEqual(expect, result)
        result = urllib.parse.urlencode(given, True)
        for value in given["sequence"]:
            expect = "sequence=%s" % value
            self.assertIn(expect, result)
        self.assertEqual(result.count('&'), 2,
                         "Expected 2 '&'s, got %s" % result.count('&'))

    def test_empty_sequence(self):
        self.assertEqual("", urllib.parse.urlencode({}))
        self.assertEqual("", urllib.parse.urlencode([]))

    def test_nonstring_values(self):
        self.assertEqual("a=1", urllib.parse.urlencode({"a": 1}))
        self.assertEqual("a=None", urllib.parse.urlencode({"a": None}))

    def test_nonstring_seq_values(self):
        self.assertEqual("a=1&a=2",
                         urllib.parse.urlencode({"a": [1, 2]}, True))
        self.assertEqual("a=None&a=a",
                         urllib.parse.urlencode({"a": [None, "a"]}, True))
        # A mapping used as a value is iterated like any other sequence,
        # so its keys become the values.
        data = collections.OrderedDict([("a", 1), ("b", 1)])
        self.assertEqual("a=a&a=b",
                         urllib.parse.urlencode({"a": data}, True))

    def test_urlencode_encoding(self):
        # ASCII encoding. Expect %3F with errors="replace"
        given = (('\u00a0', '\u00c1'),)
        expect = '%3F=%3F'
        result = urllib.parse.urlencode(given, encoding="ASCII",
                                        errors="replace")
        self.assertEqual(expect, result)

        # Default is UTF-8 encoding.
        given = (('\u00a0', '\u00c1'),)
        expect = '%C2%A0=%C3%81'
        result = urllib.parse.urlencode(given)
        self.assertEqual(expect, result)

        # Latin-1 encoding.
        given = (('\u00a0', '\u00c1'),)
        expect = '%A0=%C1'
        result = urllib.parse.urlencode(given, encoding="latin-1")
        self.assertEqual(expect, result)

    def test_urlencode_encoding_doseq(self):
        # ASCII Encoding. Expect %3F with errors="replace"
        given = (('\u00a0', '\u00c1'),)
        expect = '%3F=%3F'
        result = urllib.parse.urlencode(given, doseq=True,
                                        encoding="ASCII", errors="replace")
        self.assertEqual(expect, result)

        # ASCII Encoding. On a sequence of values.
        given = (("\u00a0", (1, "\u00c1")),)
        expect = '%3F=1&%3F=%3F'
        result = urllib.parse.urlencode(given, True,
                                        encoding="ASCII", errors="replace")
        self.assertEqual(expect, result)

        # Utf-8
        given = (("\u00a0", "\u00c1"),)
        expect = '%C2%A0=%C3%81'
        result = urllib.parse.urlencode(given, True)
        self.assertEqual(expect, result)

        given = (("\u00a0", (42, "\u00c1")),)
        expect = '%C2%A0=42&%C2%A0=%C3%81'
        result = urllib.parse.urlencode(given, True)
        self.assertEqual(expect, result)

        # latin-1
        given = (("\u00a0", "\u00c1"),)
        expect = '%A0=%C1'
        result = urllib.parse.urlencode(given, True, encoding="latin-1")
        self.assertEqual(expect, result)

        given = (("\u00a0", (42, "\u00c1")),)
        expect = '%A0=42&%A0=%C1'
        result = urllib.parse.urlencode(given, True, encoding="latin-1")
        self.assertEqual(expect, result)

    def test_urlencode_bytes(self):
        given = ((b'\xa0\x24', b'\xc1\x24'),)
        expect = '%A0%24=%C1%24'
        result = urllib.parse.urlencode(given)
        self.assertEqual(expect, result)
        result = urllib.parse.urlencode(given, True)
        self.assertEqual(expect, result)

        # Sequence of values
        given = ((b'\xa0\x24', (42, b'\xc1\x24')),)
        expect = '%A0%24=42&%A0%24=%C1%24'
        result = urllib.parse.urlencode(given, True)
        self.assertEqual(expect, result)

    def test_urlencode_encoding_safe_parameter(self):

        # Send '$' (\x24) as safe character
        # Default utf-8 encoding

        given = ((b'\xa0\x24', b'\xc1\x24'),)
        result = urllib.parse.urlencode(given, safe=":$")
        expect = '%A0$=%C1$'
        self.assertEqual(expect, result)

        given = ((b'\xa0\x24', b'\xc1\x24'),)
        result = urllib.parse.urlencode(given, doseq=True, safe=":$")
        expect = '%A0$=%C1$'
        self.assertEqual(expect, result)

        # Safe parameter in sequence
        given = ((b'\xa0\x24', (b'\xc1\x24', 0xd, 42)),)
        expect = '%A0$=%C1$&%A0$=13&%A0$=42'
        result = urllib.parse.urlencode(given, True, safe=":$")
        self.assertEqual(expect, result)

        # Test all above in latin-1 encoding

        given = ((b'\xa0\x24', b'\xc1\x24'),)
        result = urllib.parse.urlencode(given, safe=":$",
                                        encoding="latin-1")
        expect = '%A0$=%C1$'
        self.assertEqual(expect, result)

        given = ((b'\xa0\x24', b'\xc1\x24'),)
        expect = '%A0$=%C1$'
        result = urllib.parse.urlencode(given, doseq=True, safe=":$",
                                        encoding="latin-1")
        # This result used to be computed but never checked.
        self.assertEqual(expect, result)

        given = ((b'\xa0\x24', (b'\xc1\x24', 0xd, 42)),)
        expect = '%A0$=%C1$&%A0$=13&%A0$=42'
        result = urllib.parse.urlencode(given, True, safe=":$",
                                        encoding="latin-1")
        self.assertEqual(expect, result)
1190
class Pathname_Tests(unittest.TestCase):
    """Test pathname2url() and url2pathname()"""

    def test_basic(self):
        # Make sure simple tests pass
        expected_path = os.path.join("parts", "of", "a", "path")
        expected_url = "parts/of/a/path"
        result = urllib.request.pathname2url(expected_path)
        self.assertEqual(expected_url, result,
                         "pathname2url() failed; %s != %s" %
                         (expected_url, result))
        result = urllib.request.url2pathname(expected_url)
        self.assertEqual(expected_path, result,
                         "url2pathname() failed; %s != %s" %
                         (expected_path, result))

    def test_quoting(self):
        # Test automatic quoting and unquoting works for pathname2url() and
        # url2pathname() respectively
        given = os.path.join("needs", "quot=ing", "here")
        expect = "needs/%s/here" % urllib.parse.quote("quot=ing")
        result = urllib.request.pathname2url(given)
        self.assertEqual(expect, result,
                         "pathname2url() failed; %s != %s" %
                         (expect, result))
        # Round trip: converting the URL back must give the original path.
        expect = given
        result = urllib.request.url2pathname(result)
        self.assertEqual(expect, result,
                         "url2pathname() failed; %s != %s" %
                         (expect, result))
        given = os.path.join("make sure", "using_quote")
        expect = "%s/using_quote" % urllib.parse.quote("make sure")
        result = urllib.request.pathname2url(given)
        self.assertEqual(expect, result,
                         "pathname2url() failed; %s != %s" %
                         (expect, result))
        # A '+' in a URL path must be left alone (not turned into a space).
        given = "make+sure/using_unquote"
        expect = os.path.join("make+sure", "using_unquote")
        result = urllib.request.url2pathname(given)
        self.assertEqual(expect, result,
                         "url2pathname() failed; %s != %s" %
                         (expect, result))

    @unittest.skipUnless(sys.platform == 'win32',
                         'test specific to the Windows implementation of '
                         'url2pathname().')
    def test_ntpath(self):
        # Different spellings of a drive-letter URL map to the same path.
        given = ('/C:/', '///C:/', '/C|//')
        expect = 'C:\\'
        for url in given:
            result = urllib.request.url2pathname(url)
            self.assertEqual(expect, result,
                             'urllib.request.url2pathname() failed; %s != %s' %
                             (expect, result))
        given = '///C|/path'
        expect = 'C:\\path'
        result = urllib.request.url2pathname(given)
        self.assertEqual(expect, result,
                         'urllib.request.url2pathname() failed; %s != %s' %
                         (expect, result))
1250
class Utility_Tests(unittest.TestCase):
    """Testcase to test the various utility functions in urllib."""

    def test_splitpasswd(self):
        """Check that splitpasswd() splits 'user:passwd' at the first ':'.

        Some of the password examples are not sensible, but they are
        included to conform to RFC 2617 and to address issue4675.
        """
        # (input, expected (user, password) pair)
        cases = [
            ('user:ab', ('user', 'ab')),
            ('user:a\nb', ('user', 'a\nb')),
            ('user:a\tb', ('user', 'a\tb')),
            ('user:a\rb', ('user', 'a\rb')),
            ('user:a\fb', ('user', 'a\fb')),
            ('user:a\vb', ('user', 'a\vb')),
            ('user:a:b', ('user', 'a:b')),
            ('user:a b', ('user', 'a b')),
            ('user 2:ab', ('user 2', 'ab')),
            ('user+1:a+b', ('user+1', 'a+b')),
        ]
        for given, expect in cases:
            result = urllib.parse.splitpasswd(given)
            # Name the input so a failure identifies the offending case.
            self.assertEqual(expect, result,
                             "using splitpasswd(%r): %r != %r" %
                             (given, expect, result))

    def test_thishost(self):
        """Test the urllib.request.thishost utility function returns a tuple"""
        self.assertIsInstance(urllib.request.thishost(), tuple)
1272
Senthil Kumaran690ce9b2009-05-05 18:41:13 +00001273
class URLopener_Tests(unittest.TestCase):
    """Testcase to test the open method of URLopener class."""

    def test_quoted_open(self):
        # The class name must stay 'DummyURLopener': the expected warning
        # text below starts with it.
        class DummyURLopener(urllib.request.URLopener):
            def open_spam(self, url):
                # Echo back whatever open() hands over for 'spam:' URLs.
                return url

        expected_warning = (
            'DummyURLopener style of invoking requests is deprecated.',
            DeprecationWarning)
        with support.check_warnings(expected_warning):
            opened = DummyURLopener().open('spam://example/ /')
            self.assertEqual(opened, '//example/%20/')

            # test the safe characters are not quoted by urlopen
            untouched = "//c:|windows%/:=&?~#+!$,;'@()*[]|/path/"
            opened = DummyURLopener().open("spam:" + untouched)
            self.assertEqual(opened, untouched)
Senthil Kumaran734f0592010-02-20 22:19:04 +00001291
# The FTP wrapper tests below have been commented out rather than deleted.
# It is unclear why they keep failing on Windows and SPARC.
# Everywhere else they work fine, but on those machines they sometimes
# fail in one of the tests and sometimes in another. I have a Linux
# machine, and the tests pass there.
# If anybody has one of the problematic environments, please help!
# -- Facundo
1299#
1300# def server(evt):
Georg Brandlf78e02b2008-06-10 17:40:04 +00001301# import socket, time
Guido van Rossume7ba4952007-06-06 23:52:48 +00001302# serv = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
1303# serv.settimeout(3)
1304# serv.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
1305# serv.bind(("", 9093))
1306# serv.listen(5)
1307# try:
1308# conn, addr = serv.accept()
1309# conn.send("1 Hola mundo\n")
1310# cantdata = 0
1311# while cantdata < 13:
1312# data = conn.recv(13-cantdata)
1313# cantdata += len(data)
1314# time.sleep(.3)
1315# conn.send("2 No more lines\n")
1316# conn.close()
1317# except socket.timeout:
1318# pass
1319# finally:
1320# serv.close()
1321# evt.set()
1322#
1323# class FTPWrapperTests(unittest.TestCase):
1324#
1325# def setUp(self):
Georg Brandlf78e02b2008-06-10 17:40:04 +00001326# import ftplib, time, threading
Guido van Rossume7ba4952007-06-06 23:52:48 +00001327# ftplib.FTP.port = 9093
1328# self.evt = threading.Event()
1329# threading.Thread(target=server, args=(self.evt,)).start()
1330# time.sleep(.1)
1331#
1332# def tearDown(self):
1333# self.evt.wait()
1334#
1335# def testBasic(self):
1336# # connects
1337# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [])
Georg Brandlf78e02b2008-06-10 17:40:04 +00001338# ftp.close()
Guido van Rossume7ba4952007-06-06 23:52:48 +00001339#
1340# def testTimeoutNone(self):
Georg Brandlf78e02b2008-06-10 17:40:04 +00001341# # global default timeout is ignored
1342# import socket
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00001343# self.assertTrue(socket.getdefaulttimeout() is None)
Guido van Rossume7ba4952007-06-06 23:52:48 +00001344# socket.setdefaulttimeout(30)
1345# try:
1346# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [])
1347# finally:
Georg Brandlf78e02b2008-06-10 17:40:04 +00001348# socket.setdefaulttimeout(None)
Guido van Rossume7ba4952007-06-06 23:52:48 +00001349# self.assertEqual(ftp.ftp.sock.gettimeout(), 30)
Georg Brandlf78e02b2008-06-10 17:40:04 +00001350# ftp.close()
Guido van Rossume7ba4952007-06-06 23:52:48 +00001351#
Georg Brandlf78e02b2008-06-10 17:40:04 +00001352# def testTimeoutDefault(self):
1353# # global default timeout is used
1354# import socket
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00001355# self.assertTrue(socket.getdefaulttimeout() is None)
Georg Brandlf78e02b2008-06-10 17:40:04 +00001356# socket.setdefaulttimeout(30)
1357# try:
1358# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [])
1359# finally:
1360# socket.setdefaulttimeout(None)
1361# self.assertEqual(ftp.ftp.sock.gettimeout(), 30)
1362# ftp.close()
1363#
1364# def testTimeoutValue(self):
1365# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [],
1366# timeout=30)
1367# self.assertEqual(ftp.ftp.sock.gettimeout(), 30)
1368# ftp.close()
Guido van Rossume7ba4952007-06-06 23:52:48 +00001369
Senthil Kumaran8b081b72013-04-10 20:53:12 -07001370
class RequestTests(unittest.TestCase):
    """Unit tests for urllib.request.Request."""

    def test_default_values(self):
        """get_method() is GET without data and POST when data is passed."""
        url = "http://www.python.org"

        without_data = urllib.request.Request(url)
        self.assertEqual(without_data.get_method(), 'GET')

        with_data = urllib.request.Request(url, {})
        self.assertEqual(with_data.get_method(), 'POST')

    def test_with_method_arg(self):
        """An explicit method wins, whether passed in or assigned later."""
        url = "http://www.python.org"
        make_request = urllib.request.Request

        # method= given, no data.
        head_only = make_request(url, method='HEAD')
        self.assertEqual(head_only.method, 'HEAD')
        self.assertEqual(head_only.get_method(), 'HEAD')

        # method= given together with data.
        head_with_data = make_request(url, {}, method='HEAD')
        self.assertEqual(head_with_data.method, 'HEAD')
        self.assertEqual(head_with_data.get_method(), 'HEAD')

        # The method attribute can be reassigned after construction.
        mutable = make_request(url, method='GET')
        self.assertEqual(mutable.get_method(), 'GET')
        mutable.method = 'HEAD'
        self.assertEqual(mutable.get_method(), 'HEAD')
Skip Montanaro080c9972001-01-28 21:12:22 +00001393
1394
class URL2PathNameTests(unittest.TestCase):
    """Tests for nturl2path.url2pathname()."""

    def test_converting_drive_letter(self):
        """A drive written as 'C|' or 'C:' in the URL comes back as 'C:'."""
        cases = (
            ("///C|", 'C:'),
            ("///C:", 'C:'),
            ("///C|/", 'C:\\'),
        )
        for url, expected in cases:
            self.assertEqual(url2pathname(url), expected)

    def test_converting_when_no_drive_letter(self):
        """URLs without a drive specifier just get their slashes flipped."""
        # cannot end a raw string in \
        three_slashes = r'\\\C\test' '\\'
        two_slashes = r'\\C\test' '\\'
        self.assertEqual(url2pathname("///C/test/"), three_slashes)
        self.assertEqual(url2pathname("////C/test/"), two_slashes)

    def test_simple_compare(self):
        """A full drive-plus-path URL converts to the matching path."""
        converted = url2pathname("///C|/foo/bar/spam.foo")
        self.assertEqual(converted, r'C:\foo\bar\spam.foo')

    def test_non_ascii_drive_letter(self):
        """A non-ASCII drive letter is rejected with IOError."""
        with self.assertRaises(IOError):
            url2pathname("///\u00e8|/")

    def test_roundtrip_url2pathname(self):
        """path -> URL -> path gives back the starting path."""
        paths = (
            'C:',
            r'\\\C\test\\',
            r'C:\foo\bar\spam.foo',
        )
        for original in paths:
            as_url = pathname2url(original)
            self.assertEqual(url2pathname(as_url), original)
Senthil Kumaran277e9092013-04-10 20:51:19 -07001421
class PathName2URLTests(unittest.TestCase):
    """Tests for nturl2path.pathname2url()."""

    def test_converting_drive_letter(self):
        """A bare drive, with or without a trailing backslash, is '///C:'."""
        for path in ("C:", "C:\\"):
            self.assertEqual(pathname2url(path), '///C:')

    def test_converting_when_no_drive_letter(self):
        """Paths without a drive keep their structure, slashes flipped."""
        # Raw strings cannot end in a backslash, hence the concatenation.
        cases = (
            (r"\\\folder\test" "\\", '/////folder/test/'),
            (r"\\folder\test" "\\", '////folder/test/'),
            (r"\folder\test" "\\", '/folder/test/'),
        )
        for path, expected in cases:
            self.assertEqual(pathname2url(path), expected)

    def test_simple_compare(self):
        """A full drive-plus-path name converts to the matching URL."""
        converted = pathname2url(r'C:\foo\bar\spam.foo')
        self.assertEqual(converted, "///C:/foo/bar/spam.foo")

    def test_long_drive_letter(self):
        """A drive specifier longer than one letter raises IOError."""
        with self.assertRaises(IOError):
            pathname2url("XX:\\")

    def test_roundtrip_pathname2url(self):
        """URL -> path -> URL gives back the starting URL."""
        urls = (
            '///C:',
            '/////folder/test/',
            '///C:/foo/bar/spam.foo',
        )
        for original in urls:
            as_path = url2pathname(original)
            self.assertEqual(pathname2url(as_path), original)
Brett Cannon74bfd702003-04-25 09:39:47 +00001449
# Run this module's tests when the file is executed directly as a script.
if __name__ == '__main__':
    unittest.main()