blob: 91aeb2f11666e54fca1e618d177bce1d74ff1419 [file] [log] [blame]
"""Regression tests for urllib"""
2
Jeremy Hylton6102e292000-08-31 15:48:10 +00003import urllib
Hye-Shik Chang39aef792004-06-05 13:30:56 +00004import httplib
Brett Cannon74bfd702003-04-25 09:39:47 +00005import unittest
Brett Cannon74bfd702003-04-25 09:39:47 +00006import os
Senthil Kumarana99b7612011-04-14 12:54:35 +08007import sys
Brett Cannon74bfd702003-04-25 09:39:47 +00008import mimetools
Georg Brandl5a650a22005-08-26 08:51:34 +00009import tempfile
Hye-Shik Chang39aef792004-06-05 13:30:56 +000010import StringIO
Jeremy Hylton6102e292000-08-31 15:48:10 +000011
Senthil Kumaranbcd833f2012-01-11 00:09:24 +080012from test import test_support
13from base64 import b64encode
14
15
def hexescape(char):
    """Escape char as RFC 2396 specifies.

    Return '%' followed by the upper-case hexadecimal value of ord(char),
    zero-padded to at least two digits (for example ' ' -> '%20').
    """
    return "%%%02X" % ord(char)
Jeremy Hylton6102e292000-08-31 15:48:10 +000022
Senthil Kumaran87e58552011-11-01 02:44:45 +080023
24class FakeHTTPMixin(object):
25 def fakehttp(self, fakedata):
26 class FakeSocket(StringIO.StringIO):
27
Senthil Kumaranbcd833f2012-01-11 00:09:24 +080028 def sendall(self, data):
29 FakeHTTPConnection.buf = data
30
Senthil Kumaran87e58552011-11-01 02:44:45 +080031 def makefile(self, *args, **kwds):
32 return self
33
34 def read(self, amt=None):
35 if self.closed:
36 return ""
37 return StringIO.StringIO.read(self, amt)
38
39 def readline(self, length=None):
40 if self.closed:
41 return ""
42 return StringIO.StringIO.readline(self, length)
43
44 class FakeHTTPConnection(httplib.HTTPConnection):
Senthil Kumaranbcd833f2012-01-11 00:09:24 +080045
46 # buffer to store data for verification in urlopen tests.
47 buf = ""
48
Senthil Kumaran87e58552011-11-01 02:44:45 +080049 def connect(self):
50 self.sock = FakeSocket(fakedata)
Senthil Kumaranbcd833f2012-01-11 00:09:24 +080051
Senthil Kumaran87e58552011-11-01 02:44:45 +080052 assert httplib.HTTP._connection_class == httplib.HTTPConnection
Senthil Kumaranbcd833f2012-01-11 00:09:24 +080053
Senthil Kumaran87e58552011-11-01 02:44:45 +080054 httplib.HTTP._connection_class = FakeHTTPConnection
55
56 def unfakehttp(self):
57 httplib.HTTP._connection_class = httplib.HTTPConnection
58
59
class urlopen_FileTests(unittest.TestCase):
    """Test urlopen() opening a temporary file.

    Try to test as much functionality as possible so as to cut down on reliance
    on connecting to the Net for testing.

    """

    def setUp(self):
        """Create a temp file and open it through urlopen()."""
        self.text = "test_urllib: %s\n" % self.__class__.__name__
        self.pathname = test_support.TESTFN
        # Cleanups still run when setUp() fails (tearDown() does not), so
        # register the removal before the file is created.
        self.addCleanup(test_support.unlink, self.pathname)
        with open(self.pathname, 'wb') as temp_file:
            temp_file.write(self.text)
        self.returned_obj = urllib.urlopen("file:%s" % self.pathname)

    def tearDown(self):
        """Shut down the open object"""
        # tearDown() runs before the cleanup registered in setUp(), so the
        # object is closed before the file is removed.
        self.returned_obj.close()

    def test_interface(self):
        # Make sure object returned by urlopen() has the specified methods
        for attr in ("read", "readline", "readlines", "fileno",
                     "close", "info", "geturl", "getcode", "__iter__"):
            self.assertTrue(hasattr(self.returned_obj, attr),
                            "object returned by urlopen() lacks %s attribute" %
                            attr)

    def test_read(self):
        self.assertEqual(self.text, self.returned_obj.read())

    def test_readline(self):
        self.assertEqual(self.text, self.returned_obj.readline())
        self.assertEqual('', self.returned_obj.readline(),
                         "calling readline() after exhausting the file did not"
                         " return an empty string")

    def test_readlines(self):
        lines_list = self.returned_obj.readlines()
        self.assertEqual(len(lines_list), 1,
                         "readlines() returned the wrong number of lines")
        self.assertEqual(lines_list[0], self.text,
                         "readlines() returned improper text")

    def test_fileno(self):
        file_num = self.returned_obj.fileno()
        self.assertIsInstance(file_num, int, "fileno() did not return an int")
        self.assertEqual(os.read(file_num, len(self.text)), self.text,
                         "Reading on the file descriptor returned by fileno() "
                         "did not return the expected text")

    def test_close(self):
        # Test close() by calling it here and then having it be called again
        # by the tearDown() method for the test
        self.returned_obj.close()

    def test_info(self):
        self.assertIsInstance(self.returned_obj.info(), mimetools.Message)

    def test_geturl(self):
        self.assertEqual(self.returned_obj.geturl(), self.pathname)

    def test_getcode(self):
        self.assertIsNone(self.returned_obj.getcode())

    def test_iter(self):
        # Test iterator.  Comparing against the complete expected list also
        # catches an iterator that yields no lines at all, which a bare
        # loop over the lines would silently accept.
        lines = [line for line in self.returned_obj.__iter__()]
        self.assertEqual(lines, [self.text])

    def test_relativelocalfile(self):
        self.assertRaises(ValueError, urllib.urlopen, './' + self.pathname)
139
Benjamin Peterson2c7470d2008-09-21 21:27:51 +0000140class ProxyTests(unittest.TestCase):
141
142 def setUp(self):
Walter Dörwald4b965f62009-04-26 20:51:44 +0000143 # Records changes to env vars
144 self.env = test_support.EnvironmentVarGuard()
Benjamin Peterson2c7470d2008-09-21 21:27:51 +0000145 # Delete all proxy related env vars
Senthil Kumaran7a2ee0b2010-01-08 19:20:25 +0000146 for k in os.environ.keys():
Walter Dörwald4b965f62009-04-26 20:51:44 +0000147 if 'proxy' in k.lower():
Senthil Kumarandc61ec32009-10-01 01:50:13 +0000148 self.env.unset(k)
Benjamin Peterson2c7470d2008-09-21 21:27:51 +0000149
150 def tearDown(self):
Benjamin Peterson2c7470d2008-09-21 21:27:51 +0000151 # Restore all proxy related env vars
Walter Dörwald4b965f62009-04-26 20:51:44 +0000152 self.env.__exit__()
153 del self.env
Benjamin Peterson2c7470d2008-09-21 21:27:51 +0000154
155 def test_getproxies_environment_keep_no_proxies(self):
Walter Dörwald4b965f62009-04-26 20:51:44 +0000156 self.env.set('NO_PROXY', 'localhost')
Benjamin Peterson2c7470d2008-09-21 21:27:51 +0000157 proxies = urllib.getproxies_environment()
158 # getproxies_environment use lowered case truncated (no '_proxy') keys
Ezio Melotti2623a372010-11-21 13:34:58 +0000159 self.assertEqual('localhost', proxies['no'])
Senthil Kumaranb5bd4c82011-08-06 12:24:33 +0800160 # List of no_proxies with space.
161 self.env.set('NO_PROXY', 'localhost, anotherdomain.com, newdomain.com')
162 self.assertTrue(urllib.proxy_bypass_environment('anotherdomain.com'))
Benjamin Peterson2c7470d2008-09-21 21:27:51 +0000163
164
class urlopen_HttpTests(unittest.TestCase, FakeHTTPMixin):
    """Test urlopen() opening a fake http connection."""

    def test_read(self):
        self.fakehttp('Hello!')
        try:
            fp = urllib.urlopen("http://python.org/")
            self.assertEqual(fp.readline(), 'Hello!')
            self.assertEqual(fp.readline(), '')
            self.assertEqual(fp.geturl(), 'http://python.org/')
            self.assertEqual(fp.getcode(), 200)
        finally:
            self.unfakehttp()

    def test_url_fragment(self):
        # Issue #11703: geturl() omits fragments in the original URL.
        url = 'http://docs.python.org/library/urllib.html#OK'
        self.fakehttp('Hello!')
        try:
            fp = urllib.urlopen(url)
            self.assertEqual(fp.geturl(), url)
        finally:
            self.unfakehttp()

    def test_read_bogus(self):
        # urlopen() should raise IOError for many error codes.
        self.fakehttp('''HTTP/1.1 401 Authentication Required
Date: Wed, 02 Jan 2008 03:03:54 GMT
Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
Connection: close
Content-Type: text/html; charset=iso-8859-1
''')
        try:
            self.assertRaises(IOError, urllib.urlopen, "http://python.org/")
        finally:
            self.unfakehttp()

    def test_invalid_redirect(self):
        # urlopen() should raise IOError when a 302 response redirects to a
        # file: URL.
        self.fakehttp("""HTTP/1.1 302 Found
Date: Wed, 02 Jan 2008 03:03:54 GMT
Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
Location: file:README
Connection: close
Content-Type: text/html; charset=iso-8859-1
""")
        try:
            self.assertRaises(IOError, urllib.urlopen, "http://python.org/")
        finally:
            self.unfakehttp()

    def test_empty_socket(self):
        # urlopen() raises IOError if the underlying socket does not send any
        # data. (#1680230)
        self.fakehttp('')
        try:
            self.assertRaises(IOError, urllib.urlopen, 'http://something')
        finally:
            self.unfakehttp()

    def test_userpass_inurl(self):
        self.fakehttp('Hello!')
        try:
            # Grab the fake connection class now; its buf attribute records
            # what urlopen() sends.
            fakehttp_wrapper = httplib.HTTP._connection_class
            fp = urllib.urlopen("http://user:pass@python.org/")
            authorization = ("Authorization: Basic %s\r\n" %
                             b64encode('user:pass'))
            # The authorization header must be in place
            self.assertIn(authorization, fakehttp_wrapper.buf)
            self.assertEqual(fp.readline(), "Hello!")
            self.assertEqual(fp.readline(), "")
            self.assertEqual(fp.geturl(), 'http://user:pass@python.org/')
            self.assertEqual(fp.getcode(), 200)
        finally:
            self.unfakehttp()

    def test_userpass_with_spaces_inurl(self):
        self.fakehttp('Hello!')
        try:
            url = "http://a b:c d@python.org/"
            fakehttp_wrapper = httplib.HTTP._connection_class
            authorization = ("Authorization: Basic %s\r\n" %
                             b64encode('a b:c d'))
            fp = urllib.urlopen(url)
            # The authorization header must be in place
            self.assertIn(authorization, fakehttp_wrapper.buf)
            self.assertEqual(fp.readline(), "Hello!")
            self.assertEqual(fp.readline(), "")
            # the spaces are quoted in URL so no match
            self.assertNotEqual(fp.geturl(), url)
            self.assertEqual(fp.getcode(), 200)
        finally:
            self.unfakehttp()
258
259
class urlretrieve_FileTests(unittest.TestCase):
    """Test urllib.urlretrieve() on local files"""

    def setUp(self):
        # Create a list of temporary files. Each item in the list is a file
        # name (absolute path or relative to the current working directory).
        # All files in this list will be deleted in the tearDown method. Note,
        # this only helps to make sure temporary files get deleted, but it
        # does nothing about trying to close files that may still be open. It
        # is the responsibility of the developer to properly close files even
        # when exceptional conditions occur.
        self.tempFiles = []

        # Create a temporary file.
        self.registerFileForCleanUp(test_support.TESTFN)
        self.text = 'testing urllib.urlretrieve'
        with open(test_support.TESTFN, 'wb') as temp_file:
            temp_file.write(self.text)

    def tearDown(self):
        # Delete the temporary files.
        for each in self.tempFiles:
            try:
                os.remove(each)
            except OSError:
                # Best effort: a registered file may never have been created.
                pass

    def constructLocalFileUrl(self, filePath):
        """Return a file:// URL for the absolute form of filePath."""
        return "file://%s" % urllib.pathname2url(os.path.abspath(filePath))

    def createNewTempFile(self, data=""):
        """Creates a new temporary file containing the specified data,
        registers the file for deletion during the test fixture tear down, and
        returns the absolute path of the file."""

        newFd, newFilePath = tempfile.mkstemp()
        self.registerFileForCleanUp(newFilePath)
        try:
            newFile = os.fdopen(newFd, "wb")
        except Exception:
            # No file object took ownership of the descriptor; close it
            # here so it does not leak.
            os.close(newFd)
            raise
        with newFile:
            newFile.write(data)
        return newFilePath

    def registerFileForCleanUp(self, fileName):
        """Remember fileName so that tearDown() deletes it."""
        self.tempFiles.append(fileName)

    def _retrieveWithReport(self, srcFileName):
        """Retrieve srcFileName into TESTFN; return the reporthook calls.

        Each entry of the returned list is the (count, block_size,
        total_size) tuple of one reporthook invocation, in call order.
        """
        report = []

        def hooktester(count, block_size, total_size):
            report.append((count, block_size, total_size))

        urllib.urlretrieve(self.constructLocalFileUrl(srcFileName),
                           test_support.TESTFN, hooktester)
        return report

    def test_basic(self):
        # Make sure that a local file just gets its own location returned and
        # a headers value is returned.
        result = urllib.urlretrieve("file:%s" % test_support.TESTFN)
        self.assertEqual(result[0], test_support.TESTFN)
        self.assertIsInstance(result[1], mimetools.Message,
                              "did not get a mimetools.Message instance as "
                              "second returned value")

    def test_copy(self):
        # Test that setting the filename argument works.
        second_temp = "%s.2" % test_support.TESTFN
        self.registerFileForCleanUp(second_temp)
        result = urllib.urlretrieve(self.constructLocalFileUrl(
            test_support.TESTFN), second_temp)
        self.assertEqual(second_temp, result[0])
        self.assertTrue(os.path.exists(second_temp), "copy of the file was not "
                                                     "made")
        with open(second_temp, 'rb') as copied_file:
            text = copied_file.read()
        self.assertEqual(self.text, text)

    def test_reporthook(self):
        # Make sure that the reporthook works.
        calls = [0]  # number of hook invocations seen so far

        def hooktester(count, block_size, total_size):
            self.assertIsInstance(count, int)
            self.assertIsInstance(block_size, int)
            self.assertIsInstance(total_size, int)
            # Block counts must arrive in order, starting at zero.
            self.assertEqual(count, calls[0])
            calls[0] += 1

        second_temp = "%s.2" % test_support.TESTFN
        self.registerFileForCleanUp(second_temp)
        urllib.urlretrieve(self.constructLocalFileUrl(test_support.TESTFN),
                           second_temp, hooktester)
        # Without this check the test would pass if the hook were ignored.
        self.assertTrue(calls[0] > 0, "reporthook was never called")

    def test_reporthook_0_bytes(self):
        # Test on zero length file. Should call reporthook only 1 time.
        report = self._retrieveWithReport(self.createNewTempFile())
        self.assertEqual(len(report), 1)
        self.assertEqual(report[0][2], 0)

    def test_reporthook_5_bytes(self):
        # Test on 5 byte file. Should call reporthook only 2 times (once when
        # the "network connection" is established and once when the block is
        # read). Since the block size is 8192 bytes, only one block read is
        # required to read the entire file.
        report = self._retrieveWithReport(self.createNewTempFile("x" * 5))
        self.assertEqual(len(report), 2)
        self.assertEqual(report[0][1], 8192)
        self.assertEqual(report[0][2], 5)

    def test_reporthook_8193_bytes(self):
        # Test on 8193 byte file. Should call reporthook only 3 times (once
        # when the "network connection" is established, once for the next 8192
        # bytes, and once for the last byte).
        report = self._retrieveWithReport(self.createNewTempFile("x" * 8193))
        self.assertEqual(len(report), 3)
        self.assertEqual(report[0][1], 8192)
        self.assertEqual(report[0][2], 8193)
Skip Montanaro080c9972001-01-28 21:12:22 +0000391
Senthil Kumaran87e58552011-11-01 02:44:45 +0800392
class urlretrieve_HttpTests(unittest.TestCase, FakeHTTPMixin):
    """Test urllib.urlretrieve() using fake http connections"""

    # Canned response that announces a 100 byte body but carries only "FF\n".
    _SHORT_RESPONSE = '''HTTP/1.1 200 OK
Date: Wed, 02 Jan 2008 03:03:54 GMT
Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
Connection: close
Content-Length: 100
Content-Type: text/html; charset=iso-8859-1

FF
'''

    def test_short_content_raises_ContentTooShortError(self):
        self.fakehttp(self._SHORT_RESPONSE)

        def _reporthook(par1, par2, par3):
            pass

        try:
            self.assertRaises(urllib.ContentTooShortError, urllib.urlretrieve,
                              'http://example.com', reporthook=_reporthook)
        finally:
            self.unfakehttp()

    def test_short_content_raises_ContentTooShortError_without_reporthook(self):
        self.fakehttp(self._SHORT_RESPONSE)
        try:
            self.assertRaises(urllib.ContentTooShortError, urllib.urlretrieve,
                              'http://example.com/')
        finally:
            self.unfakehttp()
430
class QuotingTests(unittest.TestCase):
    r"""Tests for urllib.quote() and urllib.quote_plus()

    According to RFC 2396 ("Uniform Resource Identifiers"), to escape a
    character you write it as '%' + <2 character US-ASCII hex value>.  The Python
    code of ``'%' + hex(ord(<character>))[2:]`` escapes a character properly.
    Case does not matter on the hex letters.

    The various character sets specified are:

    Reserved characters : ";/?:@&=+$,"
        Have special meaning in URIs and must be escaped if not being used for
        their special meaning
    Data characters : letters, digits, and "-_.!~*'()"
        Unreserved and do not need to be escaped; can be, though, if desired
    Control characters : 0x00 - 0x1F, 0x7F
        Have no use in URIs so must be escaped
    space : 0x20
        Must be escaped
    Delimiters : '<>#%"'
        Must be escaped
    Unwise : "{}|\^[]`"
        Must be escaped

    """

    def test_never_quote(self):
        # Make sure quote() does not quote letters, digits, and "_.-"
        do_not_quote = ''.join(["ABCDEFGHIJKLMNOPQRSTUVWXYZ",
                                "abcdefghijklmnopqrstuvwxyz",
                                "0123456789",
                                "_.-"])
        result = urllib.quote(do_not_quote)
        self.assertEqual(do_not_quote, result,
                         "using quote(): %s != %s" % (do_not_quote, result))
        result = urllib.quote_plus(do_not_quote)
        self.assertEqual(do_not_quote, result,
                         "using quote_plus(): %s != %s" % (do_not_quote, result))

    def test_default_safe(self):
        # Test '/' is default value for 'safe' parameter
        self.assertEqual(urllib.quote.__defaults__[0], '/')

    def test_safe(self):
        # Test setting 'safe' parameter does what it should do
        quote_by_default = "<>"
        result = urllib.quote(quote_by_default, safe=quote_by_default)
        self.assertEqual(quote_by_default, result,
                         "using quote(): %s != %s" % (quote_by_default, result))
        result = urllib.quote_plus(quote_by_default, safe=quote_by_default)
        self.assertEqual(quote_by_default, result,
                         "using quote_plus(): %s != %s" %
                         (quote_by_default, result))

    def test_default_quoting(self):
        # Make sure all characters that should be quoted are by default sans
        # space (separate test for that).
        should_quote = [chr(num) for num in range(32)]  # For 0x00 - 0x1F
        # Raw string: the backslash is one of the characters under test.
        should_quote.append(r'<>#%"{}|\^[]`')
        should_quote.append(chr(127))  # For 0x7F
        for char in ''.join(should_quote):
            result = urllib.quote(char)
            self.assertEqual(hexescape(char), result,
                             "using quote(): %s should be escaped to %s, not %s" %
                             (char, hexescape(char), result))
            result = urllib.quote_plus(char)
            self.assertEqual(hexescape(char), result,
                             "using quote_plus(): "
                             "%s should be escaped to %s, not %s" %
                             (char, hexescape(char), result))
        partial_quote = "ab[]cd"
        expected = "ab%5B%5Dcd"
        result = urllib.quote(partial_quote)
        self.assertEqual(expected, result,
                         "using quote(): %s != %s" % (expected, result))
        result = urllib.quote_plus(partial_quote)
        self.assertEqual(expected, result,
                         "using quote_plus(): %s != %s" % (expected, result))
        self.assertRaises(TypeError, urllib.quote, None)

    def test_quoting_space(self):
        # Make sure quote() and quote_plus() handle spaces as specified in
        # their unique way
        result = urllib.quote(' ')
        self.assertEqual(result, hexescape(' '),
                         "using quote(): %s != %s" % (result, hexescape(' ')))
        result = urllib.quote_plus(' ')
        self.assertEqual(result, '+',
                         "using quote_plus(): %s != +" % result)
        given = "a b cd e f"
        expect = given.replace(' ', hexescape(' '))
        result = urllib.quote(given)
        self.assertEqual(expect, result,
                         "using quote(): %s != %s" % (expect, result))
        expect = given.replace(' ', '+')
        result = urllib.quote_plus(given)
        self.assertEqual(expect, result,
                         "using quote_plus(): %s != %s" % (expect, result))

    def test_quoting_plus(self):
        self.assertEqual(urllib.quote_plus('alpha+beta gamma'),
                         'alpha%2Bbeta+gamma')
        self.assertEqual(urllib.quote_plus('alpha+beta gamma', '+'),
                         'alpha+beta+gamma')
537
class UnquotingTests(unittest.TestCase):
    """Tests for unquote() and unquote_plus()

    See the doc string for QuotingTests for details on quoting and such.

    """

    def test_unquoting(self):
        # Make sure unquoting of all ASCII values works
        escape_list = []
        for num in range(128):
            given = hexescape(chr(num))
            expect = chr(num)
            result = urllib.unquote(given)
            self.assertEqual(expect, result,
                             "using unquote(): %s != %s" % (expect, result))
            result = urllib.unquote_plus(given)
            self.assertEqual(expect, result,
                             "using unquote_plus(): %s != %s" %
                             (expect, result))
            escape_list.append(given)
        escape_string = ''.join(escape_list)
        # After unquoting the whole run, the only '%' left must be the one
        # decoded from '%25'.
        result = urllib.unquote(escape_string)
        self.assertEqual(result.count('%'), 1,
                         "using unquote(): not all characters unescaped: "
                         "%s" % result)
        # Run the same check through unquote_plus(); the escaped input holds
        # no literal '+', so the expected count is unchanged.
        result = urllib.unquote_plus(escape_string)
        self.assertEqual(result.count('%'), 1,
                         "using unquote_plus(): not all characters unescaped: "
                         "%s" % result)

    def test_unquoting_badpercent(self):
        # Test unquoting on bad percent-escapes
        for given in ('%xab', '%x', '%'):
            expect = given
            result = urllib.unquote(given)
            self.assertEqual(expect, result, "using unquote(): %r != %r"
                             % (expect, result))

    def test_unquoting_mixed_case(self):
        # Test unquoting on mixed-case hex digits in the percent-escapes
        given = '%Ab%eA'
        expect = '\xab\xea'
        result = urllib.unquote(given)
        self.assertEqual(expect, result, "using unquote(): %r != %r"
                         % (expect, result))

    def test_unquoting_parts(self):
        # Make sure unquoting works when have non-quoted characters
        # interspersed
        given = 'ab%sd' % hexescape('c')
        expect = "abcd"
        result = urllib.unquote(given)
        self.assertEqual(expect, result,
                         "using unquote(): %s != %s" % (expect, result))
        result = urllib.unquote_plus(given)
        self.assertEqual(expect, result,
                         "using unquote_plus(): %s != %s" % (expect, result))

    def test_unquoting_plus(self):
        # Test difference between unquote() and unquote_plus()
        given = "are+there+spaces..."
        expect = given
        result = urllib.unquote(given)
        self.assertEqual(expect, result,
                         "using unquote(): %s != %s" % (expect, result))
        expect = given.replace('+', ' ')
        result = urllib.unquote_plus(given)
        self.assertEqual(expect, result,
                         "using unquote_plus(): %s != %s" % (expect, result))

    def test_unquote_with_unicode(self):
        r = urllib.unquote(u'br%C3%BCckner_sapporo_20050930.doc')
        self.assertEqual(r, u'br\xc3\xbcckner_sapporo_20050930.doc')
623
class urlencode_Tests(unittest.TestCase):
    """Tests for urlencode()"""

    def help_inputtype(self, given, test_type):
        """Helper method for testing different input types.

        'given' must lead to only the pairs:
            * 1st, 1
            * 2nd, 2
            * 3rd, 3

        Test cannot assume anything about order.  Docs make no guarantee and
        have possible dictionary input.

        """
        expect_somewhere = ["1st=1", "2nd=2", "3rd=3"]
        result = urllib.urlencode(given)
        for expected in expect_somewhere:
            self.assertIn(expected, result,
                          "testing %s: %s not found in %s" %
                          (test_type, expected, result))
        self.assertEqual(result.count('&'), 2,
                         "testing %s: expected 2 '&'s; got %s" %
                         (test_type, result.count('&')))
        # Check every separator: each pair starts and ends with a digit, so
        # a well-placed '&' has a digit on both sides.  The last-index test
        # keeps a stray trailing '&' a failure rather than an IndexError.
        last_index = len(result) - 1
        for amp_location, char in enumerate(result):
            if char != '&':
                continue
            well_placed = (0 < amp_location < last_index and
                           result[amp_location - 1].isdigit() and
                           result[amp_location + 1].isdigit())
            self.assertTrue(well_placed,
                            "testing %s: '&' not located in proper place in %s" %
                            (test_type, result))
        self.assertEqual(len(result), (5 * 3) + 2,  # 5 chars per thing and amps
                         "testing %s: "
                         "unexpected number of characters: %s != %s" %
                         (test_type, len(result), (5 * 3) + 2))

    def test_using_mapping(self):
        # Test passing in a mapping object as an argument.
        self.help_inputtype({"1st": '1', "2nd": '2', "3rd": '3'},
                            "using dict as input type")

    def test_using_sequence(self):
        # Test passing in a sequence of two-item sequences as an argument.
        self.help_inputtype([('1st', '1'), ('2nd', '2'), ('3rd', '3')],
                            "using sequence of two-item tuples as input")

    def test_quoting(self):
        # Make sure keys and values are quoted using quote_plus()
        given = {"&": "="}
        expect = "%s=%s" % (hexescape('&'), hexescape('='))
        result = urllib.urlencode(given)
        self.assertEqual(expect, result)
        given = {"key name": "A bunch of pluses"}
        expect = "key+name=A+bunch+of+pluses"
        result = urllib.urlencode(given)
        self.assertEqual(expect, result)

    def test_doseq(self):
        # Test that passing True for 'doseq' parameter works correctly
        given = {'sequence': ['1', '2', '3']}
        # Without doseq the list is encoded as its str() form.
        expect = "sequence=%s" % urllib.quote_plus(str(['1', '2', '3']))
        result = urllib.urlencode(given)
        self.assertEqual(expect, result)
        # With doseq every element becomes its own key=value pair.
        result = urllib.urlencode(given, True)
        for value in given["sequence"]:
            expect = "sequence=%s" % value
            self.assertIn(expect, result)
        self.assertEqual(result.count('&'), 2,
                         "Expected 2 '&'s, got %s" % result.count('&'))
692
class Pathname_Tests(unittest.TestCase):
    """Test pathname2url() and url2pathname()"""

    def test_basic(self):
        # Make sure simple tests pass: a path with nothing to quote must
        # convert to the expected URL, and that URL back to the path.
        expected_path = os.path.join("parts", "of", "a", "path")
        expected_url = "parts/of/a/path"
        result = urllib.pathname2url(expected_path)
        self.assertEqual(expected_url, result,
                         "pathname2url() failed; %s != %s" %
                         (result, expected_url))
        result = urllib.url2pathname(expected_url)
        self.assertEqual(expected_path, result,
                         "url2pathname() failed; %s != %s" %
                         (result, expected_path))

    def test_quoting(self):
        # Test automatic quoting and unquoting works for pathname2url() and
        # url2pathname() respectively
        given = os.path.join("needs", "quot=ing", "here")
        expect = "needs/%s/here" % urllib.quote("quot=ing")
        result = urllib.pathname2url(given)
        self.assertEqual(expect, result,
                         "pathname2url() failed; %s != %s" %
                         (expect, result))
        # Feed the URL just produced back in; it must give the original path.
        expect = given
        result = urllib.url2pathname(result)
        self.assertEqual(expect, result,
                         "url2pathname() failed; %s != %s" %
                         (expect, result))
        # A space inside a path component must be quoted the way quote()
        # quotes it.
        given = os.path.join("make sure", "using_quote")
        expect = "%s/using_quote" % urllib.quote("make sure")
        result = urllib.pathname2url(given)
        self.assertEqual(expect, result,
                         "pathname2url() failed; %s != %s" %
                         (expect, result))
        # A '+' in the URL is expected to come back unchanged.
        given = "make+sure/using_unquote"
        expect = os.path.join("make+sure", "using_unquote")
        result = urllib.url2pathname(given)
        self.assertEqual(expect, result,
                         "url2pathname() failed; %s != %s" %
                         (expect, result))

    @unittest.skipUnless(sys.platform == 'win32',
                         'test specific to the nturl2path library')
    def test_ntpath(self):
        # Each of these spellings of the drive root must map to 'C:\'.
        given = ('/C:/', '///C:/', '/C|//')
        expect = 'C:\\'
        for url in given:
            result = urllib.url2pathname(url)
            self.assertEqual(expect, result,
                             'nturl2path.url2pathname() failed; %s != %s' %
                             (expect, result))
        # A '|' drive separator followed by a path component.
        given = '///C|/path'
        expect = 'C:\\path'
        result = urllib.url2pathname(given)
        self.assertEqual(expect, result,
                         'nturl2path.url2pathname() failed; %s != %s' %
                         (expect, result))
752
class Utility_Tests(unittest.TestCase):
    """Testcase to test the various utility functions in the urllib."""

    def test_splitpasswd(self):
        """Check splitpasswd() against a table of user:password strings.

        Some of the password examples are not sensible, but they are
        included to conform to RFC2617 and to address issue4675.
        """
        # (input string, expected (user, password) pair)
        cases = [
            ('user:ab', ('user', 'ab')),
            ('user:a\nb', ('user', 'a\nb')),
            ('user:a\tb', ('user', 'a\tb')),
            ('user:a\rb', ('user', 'a\rb')),
            ('user:a\fb', ('user', 'a\fb')),
            ('user:a\vb', ('user', 'a\vb')),
            ('user:a:b', ('user', 'a:b')),
            ('user:a b', ('user', 'a b')),
            ('user 2:ab', ('user 2', 'ab')),
            ('user+1:a+b', ('user+1', 'a+b')),
        ]
        for userinfo, parts in cases:
            self.assertEqual(parts, urllib.splitpasswd(userinfo))
Senthil Kumaran5e95e762009-03-30 21:51:50 +0000770
771
class URLopener_Tests(unittest.TestCase):
    """Testcase to test the open method of URLopener class."""

    def test_quoted_open(self):
        class DummyURLopener(urllib.URLopener):
            # Handler for the made-up 'spam' scheme; it simply hands back
            # the URL it receives so the test can inspect it.
            def open_spam(self, url):
                return url

        # The space in the URL is expected to come back as %20.
        opened = DummyURLopener().open('spam://example/ /')
        self.assertEqual(opened, '//example/%20/')

        # test the safe characters are not quoted by urlopen
        opened = DummyURLopener().open(
            "spam://c:|windows%/:=&?~#+!$,;'@()*[]|/path/")
        self.assertEqual(opened,
                         "//c:|windows%/:=&?~#+!$,;'@()*[]|/path/")
787
Senthil Kumaran7c2867f2009-04-21 03:24:19 +0000788
# The FTP wrapper tests below are commented out for now.
# I can't really tell why they keep failing on Windows and SPARC.
# Everywhere else they work fine, but on those machines they sometimes
# fail in one of the tests, sometimes in another.  I have a Linux box, and
# the tests pass there.
# If anybody has one of the problematic environments, please help!
# . Facundo
796#
797# def server(evt):
Facundo Batista4f1b1ed2008-05-29 16:39:26 +0000798# import socket, time
Facundo Batistad9880d02007-05-25 04:20:22 +0000799# serv = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
800# serv.settimeout(3)
801# serv.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
802# serv.bind(("", 9093))
803# serv.listen(5)
804# try:
805# conn, addr = serv.accept()
806# conn.send("1 Hola mundo\n")
807# cantdata = 0
808# while cantdata < 13:
809# data = conn.recv(13-cantdata)
810# cantdata += len(data)
811# time.sleep(.3)
812# conn.send("2 No more lines\n")
813# conn.close()
814# except socket.timeout:
815# pass
816# finally:
817# serv.close()
818# evt.set()
819#
820# class FTPWrapperTests(unittest.TestCase):
821#
822# def setUp(self):
Facundo Batista4f1b1ed2008-05-29 16:39:26 +0000823# import ftplib, time, threading
Facundo Batistad9880d02007-05-25 04:20:22 +0000824# ftplib.FTP.port = 9093
825# self.evt = threading.Event()
826# threading.Thread(target=server, args=(self.evt,)).start()
827# time.sleep(.1)
828#
829# def tearDown(self):
830# self.evt.wait()
831#
832# def testBasic(self):
833# # connects
834# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [])
Facundo Batista4f1b1ed2008-05-29 16:39:26 +0000835# ftp.close()
Facundo Batistad9880d02007-05-25 04:20:22 +0000836#
837# def testTimeoutNone(self):
Facundo Batista4f1b1ed2008-05-29 16:39:26 +0000838# # global default timeout is ignored
839# import socket
Benjamin Peterson5c8da862009-06-30 22:57:08 +0000840# self.assertTrue(socket.getdefaulttimeout() is None)
Facundo Batistad9880d02007-05-25 04:20:22 +0000841# socket.setdefaulttimeout(30)
842# try:
843# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [])
844# finally:
Facundo Batista4f1b1ed2008-05-29 16:39:26 +0000845# socket.setdefaulttimeout(None)
Facundo Batistad9880d02007-05-25 04:20:22 +0000846# self.assertEqual(ftp.ftp.sock.gettimeout(), 30)
Facundo Batista4f1b1ed2008-05-29 16:39:26 +0000847# ftp.close()
Facundo Batistad9880d02007-05-25 04:20:22 +0000848#
Facundo Batista4f1b1ed2008-05-29 16:39:26 +0000849# def testTimeoutDefault(self):
850# # global default timeout is used
851# import socket
Benjamin Peterson5c8da862009-06-30 22:57:08 +0000852# self.assertTrue(socket.getdefaulttimeout() is None)
Facundo Batista4f1b1ed2008-05-29 16:39:26 +0000853# socket.setdefaulttimeout(30)
854# try:
855# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [])
856# finally:
857# socket.setdefaulttimeout(None)
858# self.assertEqual(ftp.ftp.sock.gettimeout(), 30)
859# ftp.close()
860#
861# def testTimeoutValue(self):
862# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [],
863# timeout=30)
864# self.assertEqual(ftp.ftp.sock.gettimeout(), 30)
865# ftp.close()
Facundo Batista711a54e2007-05-24 17:50:54 +0000866
Skip Montanaro080c9972001-01-28 21:12:22 +0000867
868
def test_main():
    """Run the urllib test cases listed below through test_support.

    While the suite runs, DeprecationWarnings whose message matches the
    urllib.urlopen / "Python 3.0" pattern are ignored; the warning filters
    are restored when the catch_warnings() block exits.
    """
    import warnings
    with warnings.catch_warnings():
        # Raw string: the pattern is a regular expression, and "\." must
        # reach the matcher as a backslash escape rather than rely on the
        # string literal leaving an unknown escape untouched.
        warnings.filterwarnings('ignore', r".*urllib\.urlopen.*Python 3.0",
                                DeprecationWarning)
        test_support.run_unittest(
            urlopen_FileTests,
            urlopen_HttpTests,
            urlretrieve_FileTests,
            urlretrieve_HttpTests,
            ProxyTests,
            QuotingTests,
            UnquotingTests,
            urlencode_Tests,
            Pathname_Tests,
            Utility_Tests,
            URLopener_Tests,
            #FTPWrapperTests,
        )
Brett Cannon74bfd702003-04-25 09:39:47 +0000888
889
890
# Run the whole suite when this file is executed as a script.
if __name__ == '__main__':
    test_main()