blob: 085eecf0f609033c56d1445ba9a521ca837e950f [file] [log] [blame]
"""Regression tests for urllib"""
2
Jeremy Hylton6102e292000-08-31 15:48:10 +00003import urllib
Hye-Shik Chang39aef792004-06-05 13:30:56 +00004import httplib
Brett Cannon74bfd702003-04-25 09:39:47 +00005import unittest
Brett Cannon74bfd702003-04-25 09:39:47 +00006import os
Senthil Kumarana99b7612011-04-14 12:54:35 +08007import sys
Brett Cannon74bfd702003-04-25 09:39:47 +00008import mimetools
Georg Brandl5a650a22005-08-26 08:51:34 +00009import tempfile
Hye-Shik Chang39aef792004-06-05 13:30:56 +000010import StringIO
Jeremy Hylton6102e292000-08-31 15:48:10 +000011
Senthil Kumaranbcd833f2012-01-11 00:09:24 +080012from test import test_support
13from base64 import b64encode
14
15
def hexescape(char):
    """Percent-escape a single character in the RFC 2396 form.

    Returns '%' followed by the character's ordinal written as upper-case
    hexadecimal, zero-padded to at least two digits (chr(10) -> '%0A').
    """
    # "%02X" does the upper-casing and the zero-padding in one step.
    return "%%%02X" % ord(char)
Jeremy Hylton6102e292000-08-31 15:48:10 +000022
Senthil Kumaran87e58552011-11-01 02:44:45 +080023
class FakeHTTPMixin(object):
    """Mixin that lets a test replace httplib's connection class with a fake.

    fakehttp() installs a connection class whose "socket" is an in-memory
    buffer pre-loaded with canned data; unfakehttp() puts the real
    httplib.HTTPConnection back.
    """

    def fakehttp(self, fakedata):
        """Make httplib.HTTP use a connection that serves *fakedata*.

        Asserts that the real connection class is currently installed.
        """

        class FakeHTTPConnection(httplib.HTTPConnection):
            # Holds whatever was last handed to the fake socket's sendall(),
            # so urlopen tests can inspect it afterwards.
            buf = ""

            def connect(self):
                # No network: the "socket" is a buffer holding fakedata.
                self.sock = FakeSocket(fakedata)

        class FakeSocket(StringIO.StringIO):

            def sendall(self, data):
                # Record the data on the connection class for verification.
                FakeHTTPConnection.buf = data

            def makefile(self, *args, **kwds):
                # The buffer itself plays the role of the file object.
                return self

            def read(self, amt=None):
                # A closed buffer reads as empty rather than raising.
                if not self.closed:
                    return StringIO.StringIO.read(self, amt)
                return ""

            def readline(self, length=None):
                # Same closed-buffer handling as read().
                if not self.closed:
                    return StringIO.StringIO.readline(self, length)
                return ""

        assert httplib.HTTP._connection_class == httplib.HTTPConnection

        httplib.HTTP._connection_class = FakeHTTPConnection

    def unfakehttp(self):
        """Reinstall the real httplib.HTTPConnection class."""
        httplib.HTTP._connection_class = httplib.HTTPConnection
58
59
class urlopen_FileTests(unittest.TestCase):
    """Exercise urlopen() on a temporary local file.

    As much of the returned object's interface as possible is covered here
    so that the tests rely less on connecting to the Net.

    """

    def setUp(self):
        """Write the temp file and open it through a file: URL."""
        self.text = "test_urllib: %s\n" % self.__class__.__name__
        with open(test_support.TESTFN, 'wb') as stream:
            stream.write(self.text)
        self.pathname = test_support.TESTFN
        self.returned_obj = urllib.urlopen("file:%s" % self.pathname)

    def tearDown(self):
        """Close the opened object, then delete the temp file."""
        self.returned_obj.close()
        os.remove(test_support.TESTFN)

    def test_interface(self):
        # The object returned by urlopen() must expose every one of these
        required = ("read", "readline", "readlines", "fileno",
                    "close", "info", "geturl", "getcode", "__iter__")
        for name in required:
            present = hasattr(self.returned_obj, name)
            self.assertTrue(present,
                            "object returned by urlopen() lacks %s attribute" %
                            name)

    def test_read(self):
        contents = self.returned_obj.read()
        self.assertEqual(self.text, contents)

    def test_readline(self):
        # First call yields the only line; the next must signal exhaustion
        first = self.returned_obj.readline()
        self.assertEqual(self.text, first)
        second = self.returned_obj.readline()
        self.assertEqual('', second,
                         "calling readline() after exhausting the file did not"
                         " return an empty string")

    def test_readlines(self):
        every_line = self.returned_obj.readlines()
        self.assertEqual(len(every_line), 1,
                         "readlines() returned the wrong number of lines")
        self.assertEqual(every_line[0], self.text,
                         "readlines() returned improper text")

    def test_fileno(self):
        descriptor = self.returned_obj.fileno()
        self.assertIsInstance(descriptor, int,
                              "fileno() did not return an int")
        # Reading straight from the descriptor must give the file's text
        raw = os.read(descriptor, len(self.text))
        self.assertEqual(raw, self.text,
                         "Reading on the file descriptor returned by fileno() "
                         "did not return the expected text")

    def test_close(self):
        # close() is called here and then once more by tearDown(), so this
        # also checks that closing twice is harmless
        self.returned_obj.close()

    def test_info(self):
        headers = self.returned_obj.info()
        self.assertIsInstance(headers, mimetools.Message)

    def test_geturl(self):
        reported = self.returned_obj.geturl()
        self.assertEqual(reported, self.pathname)

    def test_getcode(self):
        status = self.returned_obj.getcode()
        self.assertEqual(status, None)

    def test_iter(self):
        # No need to count iterations: any line other than the first one
        # would fail the comparison straight away.  __iter__() is called
        # explicitly because that attribute is what is being tested.
        for produced in self.returned_obj.__iter__():
            self.assertEqual(produced, self.text)
Skip Montanaro080c9972001-01-28 21:12:22 +0000136
class ProxyTests(unittest.TestCase):
    """Tests for urllib's reading of proxy-related environment variables."""

    def setUp(self):
        # The guard records every change made to the environment
        self.env = test_support.EnvironmentVarGuard()
        # Start each test with no proxy-related variables set
        proxy_names = [name for name in os.environ.keys()
                       if 'proxy' in name.lower()]
        for name in proxy_names:
            self.env.unset(name)

    def tearDown(self):
        # Leaving the guard puts the proxy-related variables back
        self.env.__exit__()
        del self.env

    def test_getproxies_environment_keep_no_proxies(self):
        self.env.set('NO_PROXY', 'localhost')
        found = urllib.getproxies_environment()
        # Keys are lower-cased and have the '_proxy' suffix removed
        self.assertEqual('localhost', found['no'])
        # A NO_PROXY list whose entries are separated by ", "
        self.env.set('NO_PROXY', 'localhost, anotherdomain.com, newdomain.com')
        bypassed = urllib.proxy_bypass_environment('anotherdomain.com')
        self.assertTrue(bypassed)
Benjamin Peterson2c7470d2008-09-21 21:27:51 +0000160
161
class urlopen_HttpTests(unittest.TestCase, FakeHTTPMixin):
    """Exercise urlopen() over a fake http connection."""

    def test_read(self):
        self.fakehttp('Hello!')
        try:
            response = urllib.urlopen("http://python.org/")
            # One line of data, then an empty read
            self.assertEqual(response.readline(), 'Hello!')
            self.assertEqual(response.readline(), '')
            self.assertEqual(response.geturl(), 'http://python.org/')
            self.assertEqual(response.getcode(), 200)
        finally:
            self.unfakehttp()

    def test_url_fragment(self):
        # Issue #11703: geturl() omits fragments in the original URL.
        self.fakehttp('Hello!')
        url = 'http://docs.python.org/library/urllib.html#OK'
        try:
            response = urllib.urlopen(url)
            self.assertEqual(response.geturl(), url)
        finally:
            self.unfakehttp()

    def test_read_bogus(self):
        # A 401 response must surface from urlopen() as IOError.
        self.fakehttp("""HTTP/1.1 401 Authentication Required
Date: Wed, 02 Jan 2008 03:03:54 GMT
Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
Connection: close
Content-Type: text/html; charset=iso-8859-1
""")
        try:
            self.assertRaises(IOError, urllib.urlopen, "http://python.org/")
        finally:
            self.unfakehttp()

    def test_invalid_redirect(self):
        # A 302 whose Location points at a file: URL must raise IOError.
        self.fakehttp('''HTTP/1.1 302 Found
Date: Wed, 02 Jan 2008 03:03:54 GMT
Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
Location: file:README
Connection: close
Content-Type: text/html; charset=iso-8859-1
''')
        try:
            self.assertRaises(IOError, urllib.urlopen, "http://python.org/")
        finally:
            self.unfakehttp()

    def test_empty_socket(self):
        # urlopen() raises IOError when the underlying socket sends no data
        # at all. (#1680230)
        self.fakehttp('')
        try:
            self.assertRaises(IOError, urllib.urlopen, 'http://something')
        finally:
            self.unfakehttp()

    def test_userpass_inurl(self):
        self.fakehttp('Hello!')
        try:
            fake_connection = httplib.HTTP._connection_class
            expected_header = ("Authorization: Basic %s\r\n" %
                               b64encode('user:pass'))
            response = urllib.urlopen("http://user:pass@python.org/")
            # The credentials from the URL must have been sent as a header
            self.assertIn(expected_header, fake_connection.buf)
            self.assertEqual(response.readline(), "Hello!")
            self.assertEqual(response.readline(), "")
            self.assertEqual(response.geturl(), 'http://user:pass@python.org/')
            self.assertEqual(response.getcode(), 200)
        finally:
            self.unfakehttp()

    def test_userpass_with_spaces_inurl(self):
        self.fakehttp('Hello!')
        try:
            fake_connection = httplib.HTTP._connection_class
            url = "http://a b:c d@python.org/"
            expected_header = ("Authorization: Basic %s\r\n" %
                               b64encode('a b:c d'))
            response = urllib.urlopen(url)
            # The credentials from the URL must have been sent as a header
            self.assertIn(expected_header, fake_connection.buf)
            self.assertEqual(response.readline(), "Hello!")
            self.assertEqual(response.readline(), "")
            # The spaces get quoted in the URL, so it no longer matches
            self.assertNotEqual(response.geturl(), url)
            self.assertEqual(response.getcode(), 200)
        finally:
            self.unfakehttp()
254 self.unfakehttp()
255
256
class urlretrieve_FileTests(unittest.TestCase):
    """Test urllib.urlretrieve() on local files"""

    def setUp(self):
        """Create the source file that the tests retrieve."""
        # List of temporary files.  Each item is a file name (absolute path
        # or relative to the current working directory).  Every file in the
        # list is deleted in tearDown().  This only makes sure the files get
        # deleted; closing files that are still open remains the
        # responsibility of whoever opened them.
        self.tempFiles = []

        # Create a temporary file.
        self.registerFileForCleanUp(test_support.TESTFN)
        self.text = 'testing urllib.urlretrieve'
        # The with block closes the file on success and on error, and a
        # failure to open it propagates instead of being masked.
        with open(test_support.TESTFN, 'wb') as stream:
            stream.write(self.text)

    def tearDown(self):
        """Delete every registered temporary file."""
        for each in self.tempFiles:
            try:
                os.remove(each)
            except OSError:
                # Best effort: a registered file may never have been created.
                pass

    def constructLocalFileUrl(self, filePath):
        """Return a file:// URL for the absolute form of *filePath*."""
        return "file://%s" % urllib.pathname2url(os.path.abspath(filePath))

    def createNewTempFile(self, data=""):
        """Creates a new temporary file containing the specified data,
        registers the file for deletion during the test fixture tear down, and
        returns the absolute path of the file."""

        newFd, newFilePath = tempfile.mkstemp()
        # Register straight away so the file is removed even if the write
        # below fails.
        self.registerFileForCleanUp(newFilePath)
        try:
            newFile = os.fdopen(newFd, "wb")
        except Exception:
            # No file object owns the descriptor yet, so close it here
            # rather than leak it.
            os.close(newFd)
            raise
        with newFile:
            newFile.write(data)
        return newFilePath

    def registerFileForCleanUp(self, fileName):
        """Remember *fileName* so that tearDown() deletes it."""
        self.tempFiles.append(fileName)

    def test_basic(self):
        # Make sure that a local file just gets its own location returned and
        # a headers value is returned.
        result = urllib.urlretrieve("file:%s" % test_support.TESTFN)
        self.assertEqual(result[0], test_support.TESTFN)
        self.assertIsInstance(result[1], mimetools.Message,
                              "did not get a mimetools.Message instance as "
                              "second returned value")

    def test_copy(self):
        # Test that setting the filename argument works.
        second_temp = "%s.2" % test_support.TESTFN
        self.registerFileForCleanUp(second_temp)
        result = urllib.urlretrieve(self.constructLocalFileUrl(
            test_support.TESTFN), second_temp)
        self.assertEqual(second_temp, result[0])
        self.assertTrue(os.path.exists(second_temp), "copy of the file was not "
                                                     "made")
        with open(second_temp, 'rb') as copied:
            text = copied.read()
        self.assertEqual(self.text, text)

    def test_reporthook(self):
        # Make sure that the reporthook works.
        # One-element list so the nested function can update the counter.
        calls_seen = [0]

        def hooktester(count, block_size, total_size):
            self.assertIsInstance(count, int)
            self.assertIsInstance(block_size, int)
            self.assertIsInstance(total_size, int)
            # The block count must go up by exactly one per call.
            self.assertEqual(count, calls_seen[0])
            calls_seen[0] += 1

        second_temp = "%s.2" % test_support.TESTFN
        self.registerFileForCleanUp(second_temp)
        urllib.urlretrieve(self.constructLocalFileUrl(test_support.TESTFN),
                           second_temp, hooktester)

    def test_reporthook_0_bytes(self):
        # Test on zero length file. Should call reporthook only 1 time.
        report = []

        def hooktester(count, block_size, total_size):
            report.append((count, block_size, total_size))

        srcFileName = self.createNewTempFile()
        urllib.urlretrieve(self.constructLocalFileUrl(srcFileName),
                           test_support.TESTFN, hooktester)
        self.assertEqual(len(report), 1)
        self.assertEqual(report[0][2], 0)

    def test_reporthook_5_bytes(self):
        # Test on 5 byte file. Should call reporthook only 2 times (once when
        # the "network connection" is established and once when the block is
        # read). Since the block size is 8192 bytes, only one block read is
        # required to read the entire file.
        report = []

        def hooktester(count, block_size, total_size):
            report.append((count, block_size, total_size))

        srcFileName = self.createNewTempFile("x" * 5)
        urllib.urlretrieve(self.constructLocalFileUrl(srcFileName),
                           test_support.TESTFN, hooktester)
        self.assertEqual(len(report), 2)
        self.assertEqual(report[0][1], 8192)
        self.assertEqual(report[0][2], 5)

    def test_reporthook_8193_bytes(self):
        # Test on 8193 byte file. Should call reporthook only 3 times (once
        # when the "network connection" is established, once for the next 8192
        # bytes, and once for the last byte).
        report = []

        def hooktester(count, block_size, total_size):
            report.append((count, block_size, total_size))

        srcFileName = self.createNewTempFile("x" * 8193)
        urllib.urlretrieve(self.constructLocalFileUrl(srcFileName),
                           test_support.TESTFN, hooktester)
        self.assertEqual(len(report), 3)
        self.assertEqual(report[0][1], 8192)
        self.assertEqual(report[0][2], 8193)
Skip Montanaro080c9972001-01-28 21:12:22 +0000388
Senthil Kumaran87e58552011-11-01 02:44:45 +0800389
class urlretrieve_HttpTests(unittest.TestCase, FakeHTTPMixin):
    """Exercise urllib.urlretrieve() over fake http connections"""

    # Canned response that declares Content-Length: 100 but carries only a
    # three-byte body.  Both tests below serve it.
    _SHORT_RESPONSE = """HTTP/1.1 200 OK
Date: Wed, 02 Jan 2008 03:03:54 GMT
Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
Connection: close
Content-Length: 100
Content-Type: text/html; charset=iso-8859-1

FF
"""

    def test_short_content_raises_ContentTooShortError(self):
        self.fakehttp(self._SHORT_RESPONSE)

        def _reporthook(par1, par2, par3):
            # The hook only has to exist; it does nothing with the progress
            return None

        try:
            self.assertRaises(urllib.ContentTooShortError,
                              urllib.urlretrieve,
                              'http://example.com',
                              reporthook=_reporthook)
        finally:
            self.unfakehttp()

    def test_short_content_raises_ContentTooShortError_without_reporthook(self):
        self.fakehttp(self._SHORT_RESPONSE)
        try:
            self.assertRaises(urllib.ContentTooShortError,
                              urllib.urlretrieve,
                              'http://example.com/')
        finally:
            self.unfakehttp()
427
class QuotingTests(unittest.TestCase):
    r"""Tests for urllib.quote() and urllib.quote_plus()

    RFC 2396 ("Uniform Resource Identifiers") escapes a character by writing
    it as '%' + <2 character US-ASCII hex value>.  In Python,
    ``'%' + hex(ord(<character>))[2:]`` escapes a character properly.  The
    case of the hex letters does not matter.

    Character sets referred to by the tests:

    Reserved characters : ";/?:@&=+$,"
        Have special meaning in URIs and must be escaped if not being used
        for their special meaning
    Data characters : letters, digits, and "-_.!~*'()"
        Unreserved and do not need to be escaped; can be, though, if desired
    Control characters : 0x00 - 0x1F, 0x7F
        Have no use in URIs so must be escaped
    space : 0x20
        Must be escaped
    Delimiters : '<>#%"'
        Must be escaped
    Unwise : "{}|\^[]`"
        Must be escaped

    """

    def test_never_quote(self):
        # Letters, digits, and "_.-" must come back from quote() untouched
        do_not_quote = ("ABCDEFGHIJKLMNOPQRSTUVWXYZ"
                        "abcdefghijklmnopqrstuvwxyz"
                        "0123456789"
                        "_.-")
        quoted = urllib.quote(do_not_quote)
        self.assertEqual(do_not_quote, quoted,
                         "using quote(): %s != %s" % (do_not_quote, quoted))
        quoted = urllib.quote_plus(do_not_quote)
        self.assertEqual(do_not_quote, quoted,
                         "using quote_plus(): %s != %s" %
                         (do_not_quote, quoted))

    def test_default_safe(self):
        # The 'safe' parameter of quote() defaults to '/'
        default_safe = urllib.quote.func_defaults[0]
        self.assertEqual(default_safe, '/')

    def test_safe(self):
        # Characters named in 'safe' must be left unquoted
        quote_by_default = "<>"
        quoted = urllib.quote(quote_by_default, safe=quote_by_default)
        self.assertEqual(quote_by_default, quoted,
                         "using quote(): %s != %s" %
                         (quote_by_default, quoted))
        quoted = urllib.quote_plus(quote_by_default, safe=quote_by_default)
        self.assertEqual(quote_by_default, quoted,
                         "using quote_plus(): %s != %s" %
                         (quote_by_default, quoted))

    def test_default_quoting(self):
        # Every character that must be quoted is quoted by default.  The
        # space is left out here; it has a test of its own.
        control_chars = ''.join([chr(num) for num in range(32)])  # 0x00-0x1F
        should_quote = control_chars + '<>#%"{}|\\^[]`' + chr(127)  # + 0x7F
        for char in should_quote:
            wanted = hexescape(char)
            quoted = urllib.quote(char)
            self.assertEqual(wanted, quoted,
                             "using quote(): %s should be escaped to %s, not %s" %
                             (char, wanted, quoted))
            quoted = urllib.quote_plus(char)
            self.assertEqual(wanted, quoted,
                             "using quote_plus(): "
                             "%s should be escapes to %s, not %s" %
                             (char, wanted, quoted))
        # Only the characters that need it get quoted inside a longer string
        partial_quote = "ab[]cd"
        expected = "ab%5B%5Dcd"
        quoted = urllib.quote(partial_quote)
        self.assertEqual(expected, quoted,
                         "using quote(): %s != %s" % (expected, quoted))
        quoted = urllib.quote_plus(partial_quote)
        self.assertEqual(expected, quoted,
                         "using quote_plus(): %s != %s" % (expected, quoted))
        self.assertRaises(TypeError, urllib.quote, None)

    def test_quoting_space(self):
        # quote() hex-escapes a space, quote_plus() turns it into '+'
        escaped_space = hexescape(' ')
        quoted = urllib.quote(' ')
        self.assertEqual(quoted, escaped_space,
                         "using quote(): %s != %s" % (quoted, escaped_space))
        quoted = urllib.quote_plus(' ')
        self.assertEqual(quoted, '+',
                         "using quote_plus(): %s != +" % quoted)
        given = "a b cd e f"
        expect = given.replace(' ', escaped_space)
        quoted = urllib.quote(given)
        self.assertEqual(expect, quoted,
                         "using quote(): %s != %s" % (expect, quoted))
        expect = given.replace(' ', '+')
        quoted = urllib.quote_plus(given)
        self.assertEqual(expect, quoted,
                         "using quote_plus(): %s != %s" % (expect, quoted))

    def test_quoting_plus(self):
        # A literal '+' is escaped unless it is listed as safe
        source = 'alpha+beta gamma'
        self.assertEqual(urllib.quote_plus(source),
                         'alpha%2Bbeta+gamma')
        self.assertEqual(urllib.quote_plus(source, '+'),
                         'alpha+beta+gamma')
533 'alpha+beta+gamma')
534
class UnquotingTests(unittest.TestCase):
    """Tests for unquote() and unquote_plus()

    See the doc string for QuotingTests for details on quoting and such.

    """

    def test_unquoting(self):
        # Make sure unquoting of all ASCII values works
        escape_list = []
        for num in range(128):
            given = hexescape(chr(num))
            expect = chr(num)
            result = urllib.unquote(given)
            self.assertEqual(expect, result,
                             "using unquote(): %s != %s" % (expect, result))
            result = urllib.unquote_plus(given)
            self.assertEqual(expect, result,
                             "using unquote_plus(): %s != %s" %
                             (expect, result))
            escape_list.append(given)
        escape_string = ''.join(escape_list)
        del escape_list
        # After decoding the whole string, the only '%' left is the one
        # that came from the escaped '%' character itself.
        result = urllib.unquote(escape_string)
        self.assertEqual(result.count('%'), 1,
                         "using unquote(): not all characters escaped: "
                         "%s" % result)
        # Run the same whole-string check through unquote_plus() as well,
        # so that both functions are covered.
        result = urllib.unquote_plus(escape_string)
        self.assertEqual(result.count('%'), 1,
                         "using unquote_plus(): not all characters escaped: "
                         "%s" % result)

    def test_unquoting_badpercent(self):
        # Test unquoting on bad percent-escapes: each malformed input must
        # come back unchanged
        for given in ('%xab', '%x', '%'):
            expect = given
            result = urllib.unquote(given)
            self.assertEqual(expect, result, "using unquote(): %r != %r"
                             % (expect, result))

    def test_unquoting_mixed_case(self):
        # Test unquoting on mixed-case hex digits in the percent-escapes
        given = '%Ab%eA'
        expect = '\xab\xea'
        result = urllib.unquote(given)
        self.assertEqual(expect, result, "using unquote(): %r != %r"
                         % (expect, result))

    def test_unquoting_parts(self):
        # Make sure unquoting works when have non-quoted characters
        # interspersed
        given = 'ab%sd' % hexescape('c')
        expect = "abcd"
        result = urllib.unquote(given)
        self.assertEqual(expect, result,
                         "using unquote(): %s != %s" % (expect, result))
        result = urllib.unquote_plus(given)
        self.assertEqual(expect, result,
                         "using unquote_plus(): %s != %s" % (expect, result))

    def test_unquoting_plus(self):
        # Test difference between unquote() and unquote_plus(): only
        # unquote_plus() turns '+' into a space
        given = "are+there+spaces..."
        expect = given
        result = urllib.unquote(given)
        self.assertEqual(expect, result,
                         "using unquote(): %s != %s" % (expect, result))
        expect = given.replace('+', ' ')
        result = urllib.unquote_plus(given)
        self.assertEqual(expect, result,
                         "using unquote_plus(): %s != %s" % (expect, result))

    def test_unquote_with_unicode(self):
        # A unicode input yields a unicode result in which each escape
        # becomes the code point with that value
        r = urllib.unquote(u'br%C3%BCckner_sapporo_20050930.doc')
        self.assertEqual(r, u'br\xc3\xbcckner_sapporo_20050930.doc')
620
class urlencode_Tests(unittest.TestCase):
    """Tests for urlencode()"""

    def help_inputtype(self, given, test_type):
        """Shared checks for the different kinds of urlencode() input.

        'given' must lead to only the pairs:
            * 1st, 1
            * 2nd, 2
            * 3rd, 3

        Nothing is assumed about the order of the pairs: the docs make no
        guarantee, and the input may be a dictionary.

        """
        encoded = urllib.urlencode(given)
        for pair in ("1st=1", "2nd=2", "3rd=3"):
            self.assertIn(pair, encoded,
                          "testing %s: %s not found in %s" %
                          (test_type, pair, encoded))
        amp_count = encoded.count('&')
        self.assertEqual(amp_count, 2,
                         "testing %s: expected 2 '&'s; got %s" %
                         (test_type, amp_count))
        # The first '&' has to sit between two digits
        first_amp = encoded.index('&')
        before = encoded[first_amp - 1]
        after = encoded[first_amp + 1]
        self.assertTrue(before.isdigit() and after.isdigit(),
                        "testing %s: '&' not located in proper place in %s" %
                        (test_type, encoded))
        # Three pairs of five characters each, plus the two ampersands
        expected_length = (5 * 3) + 2
        self.assertEqual(len(encoded), expected_length,
                         "testing %s: "
                         "unexpected number of characters: %s != %s" %
                         (test_type, len(encoded), expected_length))

    def test_using_mapping(self):
        # A mapping object is accepted as the argument
        mapping = {"1st": '1', "2nd": '2', "3rd": '3'}
        self.help_inputtype(mapping, "using dict as input type")

    def test_using_sequence(self):
        # A sequence of two-item sequences is accepted as the argument
        pairs = [('1st', '1'), ('2nd', '2'), ('3rd', '3')]
        self.help_inputtype(pairs,
                            "using sequence of two-item tuples as input")

    def test_quoting(self):
        # Keys and values are quoted using quote_plus()
        expect = "%s=%s" % (hexescape('&'), hexescape('='))
        self.assertEqual(expect, urllib.urlencode({"&": "="}))
        expect = "key+name=A+bunch+of+pluses"
        self.assertEqual(expect,
                         urllib.urlencode({"key name": "A bunch of pluses"}))

    def test_doseq(self):
        # Passing True for the 'doseq' parameter expands a sequence value
        values = ['1', '2', '3']
        given = {'sequence': values}
        # Without doseq the whole list is stringified and quoted as one value
        expect = "sequence=%s" % urllib.quote_plus(str(['1', '2', '3']))
        encoded = urllib.urlencode(given)
        self.assertEqual(expect, encoded)
        # With doseq each element gets a key=value pair of its own
        encoded = urllib.urlencode(given, True)
        for value in values:
            expect = "sequence=%s" % value
            self.assertIn(expect, encoded)
        self.assertEqual(encoded.count('&'), 2,
                         "Expected 2 '&'s, got %s" % encoded.count('&'))
688 "Expected 2 '&'s, got %s" % result.count('&'))
689
class Pathname_Tests(unittest.TestCase):
    """Tests for pathname2url() and url2pathname()"""

    def test_basic(self):
        # A plain path converts to a URL path and back again
        expected_path = os.path.join("parts", "of", "a", "path")
        expected_url = "parts/of/a/path"
        as_url = urllib.pathname2url(expected_path)
        self.assertEqual(expected_url, as_url,
                         "pathname2url() failed; %s != %s" %
                         (as_url, expected_url))
        as_path = urllib.url2pathname(expected_url)
        self.assertEqual(expected_path, as_path,
                         "url2pathame() failed; %s != %s" %
                         (as_path, expected_path))

    def test_quoting(self):
        # pathname2url() quotes automatically and url2pathname() unquotes
        original = os.path.join("needs", "quot=ing", "here")
        expect = "needs/%s/here" % urllib.quote("quot=ing")
        as_url = urllib.pathname2url(original)
        self.assertEqual(expect, as_url,
                         "pathname2url() failed; %s != %s" %
                         (expect, as_url))
        # Converting that URL back must give the original path
        round_trip = urllib.url2pathname(as_url)
        self.assertEqual(original, round_trip,
                         "url2pathname() failed; %s != %s" %
                         (original, round_trip))
        # A space in a path component is quoted the way quote() does it
        spaced = os.path.join("make sure", "using_quote")
        expect = "%s/using_quote" % urllib.quote("make sure")
        as_url = urllib.pathname2url(spaced)
        self.assertEqual(expect, as_url,
                         "pathname2url() failed; %s != %s" %
                         (expect, as_url))
        # A '+' in a URL stays a '+' in the resulting path
        plus_url = "make+sure/using_unquote"
        expect = os.path.join("make+sure", "using_unquote")
        as_path = urllib.url2pathname(plus_url)
        self.assertEqual(expect, as_path,
                         "url2pathname() failed; %s != %s" %
                         (expect, as_path))

    @unittest.skipUnless(sys.platform == 'win32',
                         'test specific to the nturl2path library')
    def test_ntpath(self):
        # Each of these spellings of the drive root must map to 'C:\'
        drive_root = 'C:\\'
        for url in ('/C:/', '///C:/', '/C|//'):
            as_path = urllib.url2pathname(url)
            self.assertEqual(drive_root, as_path,
                             'nturl2path.url2pathname() failed; %s != %s' %
                             (drive_root, as_path))
        # A '|' after the drive letter is read as ':'
        expect = 'C:\\path'
        as_path = urllib.url2pathname('///C|/path')
        self.assertEqual(expect, as_path,
                         'nturl2path.url2pathname() failed; %s != %s' %
                         (expect, as_path))
748 (expect, result))
749
class Utility_Tests(unittest.TestCase):
    """Tests for the miscellaneous utility functions in urllib."""

    def test_splitpasswd(self):
        """Some of the password examples are not sensible, but they are
        included to conform to RFC2617 and to address issue4675.
        """
        # (input handed to splitpasswd(), expected (user, password) pair)
        samples = (
            ('user:ab', ('user', 'ab')),
            ('user:a\nb', ('user', 'a\nb')),
            ('user:a\tb', ('user', 'a\tb')),
            ('user:a\rb', ('user', 'a\rb')),
            ('user:a\fb', ('user', 'a\fb')),
            ('user:a\vb', ('user', 'a\vb')),
            ('user:a:b', ('user', 'a:b')),
            ('user:a b', ('user', 'a b')),
            ('user 2:ab', ('user 2', 'ab')),
            ('user+1:a+b', ('user+1', 'a+b')),
        )
        for userinfo, expected in samples:
            self.assertEqual(expected, urllib.splitpasswd(userinfo))
Senthil Kumaran5e95e762009-03-30 21:51:50 +0000767
768
class URLopener_Tests(unittest.TestCase):
    """Tests for the open() method of the URLopener class."""

    def test_quoted_open(self):
        class DummyURLopener(urllib.URLopener):
            # Handler for the made-up "spam" scheme: hand back the URL it
            # was given so the test can inspect what open() passed along.
            def open_spam(self, url):
                return url

        # A space in the URL must arrive at the handler percent-encoded.
        opened = DummyURLopener().open('spam://example/ /')
        self.assertEqual(opened, '//example/%20/')

        # test the safe characters are not quoted by urlopen
        opened = DummyURLopener().open(
            "spam://c:|windows%/:=&?~#+!$,;'@()*[]|/path/")
        self.assertEqual(opened,
                         "//c:|windows%/:=&?~#+!$,;'@()*[]|/path/")
784
Senthil Kumaran7c2867f2009-04-21 03:24:19 +0000785
# Just commented them out.
# Can't really tell why they keep failing on Windows and SPARC.
# Everywhere else they work OK, but on those machines they sometimes
# fail in one of the tests, sometimes in another.  I have a Linux box, and
# the tests pass there.
# If anybody has one of the problematic environments, please help!
# . Facundo
793#
794# def server(evt):
Facundo Batista4f1b1ed2008-05-29 16:39:26 +0000795# import socket, time
Facundo Batistad9880d02007-05-25 04:20:22 +0000796# serv = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
797# serv.settimeout(3)
798# serv.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
799# serv.bind(("", 9093))
800# serv.listen(5)
801# try:
802# conn, addr = serv.accept()
803# conn.send("1 Hola mundo\n")
804# cantdata = 0
805# while cantdata < 13:
806# data = conn.recv(13-cantdata)
807# cantdata += len(data)
808# time.sleep(.3)
809# conn.send("2 No more lines\n")
810# conn.close()
811# except socket.timeout:
812# pass
813# finally:
814# serv.close()
815# evt.set()
816#
817# class FTPWrapperTests(unittest.TestCase):
818#
819# def setUp(self):
Facundo Batista4f1b1ed2008-05-29 16:39:26 +0000820# import ftplib, time, threading
Facundo Batistad9880d02007-05-25 04:20:22 +0000821# ftplib.FTP.port = 9093
822# self.evt = threading.Event()
823# threading.Thread(target=server, args=(self.evt,)).start()
824# time.sleep(.1)
825#
826# def tearDown(self):
827# self.evt.wait()
828#
829# def testBasic(self):
830# # connects
831# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [])
Facundo Batista4f1b1ed2008-05-29 16:39:26 +0000832# ftp.close()
Facundo Batistad9880d02007-05-25 04:20:22 +0000833#
834# def testTimeoutNone(self):
Facundo Batista4f1b1ed2008-05-29 16:39:26 +0000835# # global default timeout is ignored
836# import socket
Benjamin Peterson5c8da862009-06-30 22:57:08 +0000837# self.assertTrue(socket.getdefaulttimeout() is None)
Facundo Batistad9880d02007-05-25 04:20:22 +0000838# socket.setdefaulttimeout(30)
839# try:
840# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [])
841# finally:
Facundo Batista4f1b1ed2008-05-29 16:39:26 +0000842# socket.setdefaulttimeout(None)
Facundo Batistad9880d02007-05-25 04:20:22 +0000843# self.assertEqual(ftp.ftp.sock.gettimeout(), 30)
Facundo Batista4f1b1ed2008-05-29 16:39:26 +0000844# ftp.close()
Facundo Batistad9880d02007-05-25 04:20:22 +0000845#
Facundo Batista4f1b1ed2008-05-29 16:39:26 +0000846# def testTimeoutDefault(self):
847# # global default timeout is used
848# import socket
Benjamin Peterson5c8da862009-06-30 22:57:08 +0000849# self.assertTrue(socket.getdefaulttimeout() is None)
Facundo Batista4f1b1ed2008-05-29 16:39:26 +0000850# socket.setdefaulttimeout(30)
851# try:
852# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [])
853# finally:
854# socket.setdefaulttimeout(None)
855# self.assertEqual(ftp.ftp.sock.gettimeout(), 30)
856# ftp.close()
857#
858# def testTimeoutValue(self):
859# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [],
860# timeout=30)
861# self.assertEqual(ftp.ftp.sock.gettimeout(), 30)
862# ftp.close()
Facundo Batista711a54e2007-05-24 17:50:54 +0000863
Skip Montanaro080c9972001-01-28 21:12:22 +0000864
865
def test_main():
    """Run the urllib test classes, ignoring the urllib.urlopen warning."""
    import warnings
    with warnings.catch_warnings():
        # The message argument is a regular expression, so it is written as
        # a raw string: the pattern is unchanged, but '\.' no longer relies
        # on an unrecognised string escape being passed through.
        warnings.filterwarnings('ignore', r".*urllib\.urlopen.*Python 3.0",
                                DeprecationWarning)
        test_support.run_unittest(
            urlopen_FileTests,
            urlopen_HttpTests,
            urlretrieve_FileTests,
            urlretrieve_HttpTests,
            ProxyTests,
            QuotingTests,
            UnquotingTests,
            urlencode_Tests,
            Pathname_Tests,
            Utility_Tests,
            URLopener_Tests,
            #FTPWrapperTests,
        )
Brett Cannon74bfd702003-04-25 09:39:47 +0000885
886
887
# Run the whole suite when this file is executed as a script.
if __name__ == "__main__":
    test_main()