blob: 16febaec4490c88913436d1f242ac0aad9fec0f3 [file] [log] [blame]
"""Regression tests for urllib"""
2
Jeremy Hylton6102e292000-08-31 15:48:10 +00003import urllib
Hye-Shik Chang39aef792004-06-05 13:30:56 +00004import httplib
Brett Cannon74bfd702003-04-25 09:39:47 +00005import unittest
6from test import test_support
7import os
8import mimetools
Georg Brandl5a650a22005-08-26 08:51:34 +00009import tempfile
Hye-Shik Chang39aef792004-06-05 13:30:56 +000010import StringIO
Jeremy Hylton6102e292000-08-31 15:48:10 +000011
def hexescape(char):
    """Return the percent-escape for a single character (RFC 2396).

    The result is '%' followed by the character's ordinal written as
    upper-case hexadecimal, zero-padded to at least two digits.
    """
    return "%%%02X" % ord(char)
Jeremy Hylton6102e292000-08-31 15:48:10 +000018
class urlopen_FileTests(unittest.TestCase):
    """Exercise the object urlopen() returns for a temporary local file.

    Covers as much of the returned object's interface as possible so that
    testing relies less on connecting to the Net.

    """

    def setUp(self):
        """Write a one-line temp file and open it through urlopen()."""
        self.text = "test_urllib: %s\n" % self.__class__.__name__
        with open(test_support.TESTFN, 'wb') as temp_file:
            temp_file.write(self.text)
        self.pathname = test_support.TESTFN
        self.returned_obj = urllib.urlopen("file:%s" % self.pathname)

    def tearDown(self):
        """Close the urlopen() object and delete the temp file."""
        self.returned_obj.close()
        os.remove(test_support.TESTFN)

    def test_interface(self):
        # The object returned by urlopen() must expose each of these names.
        required_attrs = ("read", "readline", "readlines", "fileno",
                          "close", "info", "geturl", "getcode", "__iter__")
        for attr in required_attrs:
            has_attr = hasattr(self.returned_obj, attr)
            self.assertTrue(has_attr,
                            "object returned by urlopen() lacks %s attribute" %
                            attr)

    def test_read(self):
        content = self.returned_obj.read()
        self.assertEqual(self.text, content)

    def test_readline(self):
        first_line = self.returned_obj.readline()
        self.assertEqual(self.text, first_line)
        after_eof = self.returned_obj.readline()
        self.assertEqual('', after_eof,
                         "calling readline() after exhausting the file did not"
                         " return an empty string")

    def test_readlines(self):
        all_lines = self.returned_obj.readlines()
        self.assertEqual(len(all_lines), 1,
                         "readlines() returned the wrong number of lines")
        self.assertEqual(all_lines[0], self.text,
                         "readlines() returned improper text")

    def test_fileno(self):
        descriptor = self.returned_obj.fileno()
        self.assertIsInstance(descriptor, int,
                              "fileno() did not return an int")
        raw_text = os.read(descriptor, len(self.text))
        self.assertEqual(raw_text, self.text,
                         "Reading on the file descriptor returned by fileno() "
                         "did not return the expected text")

    def test_close(self):
        # Close here; tearDown() then calls close() a second time on the
        # same object.
        self.returned_obj.close()

    def test_info(self):
        headers = self.returned_obj.info()
        self.assertIsInstance(headers, mimetools.Message)

    def test_geturl(self):
        url = self.returned_obj.geturl()
        self.assertEqual(url, self.pathname)

    def test_getcode(self):
        status = self.returned_obj.getcode()
        self.assertEqual(status, None)

    def test_iter(self):
        # No need to count iterations: the comparison fails as soon as the
        # iterator yields anything other than the single line written in
        # setUp().
        line_iterator = self.returned_obj.__iter__()
        for line in line_iterator:
            self.assertEqual(line, self.text)
Skip Montanaro080c9972001-01-28 21:12:22 +000095
class ProxyTests(unittest.TestCase):
    """Tests for urllib.getproxies_environment()."""

    def setUp(self):
        # Records changes to env vars
        self.env = test_support.EnvironmentVarGuard()
        # Delete all proxy related env vars.  Iterate over a snapshot of the
        # keys because unsetting is presumably reflected in os.environ.
        for k in list(os.environ.keys()):
            if 'proxy' in k.lower():
                self.env.unset(k)

    def tearDown(self):
        # Restore all proxy related env vars
        self.env.__exit__()
        del self.env

    def test_getproxies_environment_keep_no_proxies(self):
        self.env.set('NO_PROXY', 'localhost')
        proxies = urllib.getproxies_environment()
        # getproxies_environment() uses lower-cased, truncated (no '_proxy')
        # keys
        self.assertEqual('localhost', proxies['no'])
116
117
class urlopen_HttpTests(unittest.TestCase):
    """Test urlopen() opening a fake http connection."""

    def fakehttp(self, fakedata):
        """Make httplib.HTTP use a connection whose socket serves fakedata."""

        class FakeSocket(StringIO.StringIO):
            """In-memory stand-in for a socket, pre-loaded with fakedata."""

            def sendall(self, data):
                # Outgoing request bytes are discarded.
                pass

            def makefile(self, mode, bufsize):
                # The fake socket doubles as its own file object.
                return self

            def read(self, amt=None):
                if self.closed:
                    return ''
                return StringIO.StringIO.read(self, amt)

            def readline(self, length=None):
                if self.closed:
                    return ''
                return StringIO.StringIO.readline(self, length)

        class FakeHTTPConnection(httplib.HTTPConnection):
            def connect(self):
                self.sock = FakeSocket(fakedata)

        # Refuse to stack fakes: the real connection class must be in place.
        assert httplib.HTTP._connection_class == httplib.HTTPConnection
        httplib.HTTP._connection_class = FakeHTTPConnection

    def unfakehttp(self):
        """Put the real connection class back on httplib.HTTP."""
        httplib.HTTP._connection_class = httplib.HTTPConnection

    def test_read(self):
        self.fakehttp('Hello!')
        try:
            response = urllib.urlopen("http://python.org/")
            self.assertEqual(response.readline(), 'Hello!')
            self.assertEqual(response.readline(), '')
            self.assertEqual(response.geturl(), 'http://python.org/')
            self.assertEqual(response.getcode(), 200)
        finally:
            self.unfakehttp()

    def test_read_bogus(self):
        # urlopen() should raise IOError for many error codes.
        canned_response = (
            'HTTP/1.1 401 Authentication Required\n'
            'Date: Wed, 02 Jan 2008 03:03:54 GMT\n'
            'Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 '
            'OpenSSL/0.9.7e\n'
            'Connection: close\n'
            'Content-Type: text/html; charset=iso-8859-1\n')
        self.fakehttp(canned_response)
        try:
            self.assertRaises(IOError, urllib.urlopen, "http://python.org/")
        finally:
            self.unfakehttp()

    def test_empty_socket(self):
        # urlopen() raises IOError if the underlying socket does not send any
        # data. (#1680230)
        self.fakehttp('')
        try:
            self.assertRaises(IOError, urllib.urlopen, 'http://something')
        finally:
            self.unfakehttp()
172
class urlretrieve_FileTests(unittest.TestCase):
    """Test urllib.urlretrieve() on local files"""

    def setUp(self):
        # Create a list of temporary files. Each item in the list is a file
        # name (absolute path or relative to the current working directory).
        # All files in this list will be deleted in the tearDown method. Note,
        # this only helps make sure temporary files get deleted, but it
        # does nothing about trying to close files that may still be open. It
        # is the responsibility of the developer to properly close files even
        # when exceptional conditions occur.
        self.tempFiles = []

        # Create a temporary file.
        self.registerFileForCleanUp(test_support.TESTFN)
        self.text = 'testing urllib.urlretrieve'
        with open(test_support.TESTFN, 'wb') as temp_file:
            temp_file.write(self.text)

    def tearDown(self):
        # Delete the temporary files.  Removal is best-effort: a file that
        # was never created (or is already gone) is not an error.
        for each in self.tempFiles:
            try:
                os.remove(each)
            except OSError:
                pass

    def constructLocalFileUrl(self, filePath):
        """Return a file:// URL for the absolute form of filePath."""
        return "file://%s" % urllib.pathname2url(os.path.abspath(filePath))

    def createNewTempFile(self, data=""):
        """Creates a new temporary file containing the specified data,
        registers the file for deletion during the test fixture tear down, and
        returns the absolute path of the file."""

        newFd, newFilePath = tempfile.mkstemp()
        self.registerFileForCleanUp(newFilePath)
        try:
            newFile = os.fdopen(newFd, "wb")
        except Exception:
            # No file object took ownership of the descriptor; close it
            # ourselves so it does not leak.
            os.close(newFd)
            raise
        with newFile:
            newFile.write(data)
        return newFilePath

    def registerFileForCleanUp(self, fileName):
        """Remember fileName so tearDown() tries to delete it."""
        self.tempFiles.append(fileName)

    def test_basic(self):
        # Make sure that a local file just gets its own location returned and
        # a headers value is returned.
        result = urllib.urlretrieve("file:%s" % test_support.TESTFN)
        self.assertEqual(result[0], test_support.TESTFN)
        self.assertIsInstance(result[1], mimetools.Message,
                              "did not get a mimetools.Message instance as "
                              "second returned value")

    def test_copy(self):
        # Test that setting the filename argument works.
        second_temp = "%s.2" % test_support.TESTFN
        self.registerFileForCleanUp(second_temp)
        result = urllib.urlretrieve(self.constructLocalFileUrl(
            test_support.TESTFN), second_temp)
        self.assertEqual(second_temp, result[0])
        self.assertTrue(os.path.exists(second_temp), "copy of the file was not "
                                                     "made")
        with open(second_temp, 'rb') as copied_file:
            text = copied_file.read()
        self.assertEqual(self.text, text)

    def test_reporthook(self):
        # Make sure that the reporthook works.
        # One-element list so the nested function can update the counter
        # (Python 2 has no 'nonlocal').
        expected_count = [0]

        def hooktester(count, block_size, total_size):
            self.assertIsInstance(count, int)
            self.assertIsInstance(block_size, int)
            self.assertIsInstance(total_size, int)
            self.assertEqual(count, expected_count[0])
            expected_count[0] += 1

        second_temp = "%s.2" % test_support.TESTFN
        self.registerFileForCleanUp(second_temp)
        urllib.urlretrieve(self.constructLocalFileUrl(test_support.TESTFN),
                           second_temp, hooktester)

    def test_reporthook_0_bytes(self):
        # Test on zero length file. Should call reporthook only 1 time.
        report = []

        def hooktester(count, block_size, total_size):
            report.append((count, block_size, total_size))

        srcFileName = self.createNewTempFile()
        urllib.urlretrieve(self.constructLocalFileUrl(srcFileName),
                           test_support.TESTFN, hooktester)
        self.assertEqual(len(report), 1)
        self.assertEqual(report[0][2], 0)

    def test_reporthook_5_bytes(self):
        # Test on 5 byte file. Should call reporthook only 2 times (once when
        # the "network connection" is established and once when the block is
        # read). Since the block size is 8192 bytes, only one block read is
        # required to read the entire file.
        report = []

        def hooktester(count, block_size, total_size):
            report.append((count, block_size, total_size))

        srcFileName = self.createNewTempFile("x" * 5)
        urllib.urlretrieve(self.constructLocalFileUrl(srcFileName),
                           test_support.TESTFN, hooktester)
        self.assertEqual(len(report), 2)
        self.assertEqual(report[0][1], 8192)
        self.assertEqual(report[0][2], 5)

    def test_reporthook_8193_bytes(self):
        # Test on 8193 byte file. Should call reporthook only 3 times (once
        # when the "network connection" is established, once for the next 8192
        # bytes, and once for the last byte).
        report = []

        def hooktester(count, block_size, total_size):
            report.append((count, block_size, total_size))

        srcFileName = self.createNewTempFile("x" * 8193)
        urllib.urlretrieve(self.constructLocalFileUrl(srcFileName),
                           test_support.TESTFN, hooktester)
        self.assertEqual(len(report), 3)
        self.assertEqual(report[0][1], 8192)
        self.assertEqual(report[0][2], 8193)
Skip Montanaro080c9972001-01-28 21:12:22 +0000304
class QuotingTests(unittest.TestCase):
    r"""Tests for urllib.quote() and urllib.quote_plus()

    According to RFC 2396 ("Uniform Resource Identifiers"), to escape a
    character you write it as '%' + <2 character US-ASCII hex value>.  The
    Python code of ``'%' + hex(ord(<character>))[2:]`` escapes a character
    properly.  Case does not matter on the hex letters.

    The various character sets specified are:

    Reserved characters : ";/?:@&=+$,"
        Have special meaning in URIs and must be escaped if not being used for
        their special meaning
    Data characters : letters, digits, and "-_.!~*'()"
        Unreserved and do not need to be escaped; can be, though, if desired
    Control characters : 0x00 - 0x1F, 0x7F
        Have no use in URIs so must be escaped
    space : 0x20
        Must be escaped
    Delimiters : '<>#%"'
        Must be escaped
    Unwise : "{}|\^[]`"
        Must be escaped

    """

    def test_never_quote(self):
        # Make sure quote() does not quote letters, digits, and "_.-"
        do_not_quote = ''.join(["ABCDEFGHIJKLMNOPQRSTUVWXYZ",
                                "abcdefghijklmnopqrstuvwxyz",
                                "0123456789",
                                "_.-"])
        result = urllib.quote(do_not_quote)
        self.assertEqual(do_not_quote, result,
                         "using quote(): %s != %s" % (do_not_quote, result))
        result = urllib.quote_plus(do_not_quote)
        self.assertEqual(do_not_quote, result,
                         "using quote_plus(): %s != %s" %
                         (do_not_quote, result))

    def test_default_safe(self):
        # Test '/' is default value for 'safe' parameter
        self.assertEqual(urllib.quote.func_defaults[0], '/')

    def test_safe(self):
        # Test setting 'safe' parameter does what it should do
        quote_by_default = "<>"
        result = urllib.quote(quote_by_default, safe=quote_by_default)
        self.assertEqual(quote_by_default, result,
                         "using quote(): %s != %s" %
                         (quote_by_default, result))
        result = urllib.quote_plus(quote_by_default, safe=quote_by_default)
        self.assertEqual(quote_by_default, result,
                         "using quote_plus(): %s != %s" %
                         (quote_by_default, result))
        # Safe expressed as unicode rather than str
        result = urllib.quote(quote_by_default, safe=u"<>")
        self.assertEqual(quote_by_default, result,
                         "using quote(): %r != %r" %
                         (quote_by_default, result))
        # "Safe" non-ASCII bytes should still work
        # (Technically disallowed by the URI standard, but allowed for
        # backwards compatibility with previous versions of Python)
        result = urllib.quote(b"a\xfcb", safe=b"\xfc")
        expect = b"a\xfcb"
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" %
                         (expect, result))
        # Same as above, but with 'safe' as a unicode rather than str
        # "Safe" non-ASCII unicode characters should have no effect
        # (Since URIs are not allowed to have non-ASCII characters)
        result = urllib.quote(b"a\xfcb", safe=u"\xfc")
        expect = urllib.quote(b"a\xfcb", safe="")
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" %
                         (expect, result))
        # Same as above, but quoting a unicode rather than a str
        result = urllib.quote(u"a\xfcb", encoding="latin-1", safe=b"\xfc")
        expect = b"a\xfcb"
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" %
                         (expect, result))
        # Same as above, but with both the quoted value and 'safe' as unicode
        result = urllib.quote(u"a\xfcb", encoding="latin-1", safe=u"\xfc")
        expect = urllib.quote(u"a\xfcb", encoding="latin-1", safe="")
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" %
                         (expect, result))

    def test_default_quoting(self):
        # Make sure all characters that should be quoted are by default sans
        # space (separate test for that).
        should_quote = [chr(num) for num in range(32)]  # For 0x00 - 0x1F
        # The backslash is doubled so the literal holds a single '\' without
        # relying on '\^' being an unrecognised escape sequence.
        should_quote.append('<>#%"{}|\\^[]`')
        should_quote.append(chr(127))  # For 0x7F
        should_quote = ''.join(should_quote)
        for char in should_quote:
            result = urllib.quote(char)
            self.assertEqual(hexescape(char), result,
                             "using quote(): %s should be escaped to %s, "
                             "not %s" % (char, hexescape(char), result))
            result = urllib.quote_plus(char)
            self.assertEqual(hexescape(char), result,
                             "using quote_plus(): "
                             "%s should be escaped to %s, not %s" %
                             (char, hexescape(char), result))
        del should_quote
        partial_quote = "ab[]cd"
        expected = "ab%5B%5Dcd"
        result = urllib.quote(partial_quote)
        self.assertEqual(expected, result,
                         "using quote(): %s != %s" % (expected, result))
        # Re-run through quote_plus() so the second assertion really checks
        # the function named in its failure message.
        result = urllib.quote_plus(partial_quote)
        self.assertEqual(expected, result,
                         "using quote_plus(): %s != %s" % (expected, result))
        self.assertRaises(TypeError, urllib.quote, None)

    def test_quoting_space(self):
        # Make sure quote() and quote_plus() handle spaces as specified in
        # their unique way
        result = urllib.quote(' ')
        self.assertEqual(result, hexescape(' '),
                         "using quote(): %s != %s" % (result, hexescape(' ')))
        result = urllib.quote_plus(' ')
        self.assertEqual(result, '+',
                         "using quote_plus(): %s != +" % result)
        given = "a b cd e f"
        expect = given.replace(' ', hexescape(' '))
        result = urllib.quote(given)
        self.assertEqual(expect, result,
                         "using quote(): %s != %s" % (expect, result))
        expect = given.replace(' ', '+')
        result = urllib.quote_plus(given)
        self.assertEqual(expect, result,
                         "using quote_plus(): %s != %s" % (expect, result))

    def test_quoting_plus(self):
        self.assertEqual(urllib.quote_plus('alpha+beta gamma'),
                         'alpha%2Bbeta+gamma')
        self.assertEqual(urllib.quote_plus('alpha+beta gamma', '+'),
                         'alpha+beta+gamma')
        # Test with unicode
        self.assertEqual(urllib.quote_plus(u'alpha+beta gamma'),
                         'alpha%2Bbeta+gamma')
        # Test with safe unicode
        self.assertEqual(urllib.quote_plus('alpha+beta gamma', u'+'),
                         'alpha+beta+gamma')

    def test_quote_bytes(self):
        # Non-ASCII bytes should quote directly to percent-encoded values
        given = b"\xa2\xd8ab\xff"
        expect = "%A2%D8ab%FF"
        result = urllib.quote(given)
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        # Encoding argument should raise UnicodeDecodeError on bytes input
        # with non-ASCII characters (just as with str.encode).
        self.assertRaises(UnicodeDecodeError, urllib.quote, given,
                          encoding="latin-1")

    def test_quote_with_unicode(self):
        # Characters in Latin-1 range, encoded by default in UTF-8
        given = u"\xa2\xd8ab\xff"
        expect = "%C2%A2%C3%98ab%C3%BF"
        result = urllib.quote(given)
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        # Characters in Latin-1 range, encoded with None (default)
        result = urllib.quote(given, encoding=None, errors=None)
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        # Characters in Latin-1 range, encoded with Latin-1
        given = u"\xa2\xd8ab\xff"
        expect = "%A2%D8ab%FF"
        result = urllib.quote(given, encoding="latin-1")
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        # Characters in BMP, encoded by default in UTF-8
        given = u"\u6f22\u5b57"  # "Kanji"
        expect = "%E6%BC%A2%E5%AD%97"
        result = urllib.quote(given)
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        # Characters in BMP, encoded with Latin-1
        given = u"\u6f22\u5b57"
        self.assertRaises(UnicodeEncodeError, urllib.quote, given,
                          encoding="latin-1")
        # Characters in BMP, encoded with Latin-1, with replace error handling
        given = u"\u6f22\u5b57"
        expect = "%3F%3F"  # "??"
        result = urllib.quote(given, encoding="latin-1",
                              errors="replace")
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        # Characters in BMP, Latin-1, with xmlcharrefreplace error handling
        given = u"\u6f22\u5b57"
        expect = "%26%2328450%3B%26%2323383%3B"  # "&#28450;&#23383;"
        result = urllib.quote(given, encoding="latin-1",
                              errors="xmlcharrefreplace")
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))

    def test_quote_plus_with_unicode(self):
        # Encoding (latin-1) test for quote_plus
        given = u"\xa2\xd8 \xff"
        expect = "%A2%D8+%FF"
        result = urllib.quote_plus(given, encoding="latin-1")
        self.assertEqual(expect, result,
                         "using quote_plus(): %r != %r" % (expect, result))
        # Errors test for quote_plus
        given = u"ab\u6f22\u5b57 cd"
        expect = "ab%3F%3F+cd"
        result = urllib.quote_plus(given, encoding="latin-1",
                                   errors="replace")
        self.assertEqual(expect, result,
                         "using quote_plus(): %r != %r" % (expect, result))
Raymond Hettinger2bdec7b2005-09-10 14:30:09 +0000517
class UnquotingTests(unittest.TestCase):
    """Tests for unquote() and unquote_plus()

    See the doc string for QuotingTests for details on quoting and such.

    """

    def test_unquoting(self):
        # Make sure unquoting of all ASCII values works
        escape_list = []
        for num in range(128):
            given = hexescape(chr(num))
            expect = chr(num)
            result = urllib.unquote(given)
            self.assertEqual(expect, result,
                             "using unquote(): %s != %s" % (expect, result))
            result = urllib.unquote_plus(given)
            self.assertEqual(expect, result,
                             "using unquote_plus(): %s != %s" %
                             (expect, result))
            escape_list.append(given)
        escape_string = ''.join(escape_list)
        del escape_list
        # After unquoting the whole string, the only '%' left must be the one
        # decoded from '%25'.
        result = urllib.unquote(escape_string)
        self.assertEqual(result.count('%'), 1,
                         "using unquote(): not all characters escaped: "
                         "%s" % result)
        result = urllib.unquote_plus(escape_string)
        self.assertEqual(result.count('%'), 1,
                         "using unquote_plus(): not all characters escaped: "
                         "%s" % result)

    def test_unquoting_badpercent(self):
        # Test unquoting on bad percent-escapes: each must come back unchanged
        for given in ('%xab', '%x', '%'):
            expect = given
            result = urllib.unquote(given)
            self.assertEqual(expect, result, "using unquote(): %r != %r"
                             % (expect, result))

    def test_unquoting_mixed_case(self):
        # Test unquoting on mixed-case hex digits in the percent-escapes
        given = '%Ab%eA'
        expect = '\xab\xea'
        result = urllib.unquote(given)
        self.assertEqual(expect, result, "using unquote(): %r != %r"
                         % (expect, result))

    def test_unquoting_parts(self):
        # Make sure unquoting works when have non-quoted characters
        # interspersed
        given = 'ab%sd' % hexescape('c')
        expect = "abcd"
        result = urllib.unquote(given)
        self.assertEqual(expect, result,
                         "using unquote(): %s != %s" % (expect, result))
        result = urllib.unquote_plus(given)
        self.assertEqual(expect, result,
                         "using unquote_plus(): %s != %s" % (expect, result))

    def test_unquoting_plus(self):
        # Test difference between unquote() and unquote_plus()
        given = "are+there+spaces..."
        expect = given
        result = urllib.unquote(given)
        self.assertEqual(expect, result,
                         "using unquote(): %s != %s" % (expect, result))
        expect = given.replace('+', ' ')
        result = urllib.unquote_plus(given)
        self.assertEqual(expect, result,
                         "using unquote_plus(): %s != %s" % (expect, result))

    def test_unquote_with_unicode(self):
        r = urllib.unquote(u'br%C3%BCckner_sapporo_20050930.doc')
        self.assertEqual(r, u'br\xc3\xbcckner_sapporo_20050930.doc')
603
class urlencode_Tests(unittest.TestCase):
    """Tests for urlencode()"""

    def help_inputtype(self, given, test_type):
        """Check urlencode(given) regardless of the input container type.

        'given' must produce exactly these pairs:
            * 1st, 1
            * 2nd, 2
            * 3rd, 3

        No ordering is assumed: the docs make no guarantee and the input may
        be a dictionary.  'test_type' is only used to label failure messages.

        """
        encoded = urllib.urlencode(given)
        for pair in ("1st=1", "2nd=2", "3rd=3"):
            self.assertIn(pair, encoded,
                          "testing %s: %s not found in %s" %
                          (test_type, pair, encoded))
        self.assertEqual(encoded.count('&'), 2,
                         "testing %s: expected 2 '&'s; got %s" %
                         (test_type, encoded.count('&')))
        # The first '&' must sit between a value digit and a key digit.
        first_amp = encoded.index('&')
        before_amp = encoded[first_amp - 1]
        after_amp = encoded[first_amp + 1]
        self.assertTrue(before_amp.isdigit() and after_amp.isdigit(),
                        "testing %s: '&' not located in proper place in %s" %
                        (test_type, encoded))
        expected_length = (5 * 3) + 2  # 5 chars per pair plus the two amps
        self.assertEqual(len(encoded), expected_length,
                         "testing %s: "
                         "unexpected number of characters: %s != %s" %
                         (test_type, len(encoded), expected_length))

    def test_using_mapping(self):
        # A mapping object is accepted as the argument.
        pairs = {"1st": '1', "2nd": '2', "3rd": '3'}
        self.help_inputtype(pairs, "using dict as input type")

    def test_using_sequence(self):
        # A sequence of two-item sequences is accepted as the argument.
        pairs = [('1st', '1'), ('2nd', '2'), ('3rd', '3')]
        self.help_inputtype(pairs,
                            "using sequence of two-item tuples as input")

    def test_quoting(self):
        # Keys and values must both be quoted using quote_plus()
        encoded = urllib.urlencode({"&": "="})
        self.assertEqual("%s=%s" % (hexescape('&'), hexescape('=')), encoded)
        encoded = urllib.urlencode({"key name": "A bunch of pluses"})
        self.assertEqual("key+name=A+bunch+of+pluses", encoded)

    def test_doseq(self):
        # Test that passing True for 'doseq' parameter works correctly
        values = ['1', '2', '3']
        given = {'sequence': values}
        # Without doseq the list is quoted via its str() form.
        whole_list = "sequence=%s" % urllib.quote_plus(str(['1', '2', '3']))
        self.assertEqual(whole_list, urllib.urlencode(given))
        # With doseq every element gets its own key=value pair.
        encoded = urllib.urlencode(given, True)
        for value in values:
            self.assertIn("sequence=%s" % value, encoded)
        self.assertEqual(encoded.count('&'), 2,
                         "Expected 2 '&'s, got %s" % encoded.count('&'))
671 "Expected 2 '&'s, got %s" % result.count('&'))
672
class Pathname_Tests(unittest.TestCase):
    """Test pathname2url() and url2pathname()"""

    def test_basic(self):
        # Make sure simple tests pass
        expected_path = os.path.join("parts", "of", "a", "path")
        expected_url = "parts/of/a/path"
        result = urllib.pathname2url(expected_path)
        self.assertEqual(expected_url, result,
                         "pathname2url() failed; %s != %s" %
                         (result, expected_url))
        result = urllib.url2pathname(expected_url)
        self.assertEqual(expected_path, result,
                         "url2pathname() failed; %s != %s" %
                         (result, expected_path))

    def test_quoting(self):
        # Test automatic quoting and unquoting works for pathname2url() and
        # url2pathname() respectively
        given = os.path.join("needs", "quot=ing", "here")
        expect = "needs/%s/here" % urllib.quote("quot=ing")
        result = urllib.pathname2url(given)
        self.assertEqual(expect, result,
                         "pathname2url() failed; %s != %s" %
                         (expect, result))
        # Round trip: converting the URL back must give the original path.
        expect = given
        result = urllib.url2pathname(result)
        self.assertEqual(expect, result,
                         "url2pathname() failed; %s != %s" %
                         (expect, result))
        given = os.path.join("make sure", "using_quote")
        expect = "%s/using_quote" % urllib.quote("make sure")
        result = urllib.pathname2url(given)
        self.assertEqual(expect, result,
                         "pathname2url() failed; %s != %s" %
                         (expect, result))
        # A '+' in the URL must survive as a literal '+' in the path.
        given = "make+sure/using_unquote"
        expect = os.path.join("make+sure", "using_unquote")
        result = urllib.url2pathname(given)
        self.assertEqual(expect, result,
                         "url2pathname() failed; %s != %s" %
                         (expect, result))
Tim Petersc2659cf2003-05-12 20:19:37 +0000715
Senthil Kumaran5e95e762009-03-30 21:51:50 +0000716class Utility_Tests(unittest.TestCase):
717 """Testcase to test the various utility functions in the urllib."""
718
719 def test_splitpasswd(self):
720 """Some of the password examples are not sensible, but it is added to
721 confirming to RFC2617 and addressing issue4675.
722 """
723 self.assertEqual(('user', 'ab'),urllib.splitpasswd('user:ab'))
724 self.assertEqual(('user', 'a\nb'),urllib.splitpasswd('user:a\nb'))
725 self.assertEqual(('user', 'a\tb'),urllib.splitpasswd('user:a\tb'))
726 self.assertEqual(('user', 'a\rb'),urllib.splitpasswd('user:a\rb'))
727 self.assertEqual(('user', 'a\fb'),urllib.splitpasswd('user:a\fb'))
728 self.assertEqual(('user', 'a\vb'),urllib.splitpasswd('user:a\vb'))
729 self.assertEqual(('user', 'a:b'),urllib.splitpasswd('user:a:b'))
730
731
Senthil Kumaran7c2867f2009-04-21 03:24:19 +0000732class URLopener_Tests(unittest.TestCase):
733 """Testcase to test the open method of URLopener class."""
734
735 def test_quoted_open(self):
736 class DummyURLopener(urllib.URLopener):
737 def open_spam(self, url):
738 return url
739
740 self.assertEqual(DummyURLopener().open(
741 'spam://example/ /'),'//example/%20/')
742
Senthil Kumaran18d5a692010-02-20 22:05:34 +0000743 # test the safe characters are not quoted by urlopen
744 self.assertEqual(DummyURLopener().open(
745 "spam://c:|windows%/:=&?~#+!$,;'@()*[]|/path/"),
746 "//c:|windows%/:=&?~#+!$,;'@()*[]|/path/")
747
Senthil Kumaran7c2867f2009-04-21 03:24:19 +0000748
# Just commented them out.
# Can't really tell why they keep failing on Windows and SPARC.
# Everywhere else they work OK, but on those machines they sometimes
# fail in one of the tests, sometimes in another. I have a Linux box, and
# the tests go OK.
# If anybody has one of the problematic environments, please help!
# . Facundo
756#
757# def server(evt):
Facundo Batista4f1b1ed2008-05-29 16:39:26 +0000758# import socket, time
Facundo Batistad9880d02007-05-25 04:20:22 +0000759# serv = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
760# serv.settimeout(3)
761# serv.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
762# serv.bind(("", 9093))
763# serv.listen(5)
764# try:
765# conn, addr = serv.accept()
766# conn.send("1 Hola mundo\n")
767# cantdata = 0
768# while cantdata < 13:
769# data = conn.recv(13-cantdata)
770# cantdata += len(data)
771# time.sleep(.3)
772# conn.send("2 No more lines\n")
773# conn.close()
774# except socket.timeout:
775# pass
776# finally:
777# serv.close()
778# evt.set()
779#
780# class FTPWrapperTests(unittest.TestCase):
781#
782# def setUp(self):
Facundo Batista4f1b1ed2008-05-29 16:39:26 +0000783# import ftplib, time, threading
Facundo Batistad9880d02007-05-25 04:20:22 +0000784# ftplib.FTP.port = 9093
785# self.evt = threading.Event()
786# threading.Thread(target=server, args=(self.evt,)).start()
787# time.sleep(.1)
788#
789# def tearDown(self):
790# self.evt.wait()
791#
792# def testBasic(self):
793# # connects
794# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [])
Facundo Batista4f1b1ed2008-05-29 16:39:26 +0000795# ftp.close()
Facundo Batistad9880d02007-05-25 04:20:22 +0000796#
797# def testTimeoutNone(self):
Facundo Batista4f1b1ed2008-05-29 16:39:26 +0000798# # global default timeout is ignored
799# import socket
Benjamin Peterson5c8da862009-06-30 22:57:08 +0000800# self.assertTrue(socket.getdefaulttimeout() is None)
Facundo Batistad9880d02007-05-25 04:20:22 +0000801# socket.setdefaulttimeout(30)
802# try:
803# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [])
804# finally:
Facundo Batista4f1b1ed2008-05-29 16:39:26 +0000805# socket.setdefaulttimeout(None)
Facundo Batistad9880d02007-05-25 04:20:22 +0000806# self.assertEqual(ftp.ftp.sock.gettimeout(), 30)
Facundo Batista4f1b1ed2008-05-29 16:39:26 +0000807# ftp.close()
Facundo Batistad9880d02007-05-25 04:20:22 +0000808#
Facundo Batista4f1b1ed2008-05-29 16:39:26 +0000809# def testTimeoutDefault(self):
810# # global default timeout is used
811# import socket
Benjamin Peterson5c8da862009-06-30 22:57:08 +0000812# self.assertTrue(socket.getdefaulttimeout() is None)
Facundo Batista4f1b1ed2008-05-29 16:39:26 +0000813# socket.setdefaulttimeout(30)
814# try:
815# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [])
816# finally:
817# socket.setdefaulttimeout(None)
818# self.assertEqual(ftp.ftp.sock.gettimeout(), 30)
819# ftp.close()
820#
821# def testTimeoutValue(self):
822# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [],
823# timeout=30)
824# self.assertEqual(ftp.ftp.sock.gettimeout(), 30)
825# ftp.close()
Facundo Batista711a54e2007-05-24 17:50:54 +0000826
Skip Montanaro080c9972001-01-28 21:12:22 +0000827
828
def test_main():
    """Run all enabled urllib test cases in this module.

    While the tests run, any DeprecationWarning whose message matches the
    urllib.urlopen pattern below is ignored; the previous warning filters
    are restored when the catch_warnings() block exits.
    """
    import warnings
    with warnings.catch_warnings():
        # Raw string: the pattern is a regular expression, and '\.' must
        # reach the regex engine as an escaped dot rather than depend on
        # Python passing an unknown string escape through unchanged.
        warnings.filterwarnings('ignore', r".*urllib\.urlopen.*Python 3.0",
                                DeprecationWarning)
        test_support.run_unittest(
            urlopen_FileTests,
            urlopen_HttpTests,
            urlretrieve_FileTests,
            ProxyTests,
            QuotingTests,
            UnquotingTests,
            urlencode_Tests,
            Pathname_Tests,
            Utility_Tests,
            URLopener_Tests,
            #FTPWrapperTests,
        )
Brett Cannon74bfd702003-04-25 09:39:47 +0000847
848
849
# Run the whole suite when this file is executed directly as a script.
if __name__ == "__main__":
    test_main()