blob: a7ada274f4faea87abe98313b3d19a1fe55be5a5 [file] [log] [blame]
"""Regression tests for urllib"""
2
Jeremy Hylton6102e292000-08-31 15:48:10 +00003import urllib
Hye-Shik Chang39aef792004-06-05 13:30:56 +00004import httplib
Brett Cannon74bfd702003-04-25 09:39:47 +00005import unittest
6from test import test_support
7import os
8import mimetools
Hye-Shik Chang39aef792004-06-05 13:30:56 +00009import StringIO
Jeremy Hylton6102e292000-08-31 15:48:10 +000010
def hexescape(char):
    """Return char percent-escaped in the form RFC 2396 specifies.

    The result is '%' followed by the character's ordinal written in
    upper-case hexadecimal, zero-padded to at least two digits.
    """
    return "%%%02X" % ord(char)
Jeremy Hylton6102e292000-08-31 15:48:10 +000017
Brett Cannon74bfd702003-04-25 09:39:47 +000018class urlopen_FileTests(unittest.TestCase):
19 """Test urlopen() opening a temporary file.
Jeremy Hylton6102e292000-08-31 15:48:10 +000020
Brett Cannon74bfd702003-04-25 09:39:47 +000021 Try to test as much functionality as possible so as to cut down on reliance
22 on connect to the Net for testing.
Jeremy Hylton7ae51bf2000-09-14 16:59:07 +000023
Brett Cannon74bfd702003-04-25 09:39:47 +000024 """
Jeremy Hylton7ae51bf2000-09-14 16:59:07 +000025
Brett Cannon74bfd702003-04-25 09:39:47 +000026 def setUp(self):
27 """Setup of a temp file to use for testing"""
28 self.text = "test_urllib: %s\n" % self.__class__.__name__
Guido van Rossum51735b02003-04-25 15:01:05 +000029 FILE = file(test_support.TESTFN, 'wb')
Brett Cannon74bfd702003-04-25 09:39:47 +000030 try:
31 FILE.write(self.text)
32 finally:
33 FILE.close()
34 self.pathname = test_support.TESTFN
35 self.returned_obj = urllib.urlopen("file:%s" % self.pathname)
Jeremy Hylton7ae51bf2000-09-14 16:59:07 +000036
Brett Cannon74bfd702003-04-25 09:39:47 +000037 def tearDown(self):
38 """Shut down the open object"""
39 self.returned_obj.close()
Brett Cannon19691362003-04-29 05:08:06 +000040 os.remove(test_support.TESTFN)
Jeremy Hylton7ae51bf2000-09-14 16:59:07 +000041
Brett Cannon74bfd702003-04-25 09:39:47 +000042 def test_interface(self):
43 # Make sure object returned by urlopen() has the specified methods
44 for attr in ("read", "readline", "readlines", "fileno",
45 "close", "info", "geturl", "__iter__"):
46 self.assert_(hasattr(self.returned_obj, attr),
47 "object returned by urlopen() lacks %s attribute" %
48 attr)
Skip Montanaroe78b92a2001-01-20 20:22:30 +000049
Brett Cannon74bfd702003-04-25 09:39:47 +000050 def test_read(self):
51 self.assertEqual(self.text, self.returned_obj.read())
Skip Montanaro080c9972001-01-28 21:12:22 +000052
Brett Cannon74bfd702003-04-25 09:39:47 +000053 def test_readline(self):
54 self.assertEqual(self.text, self.returned_obj.readline())
55 self.assertEqual('', self.returned_obj.readline(),
56 "calling readline() after exhausting the file did not"
57 " return an empty string")
Skip Montanaro080c9972001-01-28 21:12:22 +000058
Brett Cannon74bfd702003-04-25 09:39:47 +000059 def test_readlines(self):
60 lines_list = self.returned_obj.readlines()
61 self.assertEqual(len(lines_list), 1,
62 "readlines() returned the wrong number of lines")
63 self.assertEqual(lines_list[0], self.text,
64 "readlines() returned improper text")
Skip Montanaro080c9972001-01-28 21:12:22 +000065
Brett Cannon74bfd702003-04-25 09:39:47 +000066 def test_fileno(self):
67 file_num = self.returned_obj.fileno()
68 self.assert_(isinstance(file_num, int),
69 "fileno() did not return an int")
70 self.assertEqual(os.read(file_num, len(self.text)), self.text,
71 "Reading on the file descriptor returned by fileno() "
72 "did not return the expected text")
Skip Montanaroe78b92a2001-01-20 20:22:30 +000073
Brett Cannon74bfd702003-04-25 09:39:47 +000074 def test_close(self):
75 # Test close() by calling it hear and then having it be called again
76 # by the tearDown() method for the test
77 self.returned_obj.close()
Skip Montanaro080c9972001-01-28 21:12:22 +000078
Brett Cannon74bfd702003-04-25 09:39:47 +000079 def test_info(self):
80 self.assert_(isinstance(self.returned_obj.info(), mimetools.Message))
Skip Montanaroe78b92a2001-01-20 20:22:30 +000081
Brett Cannon74bfd702003-04-25 09:39:47 +000082 def test_geturl(self):
83 self.assertEqual(self.returned_obj.geturl(), self.pathname)
Skip Montanaro080c9972001-01-28 21:12:22 +000084
Brett Cannon74bfd702003-04-25 09:39:47 +000085 def test_iter(self):
86 # Test iterator
87 # Don't need to count number of iterations since test would fail the
88 # instant it returned anything beyond the first line from the
89 # comparison
90 for line in self.returned_obj.__iter__():
91 self.assertEqual(line, self.text)
Skip Montanaro080c9972001-01-28 21:12:22 +000092
Hye-Shik Chang39aef792004-06-05 13:30:56 +000093class urlopen_HttpTests(unittest.TestCase):
94 """Test urlopen() opening a fake http connection."""
95
96 def fakehttp(self, fakedata):
97 class FakeSocket(StringIO.StringIO):
98 def sendall(self, str): pass
99 def makefile(self, mode, name): return self
100 def read(self, amt=None):
101 if self.closed: return ''
102 return StringIO.StringIO.read(self, amt)
103 def readline(self, length=None):
104 if self.closed: return ''
105 return StringIO.StringIO.readline(self, length)
106 class FakeHTTPConnection(httplib.HTTPConnection):
107 def connect(self):
108 self.sock = FakeSocket(fakedata)
109 assert httplib.HTTP._connection_class == httplib.HTTPConnection
110 httplib.HTTP._connection_class = FakeHTTPConnection
111
112 def unfakehttp(self):
113 httplib.HTTP._connection_class = httplib.HTTPConnection
114
115 def test_read(self):
116 self.fakehttp('Hello!')
117 try:
118 fp = urllib.urlopen("http://python.org/")
119 self.assertEqual(fp.readline(), 'Hello!')
120 self.assertEqual(fp.readline(), '')
121 finally:
122 self.unfakehttp()
123
Brett Cannon19691362003-04-29 05:08:06 +0000124class urlretrieve_FileTests(unittest.TestCase):
Brett Cannon74bfd702003-04-25 09:39:47 +0000125 """Test urllib.urlretrieve() on local files"""
Skip Montanaro080c9972001-01-28 21:12:22 +0000126
Brett Cannon19691362003-04-29 05:08:06 +0000127 def setUp(self):
128 # Create a temporary file.
129 self.text = 'testing urllib.urlretrieve'
130 FILE = file(test_support.TESTFN, 'wb')
131 FILE.write(self.text)
132 FILE.close()
133
134 def tearDown(self):
135 # Delete the temporary file.
136 os.remove(test_support.TESTFN)
137
138 def test_basic(self):
139 # Make sure that a local file just gets its own location returned and
140 # a headers value is returned.
141 result = urllib.urlretrieve("file:%s" % test_support.TESTFN)
142 self.assertEqual(result[0], test_support.TESTFN)
143 self.assert_(isinstance(result[1], mimetools.Message),
144 "did not get a mimetools.Message instance as second "
145 "returned value")
146
147 def test_copy(self):
148 # Test that setting the filename argument works.
149 second_temp = "%s.2" % test_support.TESTFN
150 result = urllib.urlretrieve("file:%s" % test_support.TESTFN, second_temp)
151 self.assertEqual(second_temp, result[0])
152 self.assert_(os.path.exists(second_temp), "copy of the file was not "
153 "made")
154 FILE = file(second_temp, 'rb')
155 try:
156 text = FILE.read()
157 finally:
158 FILE.close()
159 self.assertEqual(self.text, text)
160
161 def test_reporthook(self):
162 # Make sure that the reporthook works.
163 def hooktester(count, block_size, total_size, count_holder=[0]):
164 self.assert_(isinstance(count, int))
165 self.assert_(isinstance(block_size, int))
166 self.assert_(isinstance(total_size, int))
167 self.assertEqual(count, count_holder[0])
168 count_holder[0] = count_holder[0] + 1
169 second_temp = "%s.2" % test_support.TESTFN
170 urllib.urlretrieve(test_support.TESTFN, second_temp, hooktester)
171 os.remove(second_temp)
Skip Montanaro080c9972001-01-28 21:12:22 +0000172
class QuotingTests(unittest.TestCase):
    r"""Tests for urllib.quote() and urllib.quote_plus()

    According to RFC 2396 ("Uniform Resource Identifiers"), to escape a
    character you write it as '%' + <2 character US-ASCII hex value>.  The
    Python code of ``'%' + hex(ord(<character>))[2:]`` escapes a character
    properly.  Case does not matter on the hex letters.

    The various character sets specified are:

    Reserved characters : ";/?:@&=+$,"
        Have special meaning in URIs and must be escaped if not being used for
        their special meaning
    Data characters : letters, digits, and "-_.!~*'()"
        Unreserved and do not need to be escaped; can be, though, if desired
    Control characters : 0x00 - 0x1F, 0x7F
        Have no use in URIs so must be escaped
    space : 0x20
        Must be escaped
    Delimiters : '<>#%"'
        Must be escaped
    Unwise : "{}|\^[]`"
        Must be escaped

    """

    def test_never_quote(self):
        # Make sure quote() and quote_plus() do not quote letters, digits,
        # and "_.-"
        do_not_quote = ''.join(["ABCDEFGHIJKLMNOPQRSTUVWXYZ",
                                "abcdefghijklmnopqrstuvwxyz",
                                "0123456789",
                                "_.-"])
        result = urllib.quote(do_not_quote)
        self.assertEqual(do_not_quote, result,
                         "using quote(): %s != %s" % (do_not_quote, result))
        result = urllib.quote_plus(do_not_quote)
        self.assertEqual(do_not_quote, result,
                         "using quote_plus(): %s != %s" %
                         (do_not_quote, result))

    def test_default_safe(self):
        # Test '/' is default value for 'safe' parameter
        self.assertEqual(urllib.quote.func_defaults[0], '/')

    def test_safe(self):
        # Test setting 'safe' parameter does what it should do
        quote_by_default = "<>"
        result = urllib.quote(quote_by_default, safe=quote_by_default)
        self.assertEqual(quote_by_default, result,
                         "using quote(): %s != %s" %
                         (quote_by_default, result))
        result = urllib.quote_plus(quote_by_default, safe=quote_by_default)
        self.assertEqual(quote_by_default, result,
                         "using quote_plus(): %s != %s" %
                         (quote_by_default, result))

    def test_default_quoting(self):
        # Make sure all characters that should be quoted are by default sans
        # space (separate test for that).
        should_quote = [chr(num) for num in range(32)]  # For 0x00 - 0x1F
        # Delimiters and "unwise" characters; the backslash is doubled so the
        # literal holds exactly one backslash without an invalid escape.
        should_quote.append('<>#%"{}|\\^[]`')
        should_quote.append(chr(127))  # For 0x7F
        should_quote = ''.join(should_quote)
        for char in should_quote:
            escaped = hexescape(char)
            result = urllib.quote(char)
            self.assertEqual(escaped, result,
                             "using quote(): %s should be escaped to %s, "
                             "not %s" % (char, escaped, result))
            result = urllib.quote_plus(char)
            self.assertEqual(escaped, result,
                             "using quote_plus(): "
                             "%s should be escaped to %s, not %s" %
                             (char, escaped, result))
        # Only the unsafe characters inside a longer string may be escaped.
        partial_quote = "ab[]cd"
        expected = "ab%5B%5Dcd"
        result = urllib.quote(partial_quote)
        self.assertEqual(expected, result,
                         "using quote(): %s != %s" % (expected, result))
        # Call quote_plus() itself here; re-checking the quote() result
        # would leave quote_plus() unverified on the partial string.
        result = urllib.quote_plus(partial_quote)
        self.assertEqual(expected, result,
                         "using quote_plus(): %s != %s" % (expected, result))

    def test_quoting_space(self):
        # Make sure quote() and quote_plus() handle spaces as specified in
        # their unique way: quote() hex-escapes, quote_plus() emits '+'
        result = urllib.quote(' ')
        self.assertEqual(result, hexescape(' '),
                         "using quote(): %s != %s" % (result, hexescape(' ')))
        result = urllib.quote_plus(' ')
        self.assertEqual(result, '+',
                         "using quote_plus(): %s != +" % result)
        given = "a b cd e f"
        expect = given.replace(' ', hexescape(' '))
        result = urllib.quote(given)
        self.assertEqual(expect, result,
                         "using quote(): %s != %s" % (expect, result))
        expect = given.replace(' ', '+')
        result = urllib.quote_plus(given)
        self.assertEqual(expect, result,
                         "using quote_plus(): %s != %s" % (expect, result))
271
class UnquotingTests(unittest.TestCase):
    """Tests for unquote() and unquote_plus()

    See the doc string for QuotingTests for details on quoting and such.

    """

    def test_unquoting(self):
        # Make sure unquoting of all ASCII values works
        escape_list = []
        for num in range(128):
            given = hexescape(chr(num))
            expect = chr(num)
            result = urllib.unquote(given)
            self.assertEqual(expect, result,
                             "using unquote(): %s != %s" % (expect, result))
            result = urllib.unquote_plus(given)
            self.assertEqual(expect, result,
                             "using unquote_plus(): %s != %s" %
                             (expect, result))
            escape_list.append(given)
        escape_string = ''.join(escape_list)
        # After decoding every escape at once, the only '%' left should be
        # the one produced by decoding the escape for '%' itself.
        result = urllib.unquote(escape_string)
        self.assertEqual(result.count('%'), 1,
                         "using unquote(): not all characters escaped: "
                         "%s" % result)
        # Check unquote_plus() on the joined string too, rather than calling
        # unquote() a second time.
        result = urllib.unquote_plus(escape_string)
        self.assertEqual(result.count('%'), 1,
                         "using unquote_plus(): not all characters escaped: "
                         "%s" % result)

    def test_unquoting_parts(self):
        # Make sure unquoting works when have non-quoted characters
        # interspersed
        given = 'ab%sd' % hexescape('c')
        expect = "abcd"
        result = urllib.unquote(given)
        self.assertEqual(expect, result,
                         "using unquote(): %s != %s" % (expect, result))
        result = urllib.unquote_plus(given)
        self.assertEqual(expect, result,
                         "using unquote_plus(): %s != %s" % (expect, result))

    def test_unquoting_plus(self):
        # Test difference between unquote() and unquote_plus(): only the
        # latter turns '+' into a space
        given = "are+there+spaces..."
        expect = given
        result = urllib.unquote(given)
        self.assertEqual(expect, result,
                         "using unquote(): %s != %s" % (expect, result))
        expect = given.replace('+', ' ')
        result = urllib.unquote_plus(given)
        self.assertEqual(expect, result,
                         "using unquote_plus(): %s != %s" % (expect, result))
327
328class urlencode_Tests(unittest.TestCase):
329 """Tests for urlencode()"""
330
331 def help_inputtype(self, given, test_type):
332 """Helper method for testing different input types.
Tim Petersc2659cf2003-05-12 20:19:37 +0000333
Brett Cannon74bfd702003-04-25 09:39:47 +0000334 'given' must lead to only the pairs:
335 * 1st, 1
336 * 2nd, 2
337 * 3rd, 3
Tim Petersc2659cf2003-05-12 20:19:37 +0000338
Brett Cannon74bfd702003-04-25 09:39:47 +0000339 Test cannot assume anything about order. Docs make no guarantee and
340 have possible dictionary input.
Tim Petersc2659cf2003-05-12 20:19:37 +0000341
Brett Cannon74bfd702003-04-25 09:39:47 +0000342 """
343 expect_somewhere = ["1st=1", "2nd=2", "3rd=3"]
344 result = urllib.urlencode(given)
345 for expected in expect_somewhere:
346 self.assert_(expected in result,
347 "testing %s: %s not found in %s" %
348 (test_type, expected, result))
349 self.assertEqual(result.count('&'), 2,
350 "testing %s: expected 2 '&'s; got %s" %
351 (test_type, result.count('&')))
352 amp_location = result.index('&')
353 on_amp_left = result[amp_location - 1]
354 on_amp_right = result[amp_location + 1]
355 self.assert_(on_amp_left.isdigit() and on_amp_right.isdigit(),
356 "testing %s: '&' not located in proper place in %s" %
357 (test_type, result))
358 self.assertEqual(len(result), (5 * 3) + 2, #5 chars per thing and amps
359 "testing %s: "
360 "unexpected number of characters: %s != %s" %
361 (test_type, len(result), (5 * 3) + 2))
362
363 def test_using_mapping(self):
364 # Test passing in a mapping object as an argument.
365 self.help_inputtype({"1st":'1', "2nd":'2', "3rd":'3'},
366 "using dict as input type")
367
368 def test_using_sequence(self):
369 # Test passing in a sequence of two-item sequences as an argument.
370 self.help_inputtype([('1st', '1'), ('2nd', '2'), ('3rd', '3')],
371 "using sequence of two-item tuples as input")
372
373 def test_quoting(self):
374 # Make sure keys and values are quoted using quote_plus()
375 given = {"&":"="}
376 expect = "%s=%s" % (hexescape('&'), hexescape('='))
377 result = urllib.urlencode(given)
378 self.assertEqual(expect, result)
379 given = {"key name":"A bunch of pluses"}
380 expect = "key+name=A+bunch+of+pluses"
381 result = urllib.urlencode(given)
382 self.assertEqual(expect, result)
383
384 def test_doseq(self):
385 # Test that passing True for 'doseq' parameter works correctly
386 given = {'sequence':['1', '2', '3']}
387 expect = "sequence=%s" % urllib.quote_plus(str(['1', '2', '3']))
388 result = urllib.urlencode(given)
389 self.assertEqual(expect, result)
390 result = urllib.urlencode(given, True)
391 for value in given["sequence"]:
392 expect = "sequence=%s" % value
393 self.assert_(expect in result,
394 "%s not found in %s" % (expect, result))
395 self.assertEqual(result.count('&'), 2,
396 "Expected 2 '&'s, got %s" % result.count('&'))
397
398class Pathname_Tests(unittest.TestCase):
399 """Test pathname2url() and url2pathname()"""
400
401 def test_basic(self):
402 # Make sure simple tests pass
403 expected_path = os.path.join("parts", "of", "a", "path")
404 expected_url = "parts/of/a/path"
405 result = urllib.pathname2url(expected_path)
406 self.assertEqual(expected_url, result,
407 "pathname2url() failed; %s != %s" %
408 (result, expected_url))
409 result = urllib.url2pathname(expected_url)
410 self.assertEqual(expected_path, result,
411 "url2pathame() failed; %s != %s" %
412 (result, expected_path))
413
414 def test_quoting(self):
415 # Test automatic quoting and unquoting works for pathnam2url() and
416 # url2pathname() respectively
417 given = os.path.join("needs", "quot=ing", "here")
418 expect = "needs/%s/here" % urllib.quote("quot=ing")
419 result = urllib.pathname2url(given)
420 self.assertEqual(expect, result,
421 "pathname2url() failed; %s != %s" %
422 (expect, result))
423 expect = given
424 result = urllib.url2pathname(result)
425 self.assertEqual(expect, result,
426 "url2pathname() failed; %s != %s" %
427 (expect, result))
428 given = os.path.join("make sure", "using_quote")
429 expect = "%s/using_quote" % urllib.quote("make sure")
430 result = urllib.pathname2url(given)
431 self.assertEqual(expect, result,
432 "pathname2url() failed; %s != %s" %
433 (expect, result))
434 given = "make+sure/using_unquote"
435 expect = os.path.join("make+sure", "using_unquote")
436 result = urllib.url2pathname(given)
437 self.assertEqual(expect, result,
438 "url2pathname() failed; %s != %s" %
439 (expect, result))
Tim Petersc2659cf2003-05-12 20:19:37 +0000440
Skip Montanaro080c9972001-01-28 21:12:22 +0000441
442
def test_main():
    """Run every TestCase class of this module through test_support."""
    test_classes = (
        urlopen_FileTests,
        urlopen_HttpTests,
        urlretrieve_FileTests,
        QuotingTests,
        UnquotingTests,
        urlencode_Tests,
        Pathname_Tests,
    )
    test_support.run_unittest(*test_classes)
Brett Cannon74bfd702003-04-25 09:39:47 +0000453
454
455
# Run the whole suite when this file is executed as a script.
if __name__ == "__main__":
    test_main()