blob: 67252891d63d302540c63d9eba4decd60f1e6bcd [file] [log] [blame]
"""Regression tests for urllib"""
2
Jeremy Hylton6102e292000-08-31 15:48:10 +00003import urllib
Brett Cannon74bfd702003-04-25 09:39:47 +00004import unittest
5from test import test_support
6import os
7import mimetools
Jeremy Hylton6102e292000-08-31 15:48:10 +00008
def hexescape(char):
    """Return the RFC 2396 escape for char: '%' plus its uppercase hex code.

    The hex code is zero-padded to at least two digits, so '\\n' becomes
    '%0A' and ' ' becomes '%20'.
    """
    # %02X pads to two digits and upper-cases the hex letters in one step.
    return "%%%02X" % ord(char)
Jeremy Hylton6102e292000-08-31 15:48:10 +000015
class urlopen_FileTests(unittest.TestCase):
    """Test urlopen() opening a temporary file.

    Try to test as much functionality as possible so as to cut down on
    reliance on connecting to the Net for testing.

    """

    def setUp(self):
        """Write a one-line temp file and open it through urlopen()."""
        self.text = "test_urllib: %s\n" % self.__class__.__name__
        self.pathname = test_support.TESTFN
        temp_file = open(self.pathname, 'w')
        try:
            temp_file.write(self.text)
        finally:
            temp_file.close()
        self.returned_obj = urllib.urlopen("file:%s" % self.pathname)

    def tearDown(self):
        """Close the urlopen() object and delete the temp file."""
        # Close first: some platforms refuse to remove a file that is
        # still open.  The finally clause guarantees the temp file does not
        # outlive the test even if close() raises.
        try:
            self.returned_obj.close()
        finally:
            os.remove(self.pathname)

    def test_interface(self):
        # Make sure object returned by urlopen() has the specified methods
        for attr in ("read", "readline", "readlines", "fileno",
                     "close", "info", "geturl", "__iter__"):
            self.assert_(hasattr(self.returned_obj, attr),
                         "object returned by urlopen() lacks %s attribute" %
                         attr)

    def test_read(self):
        self.assertEqual(self.text, self.returned_obj.read())

    def test_readline(self):
        self.assertEqual(self.text, self.returned_obj.readline())
        self.assertEqual('', self.returned_obj.readline(),
                         "calling readline() after exhausting the file did not"
                         " return an empty string")

    def test_readlines(self):
        lines_list = self.returned_obj.readlines()
        self.assertEqual(len(lines_list), 1,
                         "readlines() returned the wrong number of lines")
        self.assertEqual(lines_list[0], self.text,
                         "readlines() returned improper text")

    def test_fileno(self):
        file_num = self.returned_obj.fileno()
        self.assert_(isinstance(file_num, int),
                     "fileno() did not return an int")
        self.assertEqual(os.read(file_num, len(self.text)), self.text,
                         "Reading on the file descriptor returned by fileno() "
                         "did not return the expected text")

    def test_close(self):
        # Test close() by calling it here and then having it be called again
        # by the tearDown() method for the test
        self.returned_obj.close()

    def test_info(self):
        self.assert_(isinstance(self.returned_obj.info(), mimetools.Message))

    def test_geturl(self):
        self.assertEqual(self.returned_obj.geturl(), self.pathname)

    def test_iter(self):
        # Test iterator
        # Don't need to count number of iterations since test would fail the
        # instant it returned anything beyond the first line from the
        # comparison
        for line in self.returned_obj.__iter__():
            self.assertEqual(line, self.text)
Skip Montanaro080c9972001-01-28 21:12:22 +000089
class urlretrieve_Tests(unittest.TestCase):
    """Placeholder for tests of urllib.urlretrieve() on local files.

    No test methods are defined yet.
    """
93
class _urlopener_Tests(unittest.TestCase):
    """Placeholder: check that urlopen() and urlretrieve() use the class
    assigned to _urlopener.

    No test methods are defined yet.
    """
    # TODO: Maybe create a custom class here that takes in a list and
    #       modifies it to signal that it was called?
Skip Montanaro080c9972001-01-28 21:12:22 +0000100
class QuotingTests(unittest.TestCase):
    r"""Tests for urllib.quote() and urllib.quote_plus()

    According to RFC 2396 ("Uniform Resource Identifiers"), to escape a
    character you write it as '%' + <2 character US-ASCII hex value>.  The
    Python code of ``'%' + hex(ord(<character>))[2:]`` escapes a character
    properly.  Case does not matter on the hex letters.

    The various character sets specified are:

    Reserved characters : ";/?:@&=+$,"
        Have special meaning in URIs and must be escaped if not being used for
        their special meaning
    Data characters : letters, digits, and "-_.!~*'()"
        Unreserved and do not need to be escaped; can be, though, if desired
    Control characters : 0x00 - 0x1F, 0x7F
        Have no use in URIs so must be escaped
    space : 0x20
        Must be escaped
    Delimiters : '<>#%"'
        Must be escaped
    Unwise : "{}|\^[]`"
        Must be escaped

    """

    def test_never_quote(self):
        # Make sure quote() does not quote letters, digits, and "_.-"
        do_not_quote = ''.join(["ABCDEFGHIJKLMNOPQRSTUVWXYZ",
                                "abcdefghijklmnopqrstuvwxyz",
                                "0123456789",
                                "_.-"])
        result = urllib.quote(do_not_quote)
        self.assertEqual(do_not_quote, result,
                         "using quote(): %s != %s" % (do_not_quote, result))
        result = urllib.quote_plus(do_not_quote)
        self.assertEqual(do_not_quote, result,
                         "using quote_plus(): %s != %s" %
                         (do_not_quote, result))

    def test_default_safe(self):
        # Test '/' is default value for 'safe' parameter
        self.assertEqual(urllib.quote.func_defaults[0], '/')

    def test_safe(self):
        # Test setting 'safe' parameter does what it should do
        quote_by_default = "<>"
        result = urllib.quote(quote_by_default, safe=quote_by_default)
        self.assertEqual(quote_by_default, result,
                         "using quote(): %s != %s" %
                         (quote_by_default, result))
        result = urllib.quote_plus(quote_by_default, safe=quote_by_default)
        self.assertEqual(quote_by_default, result,
                         "using quote_plus(): %s != %s" %
                         (quote_by_default, result))

    def test_default_quoting(self):
        # Make sure all characters that should be quoted are by default sans
        # space (separate test for that).
        should_quote = [chr(num) for num in range(32)]  # For 0x00 - 0x1F
        # Delimiters and "unwise" characters; the backslash is written as an
        # explicit escape so the literal does not rely on '\^' being left
        # alone by the parser.
        should_quote.append('<>#%"{}|\\^[]`')
        should_quote.append(chr(127))  # For 0x7F
        should_quote = ''.join(should_quote)
        for char in should_quote:
            result = urllib.quote(char)
            self.assertEqual(hexescape(char), result,
                             "using quote(): %s should be escaped to %s, "
                             "not %s" % (char, hexescape(char), result))
            result = urllib.quote_plus(char)
            self.assertEqual(hexescape(char), result,
                             "using quote_plus(): "
                             "%s should be escaped to %s, not %s" %
                             (char, hexescape(char), result))
        partial_quote = "ab[]cd"
        expected = "ab%5B%5Dcd"
        result = urllib.quote(partial_quote)
        self.assertEqual(expected, result,
                         "using quote(): %s != %s" % (expected, result))
        # Actually exercise quote_plus() here; previously the quote() result
        # was simply checked a second time.
        result = urllib.quote_plus(partial_quote)
        self.assertEqual(expected, result,
                         "using quote_plus(): %s != %s" % (expected, result))

    def test_quoting_space(self):
        # Make sure quote() and quote_plus() handle spaces as specified in
        # their unique way
        result = urllib.quote(' ')
        self.assertEqual(result, hexescape(' '),
                         "using quote(): %s != %s" % (result, hexescape(' ')))
        result = urllib.quote_plus(' ')
        self.assertEqual(result, '+',
                         "using quote_plus(): %s != +" % result)
        given = "a b cd e f"
        expect = given.replace(' ', hexescape(' '))
        result = urllib.quote(given)
        self.assertEqual(expect, result,
                         "using quote(): %s != %s" % (expect, result))
        expect = given.replace(' ', '+')
        result = urllib.quote_plus(given)
        self.assertEqual(expect, result,
                         "using quote_plus(): %s != %s" % (expect, result))
199
class UnquotingTests(unittest.TestCase):
    """Tests for unquote() and unquote_plus()

    See the doc string for QuotingTests for details on quoting and such.

    """

    def test_unquoting(self):
        # Make sure unquoting of all ASCII values works
        escape_list = []
        for num in range(128):
            given = hexescape(chr(num))
            expect = chr(num)
            result = urllib.unquote(given)
            self.assertEqual(expect, result,
                             "using unquote(): %s != %s" % (expect, result))
            result = urllib.unquote_plus(given)
            self.assertEqual(expect, result,
                             "using unquote_plus(): %s != %s" %
                             (expect, result))
            escape_list.append(given)
        escape_string = ''.join(escape_list)
        # '%' itself (0x25) is one of the decoded characters, so a fully
        # unescaped string contains exactly one '%'.
        result = urllib.unquote(escape_string)
        self.assertEqual(result.count('%'), 1,
                         "using unquote(): not all characters unescaped: "
                         "%s" % result)
        # Previously unquote() was called a second time here, leaving
        # unquote_plus() untested on the joined string.
        result = urllib.unquote_plus(escape_string)
        self.assertEqual(result.count('%'), 1,
                         "using unquote_plus(): not all characters "
                         "unescaped: %s" % result)

    def test_unquoting_parts(self):
        # Make sure unquoting works when have non-quoted characters
        # interspersed
        given = 'ab%sd' % hexescape('c')
        expect = "abcd"
        result = urllib.unquote(given)
        self.assertEqual(expect, result,
                         "using unquote(): %s != %s" % (expect, result))
        result = urllib.unquote_plus(given)
        self.assertEqual(expect, result,
                         "using unquote_plus(): %s != %s" % (expect, result))

    def test_unquoting_plus(self):
        # Test difference between unquote() and unquote_plus()
        given = "are+there+spaces..."
        expect = given
        result = urllib.unquote(given)
        self.assertEqual(expect, result,
                         "using unquote(): %s != %s" % (expect, result))
        expect = given.replace('+', ' ')
        result = urllib.unquote_plus(given)
        self.assertEqual(expect, result,
                         "using unquote_plus(): %s != %s" % (expect, result))
255
class urlencode_Tests(unittest.TestCase):
    """Tests for urlencode()"""

    def help_inputtype(self, given, test_type):
        """Shared checks for the different input types urlencode() accepts.

        'given' must encode to exactly these three pairs:
            * 1st, 1
            * 2nd, 2
            * 3rd, 3

        Nothing is assumed about the order of the pairs, since the docs
        make no guarantee and the input may be a dictionary.

        """
        encoded = urllib.urlencode(given)
        for pair in ("1st=1", "2nd=2", "3rd=3"):
            self.assert_(pair in encoded,
                         "testing %s: %s not found in %s" %
                         (test_type, pair, encoded))
        amp_count = encoded.count('&')
        self.assertEqual(amp_count, 2,
                         "testing %s: expected 2 '&'s; got %s" %
                         (test_type, encoded.count('&')))
        # The first '&' must sit between the digit ending one pair and the
        # digit starting the next.
        first_amp = encoded.index('&')
        before = encoded[first_amp - 1]
        after = encoded[first_amp + 1]
        digits_around_amp = before.isdigit() and after.isdigit()
        self.assert_(digits_around_amp,
                     "testing %s: '&' not located in proper place in %s" %
                     (test_type, encoded))
        # Three pairs of 5 characters each plus the two separating '&'s.
        expected_length = (5 * 3) + 2
        self.assertEqual(len(encoded), expected_length,
                         "testing %s: "
                         "unexpected number of characters: %s != %s" %
                         (test_type, len(encoded), expected_length))

    def test_using_mapping(self):
        # A mapping object is acceptable input.
        mapping = {"1st": '1', "2nd": '2', "3rd": '3'}
        self.help_inputtype(mapping, "using dict as input type")

    def test_using_sequence(self):
        # A sequence of two-item sequences is acceptable input.
        pairs = [('1st', '1'), ('2nd', '2'), ('3rd', '3')]
        self.help_inputtype(pairs,
                            "using sequence of two-item tuples as input")

    def test_quoting(self):
        # Keys and values must both be quoted with quote_plus().
        expect = "=".join((hexescape('&'), hexescape('=')))
        self.assertEqual(expect, urllib.urlencode({"&": "="}))
        expect = "key+name=A+bunch+of+pluses"
        self.assertEqual(expect,
                         urllib.urlencode({"key name": "A bunch of pluses"}))

    def test_doseq(self):
        # Passing True for the 'doseq' parameter expands sequence values.
        values = ['1', '2', '3']
        given = {'sequence': values}
        # Without doseq the list is stringified and quoted as one value.
        expect = "sequence=%s" % urllib.quote_plus(str(['1', '2', '3']))
        self.assertEqual(expect, urllib.urlencode(given))
        # With doseq every element gets its own key=value pair.
        result = urllib.urlencode(given, True)
        for value in values:
            expect = "sequence=%s" % value
            self.assert_(expect in result,
                         "%s not found in %s" % (expect, result))
        self.assertEqual(result.count('&'), 2,
                         "Expected 2 '&'s, got %s" % result.count('&'))
325
class Pathname_Tests(unittest.TestCase):
    """Test pathname2url() and url2pathname()"""

    def test_basic(self):
        # Make sure simple tests pass
        expected_path = os.path.join("parts", "of", "a", "path")
        expected_url = "parts/of/a/path"
        result = urllib.pathname2url(expected_path)
        self.assertEqual(expected_url, result,
                         "pathname2url() failed; %s != %s" %
                         (expected_url, result))
        result = urllib.url2pathname(expected_url)
        self.assertEqual(expected_path, result,
                         "url2pathname() failed; %s != %s" %
                         (expected_path, result))

    def test_quoting(self):
        # Test automatic quoting and unquoting works for pathname2url() and
        # url2pathname() respectively
        given = os.path.join("needs", "quot=ing", "here")
        expect = "needs/%s/here" % urllib.quote("quot=ing")
        result = urllib.pathname2url(given)
        self.assertEqual(expect, result,
                         "pathname2url() failed; %s != %s" %
                         (expect, result))
        # Round trip: converting the URL back must give the original path.
        expect = given
        result = urllib.url2pathname(result)
        self.assertEqual(expect, result,
                         "url2pathname() failed; %s != %s" %
                         (expect, result))
        given = os.path.join("make sure", "using_quote")
        expect = "%s/using_quote" % urllib.quote("make sure")
        result = urllib.pathname2url(given)
        self.assertEqual(expect, result,
                         "pathname2url() failed; %s != %s" %
                         (expect, result))
        # A literal '+' must survive: url2pathname() is expected to behave
        # like unquote(), not unquote_plus().
        given = "make+sure/using_unquote"
        expect = os.path.join("make+sure", "using_unquote")
        result = urllib.url2pathname(given)
        self.assertEqual(expect, result,
                         "url2pathname() failed; %s != %s" %
                         (expect, result))
368
Skip Montanaro080c9972001-01-28 21:12:22 +0000369
370
def test_main():
    """Gather the implemented TestCase classes into one suite and run it."""
    suite = unittest.TestSuite()
    for case_class in (urlopen_FileTests,
                       QuotingTests,
                       UnquotingTests,
                       urlencode_Tests,
                       Pathname_Tests):
        suite.addTest(unittest.makeSuite(case_class))
    test_support.run_suite(suite)
379
380
381
# Run the suite only when this file is executed directly as a script.
if __name__ == '__main__':
    test_main()