blob: 72f37769e275075cb4fa9f39a53613dad60df195 [file] [log] [blame]
Benjamin Peterson90f5ba52010-03-11 22:53:45 +00001#! /usr/bin/env python3
Skip Montanaro6ec967d2002-03-23 05:32:10 +00002
Benjamin Petersonee8712c2008-05-20 21:35:26 +00003from test import support
Skip Montanaro6ec967d2002-03-23 05:32:10 +00004import unittest
Jeremy Hylton1afc1692008-06-18 20:49:58 +00005import urllib.parse
Fred Drakea4d18a02001-01-05 05:57:04 +00006
# Base URLs that the urljoin() test cases in this module resolve relative
# references against.
RFC1808_BASE = 'http://a/b/c/d;p?q#f'
RFC2396_BASE = 'http://a/b/c/d;p?q'
RFC3986_BASE = 'http://a/b/c/d;p?q'
SIMPLE_BASE = 'http://a/b/c/d'
Fred Drakea4d18a02001-01-05 05:57:04 +000011
# A list of test cases.  Each test case is a two-tuple that contains
# a query string and the list of (name, value) pairs that parse_qsl()
# is expected to return for it when blank values are kept.  Every str
# case is followed further down by the equivalent bytes case.

parse_qsl_test_cases = [
    ("", []),
    ("&", []),
    ("&&", []),
    ("=", [('', '')]),
    ("=a", [('', 'a')]),
    ("a", [('a', '')]),
    ("a=", [('a', '')]),
    ("&a=b", [('a', 'b')]),
    ("a=a+b&b=b+c", [('a', 'a b'), ('b', 'b c')]),
    ("a=1&a=2", [('a', '1'), ('a', '2')]),
    (b"", []),
    (b"&", []),
    (b"&&", []),
    (b"=", [(b'', b'')]),
    (b"=a", [(b'', b'a')]),
    (b"a", [(b'a', b'')]),
    (b"a=", [(b'a', b'')]),
    (b"&a=b", [(b'a', b'b')]),
    (b"a=a+b&b=b+c", [(b'a', b'a b'), (b'b', b'b c')]),
    (b"a=1&a=2", [(b'a', b'1'), (b'a', b'2')]),
]
39
Skip Montanaro6ec967d2002-03-23 05:32:10 +000040class UrlParseTestCase(unittest.TestCase):
Johannes Gijsbers41e4faa2005-01-09 15:29:10 +000041
def checkRoundtrips(self, url, parsed, split):
    """Assert that *url* parses as expected and survives a round trip.

    *parsed* is the expected urlparse() result (six components) and
    *split* the expected urlsplit() result (five components).  For each
    of the two parsers the URL is parsed, put back together, and the
    result of geturl() is parsed once more to confirm it is a fixpoint.
    """
    variants = (
        (urllib.parse.urlparse, urllib.parse.urlunparse, parsed,
         ('scheme', 'netloc', 'path', 'params', 'query', 'fragment')),
        (urllib.parse.urlsplit, urllib.parse.urlunsplit, split,
         ('scheme', 'netloc', 'path', 'query', 'fragment')),
    )
    derived = ('username', 'password', 'hostname', 'port')

    for parse, unparse, expected, fields in variants:
        first = parse(url)
        self.assertEqual(first, expected)
        by_name = tuple(getattr(first, field) for field in fields)
        self.assertEqual(by_name, expected)

        # put it back together and it should be the same
        rebuilt = unparse(first)
        self.assertEqual(rebuilt, url)
        self.assertEqual(rebuilt, first.geturl())

        # the result of geturl() is a fixpoint; we can always parse it
        # again to get the same result:
        again = parse(first.geturl())
        self.assertEqual(again.geturl(), first.geturl())
        self.assertEqual(again, first)
        for name in fields + derived:
            self.assertEqual(getattr(again, name), getattr(first, name))
Johannes Gijsbers41e4faa2005-01-09 15:29:10 +000092
def test_qsl(self):
    # Run every shared query-string case through parse_qsl(), once
    # keeping blank values and once dropping them.
    for query, pairs in parse_qsl_test_cases:
        failure_msg = "Error parsing %r" % query
        kept = urllib.parse.parse_qsl(query, keep_blank_values=True)
        self.assertEqual(kept, pairs, failure_msg)
        # Without blank values, every pair whose value is empty goes away.
        non_blank = [pair for pair in pairs if pair[1]]
        dropped = urllib.parse.parse_qsl(query, keep_blank_values=False)
        self.assertEqual(dropped, non_blank, failure_msg)
Facundo Batistac469d4c2008-09-03 22:49:01 +0000101
Johannes Gijsbers41e4faa2005-01-09 15:29:10 +0000102 def test_roundtrips(self):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000103 str_cases = [
Fred Drake70705652002-10-16 21:02:36 +0000104 ('file:///tmp/junk.txt',
105 ('file', '', '/tmp/junk.txt', '', '', ''),
106 ('file', '', '/tmp/junk.txt', '', '')),
Neal Norwitz68b539e2003-01-06 06:58:31 +0000107 ('imap://mail.python.org/mbox1',
108 ('imap', 'mail.python.org', '/mbox1', '', '', ''),
109 ('imap', 'mail.python.org', '/mbox1', '', '')),
Skip Montanarof09b88e2003-01-06 20:27:03 +0000110 ('mms://wms.sys.hinet.net/cts/Drama/09006251100.asf',
Johannes Gijsbers41e4faa2005-01-09 15:29:10 +0000111 ('mms', 'wms.sys.hinet.net', '/cts/Drama/09006251100.asf',
112 '', '', ''),
113 ('mms', 'wms.sys.hinet.net', '/cts/Drama/09006251100.asf',
114 '', '')),
Senthil Kumaraneaaec272009-03-30 21:54:41 +0000115 ('nfs://server/path/to/file.txt',
116 ('nfs', 'server', '/path/to/file.txt', '', '', ''),
117 ('nfs', 'server', '/path/to/file.txt', '', '')),
Fred Drake50747fc2005-07-29 15:56:32 +0000118 ('svn+ssh://svn.zope.org/repos/main/ZConfig/trunk/',
119 ('svn+ssh', 'svn.zope.org', '/repos/main/ZConfig/trunk/',
120 '', '', ''),
121 ('svn+ssh', 'svn.zope.org', '/repos/main/ZConfig/trunk/',
Senthil Kumaranead169d2010-05-13 03:37:23 +0000122 '', '')),
123 ('git+ssh://git@github.com/user/project.git',
124 ('git+ssh', 'git@github.com','/user/project.git',
125 '','',''),
126 ('git+ssh', 'git@github.com','/user/project.git',
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000127 '', '')),
Johannes Gijsbers41e4faa2005-01-09 15:29:10 +0000128 ]
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000129 def _encode(t):
130 return (t[0].encode('ascii'),
131 tuple(x.encode('ascii') for x in t[1]),
132 tuple(x.encode('ascii') for x in t[2]))
133 bytes_cases = [_encode(x) for x in str_cases]
134 for url, parsed, split in str_cases + bytes_cases:
Johannes Gijsbers41e4faa2005-01-09 15:29:10 +0000135 self.checkRoundtrips(url, parsed, split)
Michael W. Hudsonbd3e7712002-03-18 13:06:00 +0000136
Johannes Gijsbers41e4faa2005-01-09 15:29:10 +0000137 def test_http_roundtrips(self):
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000138 # urllib.parse.urlsplit treats 'http:' as an optimized special case,
Johannes Gijsbers41e4faa2005-01-09 15:29:10 +0000139 # so we test both 'http:' and 'https:' in all the following.
140 # Three cheers for white box knowledge!
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000141 str_cases = [
Johannes Gijsbers41e4faa2005-01-09 15:29:10 +0000142 ('://www.python.org',
143 ('www.python.org', '', '', '', ''),
144 ('www.python.org', '', '', '')),
145 ('://www.python.org#abc',
146 ('www.python.org', '', '', '', 'abc'),
147 ('www.python.org', '', '', 'abc')),
148 ('://www.python.org?q=abc',
149 ('www.python.org', '', '', 'q=abc', ''),
150 ('www.python.org', '', 'q=abc', '')),
151 ('://www.python.org/#abc',
152 ('www.python.org', '/', '', '', 'abc'),
153 ('www.python.org', '/', '', 'abc')),
154 ('://a/b/c/d;p?q#f',
155 ('a', '/b/c/d', 'p', 'q', 'f'),
156 ('a', '/b/c/d;p', 'q', 'f')),
157 ]
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000158 def _encode(t):
159 return (t[0].encode('ascii'),
160 tuple(x.encode('ascii') for x in t[1]),
161 tuple(x.encode('ascii') for x in t[2]))
162 bytes_cases = [_encode(x) for x in str_cases]
163 str_schemes = ('http', 'https')
164 bytes_schemes = (b'http', b'https')
165 str_tests = str_schemes, str_cases
166 bytes_tests = bytes_schemes, bytes_cases
167 for schemes, test_cases in (str_tests, bytes_tests):
168 for scheme in schemes:
169 for url, parsed, split in test_cases:
170 url = scheme + url
171 parsed = (scheme,) + parsed
172 split = (scheme,) + split
173 self.checkRoundtrips(url, parsed, split)
Fred Drake70705652002-10-16 21:02:36 +0000174
def checkJoin(self, base, relurl, expected):
    """Assert that urljoin(base, relurl) gives *expected*.

    All three arguments are str; the same check is then repeated with
    each of them encoded to ASCII bytes.
    """
    self.assertEqual(urllib.parse.urljoin(base, relurl), expected)
    base_b, relurl_b, expected_b = (
        text.encode('ascii') for text in (base, relurl, expected))
    self.assertEqual(urllib.parse.urljoin(base_b, relurl_b), expected_b)
Guido van Rossumbbc05682002-10-14 19:59:54 +0000181
182 def test_unparse_parse(self):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000183 str_cases = ['Python', './Python','x-newscheme://foo.com/stuff','x://y','x:/y','x:/','/',]
184 bytes_cases = [x.encode('ascii') for x in str_cases]
185 for u in str_cases + bytes_cases:
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000186 self.assertEqual(urllib.parse.urlunsplit(urllib.parse.urlsplit(u)), u)
187 self.assertEqual(urllib.parse.urlunparse(urllib.parse.urlparse(u)), u)
Fred Drakea4d18a02001-01-05 05:57:04 +0000188
def test_RFC1808(self):
    # Every entry is (relative reference, expected result of joining it
    # onto RFC1808_BASE).

    # "normal" cases from RFC 1808:
    normal = [
        ('g:h', 'g:h'),
        ('g', 'http://a/b/c/g'),
        ('./g', 'http://a/b/c/g'),
        ('g/', 'http://a/b/c/g/'),
        ('/g', 'http://a/g'),
        ('//g', 'http://g'),
        ('g?y', 'http://a/b/c/g?y'),
        ('g?y/./x', 'http://a/b/c/g?y/./x'),
        ('#s', 'http://a/b/c/d;p?q#s'),
        ('g#s', 'http://a/b/c/g#s'),
        ('g#s/./x', 'http://a/b/c/g#s/./x'),
        ('g?y#s', 'http://a/b/c/g?y#s'),
        ('g;x', 'http://a/b/c/g;x'),
        ('g;x?y#s', 'http://a/b/c/g;x?y#s'),
        ('.', 'http://a/b/c/'),
        ('./', 'http://a/b/c/'),
        ('..', 'http://a/b/'),
        ('../', 'http://a/b/'),
        ('../g', 'http://a/b/g'),
        ('../..', 'http://a/'),
        ('../../', 'http://a/'),
        ('../../g', 'http://a/g'),
    ]

    # "abnormal" cases from RFC 1808:
    abnormal = [
        ('', 'http://a/b/c/d;p?q#f'),
        ('../../../g', 'http://a/../g'),
        ('../../../../g', 'http://a/../../g'),
        ('/./g', 'http://a/./g'),
        ('/../g', 'http://a/../g'),
        ('g.', 'http://a/b/c/g.'),
        ('.g', 'http://a/b/c/.g'),
        ('g..', 'http://a/b/c/g..'),
        ('..g', 'http://a/b/c/..g'),
        ('./../g', 'http://a/b/g'),
        ('./g/.', 'http://a/b/c/g/'),
        ('g/./h', 'http://a/b/c/g/h'),
        ('g/../h', 'http://a/b/c/h'),
    ]

    for relurl, expected in normal + abnormal:
        self.checkJoin(RFC1808_BASE, relurl, expected)

    # RFC 1808 and RFC 1630 disagree on these (according to RFC 1808),
    # so we'll not actually run these tests (which expect 1808 behavior).
    #self.checkJoin(RFC1808_BASE, 'http:g', 'http:g')
    #self.checkJoin(RFC1808_BASE, 'http:', 'http:')
Fred Drakea4d18a02001-01-05 05:57:04 +0000233
Senthil Kumaran397eb442011-04-15 18:20:24 +0800234 def test_RFC2368(self):
235 # Issue 11467: path that starts with a number is not parsed correctly
236 self.assertEqual(urllib.parse.urlparse('mailto:1337@example.org'),
237 ('mailto', '', '1337@example.org', '', '', ''))
238
def test_RFC2396(self):
    # cases from RFC 2396, as (relative reference, expected result of
    # joining it onto RFC2396_BASE)
    cases = [
        ('g:h', 'g:h'),
        ('g', 'http://a/b/c/g'),
        ('./g', 'http://a/b/c/g'),
        ('g/', 'http://a/b/c/g/'),
        ('/g', 'http://a/g'),
        ('//g', 'http://g'),
        ('g?y', 'http://a/b/c/g?y'),
        ('#s', 'http://a/b/c/d;p?q#s'),
        ('g#s', 'http://a/b/c/g#s'),
        ('g?y#s', 'http://a/b/c/g?y#s'),
        ('g;x', 'http://a/b/c/g;x'),
        ('g;x?y#s', 'http://a/b/c/g;x?y#s'),
        ('.', 'http://a/b/c/'),
        ('./', 'http://a/b/c/'),
        ('..', 'http://a/b/'),
        ('../', 'http://a/b/'),
        ('../g', 'http://a/b/g'),
        ('../..', 'http://a/'),
        ('../../', 'http://a/'),
        ('../../g', 'http://a/g'),
        ('', RFC2396_BASE),
        ('../../../g', 'http://a/../g'),
        ('../../../../g', 'http://a/../../g'),
        ('/./g', 'http://a/./g'),
        ('/../g', 'http://a/../g'),
        ('g.', 'http://a/b/c/g.'),
        ('.g', 'http://a/b/c/.g'),
        ('g..', 'http://a/b/c/g..'),
        ('..g', 'http://a/b/c/..g'),
        ('./../g', 'http://a/b/g'),
        ('./g/.', 'http://a/b/c/g/'),
        ('g/./h', 'http://a/b/c/g/h'),
        ('g/../h', 'http://a/b/c/h'),
        ('g;x=1/./y', 'http://a/b/c/g;x=1/y'),
        ('g;x=1/../y', 'http://a/b/c/y'),
        ('g?y/./x', 'http://a/b/c/g?y/./x'),
        ('g?y/../x', 'http://a/b/c/g?y/../x'),
        ('g#s/./x', 'http://a/b/c/g#s/./x'),
        ('g#s/../x', 'http://a/b/c/g#s/../x'),
    ]
    for relurl, expected in cases:
        self.checkJoin(RFC2396_BASE, relurl, expected)
282
def test_RFC3986(self):
    # Test cases from RFC3986, as (relative reference, expected result
    # of joining it onto RFC3986_BASE).  Every case uses RFC3986_BASE
    # and appears exactly once.
    normal = [
        ('g:h', 'g:h'),
        ('g', 'http://a/b/c/g'),
        ('./g', 'http://a/b/c/g'),
        ('g/', 'http://a/b/c/g/'),
        ('/g', 'http://a/g'),
        ('//g', 'http://g'),
        ('?y', 'http://a/b/c/d;p?y'),
        ('g?y', 'http://a/b/c/g?y'),
        ('#s', 'http://a/b/c/d;p?q#s'),
        ('g#s', 'http://a/b/c/g#s'),
        ('g?y#s', 'http://a/b/c/g?y#s'),
        (';x', 'http://a/b/c/;x'),
        ('g;x', 'http://a/b/c/g;x'),
        ('g;x?y#s', 'http://a/b/c/g;x?y#s'),
        ('', 'http://a/b/c/d;p?q'),
        ('.', 'http://a/b/c/'),
        ('./', 'http://a/b/c/'),
        ('..', 'http://a/b/'),
        ('../', 'http://a/b/'),
        ('../g', 'http://a/b/g'),
        ('../..', 'http://a/'),
        ('../../', 'http://a/'),
        ('../../g', 'http://a/g'),
    ]

    # Abnormal Examples

    # The results RFC 3986 gives for the following 'abnormal' references
    # are not what urljoin() produces, so these checks are not run.
    # They are kept here for reference.

    #self.checkJoin(RFC3986_BASE, '../../../g','http://a/g')
    #self.checkJoin(RFC3986_BASE, '../../../../g','http://a/g')
    #self.checkJoin(RFC3986_BASE, '/./g','http://a/g')
    #self.checkJoin(RFC3986_BASE, '/../g','http://a/g')

    abnormal = [
        ('g.', 'http://a/b/c/g.'),
        ('.g', 'http://a/b/c/.g'),
        ('g..', 'http://a/b/c/g..'),
        ('..g', 'http://a/b/c/..g'),
        ('./../g', 'http://a/b/g'),
        ('./g/.', 'http://a/b/c/g/'),
        ('g/./h', 'http://a/b/c/g/h'),
        ('g/../h', 'http://a/b/c/h'),
        ('g;x=1/./y', 'http://a/b/c/g;x=1/y'),
        ('g;x=1/../y', 'http://a/b/c/y'),
        ('g?y/./x', 'http://a/b/c/g?y/./x'),
        ('g?y/../x', 'http://a/b/c/g?y/../x'),
        ('g#s/./x', 'http://a/b/c/g#s/./x'),
        ('g#s/../x', 'http://a/b/c/g#s/../x'),
        # A strict parser would give 'http:g' here; urljoin() is relaxed.
        ('http:g', 'http://a/b/c/g'),
    ]

    for relurl, expected in normal + abnormal:
        self.checkJoin(RFC3986_BASE, relurl, expected)

    # Test for issue9721
    self.checkJoin('http://a/b/c/de', ';x', 'http://a/b/c/;x')
340
def test_urljoins(self):
    # (relative reference, expected result) pairs joined onto SIMPLE_BASE.
    simple_base_cases = [
        ('g:h', 'g:h'),
        ('http:g', 'http://a/b/c/g'),
        ('http:', 'http://a/b/c/d'),
        ('g', 'http://a/b/c/g'),
        ('./g', 'http://a/b/c/g'),
        ('g/', 'http://a/b/c/g/'),
        ('/g', 'http://a/g'),
        ('//g', 'http://g'),
        ('?y', 'http://a/b/c/d?y'),
        ('g?y', 'http://a/b/c/g?y'),
        ('g?y/./x', 'http://a/b/c/g?y/./x'),
        ('.', 'http://a/b/c/'),
        ('./', 'http://a/b/c/'),
        ('..', 'http://a/b/'),
        ('../', 'http://a/b/'),
        ('../g', 'http://a/b/g'),
        ('../..', 'http://a/'),
        ('../../g', 'http://a/g'),
        ('../../../g', 'http://a/../g'),
        ('./../g', 'http://a/b/g'),
        ('./g/.', 'http://a/b/c/g/'),
        ('/./g', 'http://a/./g'),
        ('g/./h', 'http://a/b/c/g/h'),
        ('g/../h', 'http://a/b/c/h'),
        ('http:g', 'http://a/b/c/g'),
        ('http:', 'http://a/b/c/d'),
        ('http:?y', 'http://a/b/c/d?y'),
        ('http:g?y', 'http://a/b/c/g?y'),
        ('http:g?y/./x', 'http://a/b/c/g?y/./x'),
    ]
    for relurl, expected in simple_base_cases:
        self.checkJoin(SIMPLE_BASE, relurl, expected)

    # (base, relative reference, expected result) for the other bases.
    other_base_cases = [
        ('http:///', '..', 'http:///'),
        ('', 'http://a/b/c/g?y/./x', 'http://a/b/c/g?y/./x'),
        ('', 'http://a/./g', 'http://a/./g'),
    ]
    for base, relurl, expected in other_base_cases:
        self.checkJoin(base, relurl, expected)
Senthil Kumaranaa69d4d2010-07-14 10:21:22 +0000374
Senthil Kumaranad02d232010-04-16 03:02:13 +0000375 def test_RFC2732(self):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000376 str_cases = [
Senthil Kumaranad02d232010-04-16 03:02:13 +0000377 ('http://Test.python.org:5432/foo/', 'test.python.org', 5432),
378 ('http://12.34.56.78:5432/foo/', '12.34.56.78', 5432),
379 ('http://[::1]:5432/foo/', '::1', 5432),
380 ('http://[dead:beef::1]:5432/foo/', 'dead:beef::1', 5432),
381 ('http://[dead:beef::]:5432/foo/', 'dead:beef::', 5432),
382 ('http://[dead:beef:cafe:5417:affe:8FA3:deaf:feed]:5432/foo/',
383 'dead:beef:cafe:5417:affe:8fa3:deaf:feed', 5432),
384 ('http://[::12.34.56.78]:5432/foo/', '::12.34.56.78', 5432),
385 ('http://[::ffff:12.34.56.78]:5432/foo/',
386 '::ffff:12.34.56.78', 5432),
387 ('http://Test.python.org/foo/', 'test.python.org', None),
388 ('http://12.34.56.78/foo/', '12.34.56.78', None),
389 ('http://[::1]/foo/', '::1', None),
390 ('http://[dead:beef::1]/foo/', 'dead:beef::1', None),
391 ('http://[dead:beef::]/foo/', 'dead:beef::', None),
392 ('http://[dead:beef:cafe:5417:affe:8FA3:deaf:feed]/foo/',
393 'dead:beef:cafe:5417:affe:8fa3:deaf:feed', None),
394 ('http://[::12.34.56.78]/foo/', '::12.34.56.78', None),
395 ('http://[::ffff:12.34.56.78]/foo/',
396 '::ffff:12.34.56.78', None),
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000397 ]
398 def _encode(t):
399 return t[0].encode('ascii'), t[1].encode('ascii'), t[2]
400 bytes_cases = [_encode(x) for x in str_cases]
401 for url, hostname, port in str_cases + bytes_cases:
Senthil Kumaranad02d232010-04-16 03:02:13 +0000402 urlparsed = urllib.parse.urlparse(url)
403 self.assertEqual((urlparsed.hostname, urlparsed.port) , (hostname, port))
404
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000405 str_cases = [
Senthil Kumaranad02d232010-04-16 03:02:13 +0000406 'http://::12.34.56.78]/',
407 'http://[::1/foo/',
Senthil Kumaran7a1e09f2010-04-22 12:19:46 +0000408 'ftp://[::1/foo/bad]/bad',
Senthil Kumaran2eaef052010-04-20 20:42:50 +0000409 'http://[::1/foo/bad]/bad',
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000410 'http://[::ffff:12.34.56.78']
411 bytes_cases = [x.encode('ascii') for x in str_cases]
412 for invalid_url in str_cases + bytes_cases:
Senthil Kumaran7a1e09f2010-04-22 12:19:46 +0000413 self.assertRaises(ValueError, urllib.parse.urlparse, invalid_url)
Senthil Kumaranad02d232010-04-16 03:02:13 +0000414
def test_urldefrag(self):
    # (url, url without its fragment, fragment)
    text_cases = [
        ('http://python.org#frag', 'http://python.org', 'frag'),
        ('http://python.org', 'http://python.org', ''),
        ('http://python.org/#frag', 'http://python.org/', 'frag'),
        ('http://python.org/', 'http://python.org/', ''),
        ('http://python.org/?q#frag', 'http://python.org/?q', 'frag'),
        ('http://python.org/?q', 'http://python.org/?q', ''),
        ('http://python.org/p#frag', 'http://python.org/p', 'frag'),
        ('http://python.org/p?q', 'http://python.org/p?q', ''),
        (RFC1808_BASE, 'http://a/b/c/d;p?q', 'f'),
        (RFC2396_BASE, 'http://a/b/c/d;p?q', ''),
    ]
    bytes_cases = [tuple(part.encode('ascii') for part in case)
                   for case in text_cases]
    for url, without_frag, fragment in text_cases + bytes_cases:
        outcome = urllib.parse.urldefrag(url)
        self.assertEqual(outcome.geturl(), url)
        self.assertEqual(outcome, (without_frag, fragment))
        self.assertEqual(outcome.url, without_frag)
        self.assertEqual(outcome.fragment, fragment)
Fred Drake70705652002-10-16 21:02:36 +0000437
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000438 def test_urlsplit_attributes(self):
439 url = "HTTP://WWW.PYTHON.ORG/doc/#frag"
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000440 p = urllib.parse.urlsplit(url)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000441 self.assertEqual(p.scheme, "http")
442 self.assertEqual(p.netloc, "WWW.PYTHON.ORG")
443 self.assertEqual(p.path, "/doc/")
444 self.assertEqual(p.query, "")
445 self.assertEqual(p.fragment, "frag")
446 self.assertEqual(p.username, None)
447 self.assertEqual(p.password, None)
448 self.assertEqual(p.hostname, "www.python.org")
449 self.assertEqual(p.port, None)
450 # geturl() won't return exactly the original URL in this case
451 # since the scheme is always case-normalized
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000452 # We handle this by ignoring the first 4 characters of the URL
453 self.assertEqual(p.geturl()[4:], url[4:])
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000454
455 url = "http://User:Pass@www.python.org:080/doc/?query=yes#frag"
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000456 p = urllib.parse.urlsplit(url)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000457 self.assertEqual(p.scheme, "http")
458 self.assertEqual(p.netloc, "User:Pass@www.python.org:080")
459 self.assertEqual(p.path, "/doc/")
460 self.assertEqual(p.query, "query=yes")
461 self.assertEqual(p.fragment, "frag")
462 self.assertEqual(p.username, "User")
463 self.assertEqual(p.password, "Pass")
464 self.assertEqual(p.hostname, "www.python.org")
465 self.assertEqual(p.port, 80)
466 self.assertEqual(p.geturl(), url)
467
Christian Heimesfaf2f632008-01-06 16:59:19 +0000468 # Addressing issue1698, which suggests Username can contain
469 # "@" characters. Though not RFC compliant, many ftp sites allow
470 # and request email addresses as usernames.
471
472 url = "http://User@example.com:Pass@www.python.org:080/doc/?query=yes#frag"
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000473 p = urllib.parse.urlsplit(url)
Christian Heimesfaf2f632008-01-06 16:59:19 +0000474 self.assertEqual(p.scheme, "http")
475 self.assertEqual(p.netloc, "User@example.com:Pass@www.python.org:080")
476 self.assertEqual(p.path, "/doc/")
477 self.assertEqual(p.query, "query=yes")
478 self.assertEqual(p.fragment, "frag")
479 self.assertEqual(p.username, "User@example.com")
480 self.assertEqual(p.password, "Pass")
481 self.assertEqual(p.hostname, "www.python.org")
482 self.assertEqual(p.port, 80)
483 self.assertEqual(p.geturl(), url)
484
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000485 # And check them all again, only with bytes this time
486 url = b"HTTP://WWW.PYTHON.ORG/doc/#frag"
487 p = urllib.parse.urlsplit(url)
488 self.assertEqual(p.scheme, b"http")
489 self.assertEqual(p.netloc, b"WWW.PYTHON.ORG")
490 self.assertEqual(p.path, b"/doc/")
491 self.assertEqual(p.query, b"")
492 self.assertEqual(p.fragment, b"frag")
493 self.assertEqual(p.username, None)
494 self.assertEqual(p.password, None)
495 self.assertEqual(p.hostname, b"www.python.org")
496 self.assertEqual(p.port, None)
497 self.assertEqual(p.geturl()[4:], url[4:])
498
499 url = b"http://User:Pass@www.python.org:080/doc/?query=yes#frag"
500 p = urllib.parse.urlsplit(url)
501 self.assertEqual(p.scheme, b"http")
502 self.assertEqual(p.netloc, b"User:Pass@www.python.org:080")
503 self.assertEqual(p.path, b"/doc/")
504 self.assertEqual(p.query, b"query=yes")
505 self.assertEqual(p.fragment, b"frag")
506 self.assertEqual(p.username, b"User")
507 self.assertEqual(p.password, b"Pass")
508 self.assertEqual(p.hostname, b"www.python.org")
509 self.assertEqual(p.port, 80)
510 self.assertEqual(p.geturl(), url)
511
512 url = b"http://User@example.com:Pass@www.python.org:080/doc/?query=yes#frag"
513 p = urllib.parse.urlsplit(url)
514 self.assertEqual(p.scheme, b"http")
515 self.assertEqual(p.netloc, b"User@example.com:Pass@www.python.org:080")
516 self.assertEqual(p.path, b"/doc/")
517 self.assertEqual(p.query, b"query=yes")
518 self.assertEqual(p.fragment, b"frag")
519 self.assertEqual(p.username, b"User@example.com")
520 self.assertEqual(p.password, b"Pass")
521 self.assertEqual(p.hostname, b"www.python.org")
522 self.assertEqual(p.port, 80)
523 self.assertEqual(p.geturl(), url)
Christian Heimesfaf2f632008-01-06 16:59:19 +0000524
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000525 def test_attributes_bad_port(self):
526 """Check handling of non-integer ports."""
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000527 p = urllib.parse.urlsplit("http://www.example.net:foo")
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000528 self.assertEqual(p.netloc, "www.example.net:foo")
529 self.assertRaises(ValueError, lambda: p.port)
530
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000531 p = urllib.parse.urlparse("http://www.example.net:foo")
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000532 self.assertEqual(p.netloc, "www.example.net:foo")
533 self.assertRaises(ValueError, lambda: p.port)
534
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000535 # Once again, repeat ourselves to test bytes
536 p = urllib.parse.urlsplit(b"http://www.example.net:foo")
537 self.assertEqual(p.netloc, b"www.example.net:foo")
538 self.assertRaises(ValueError, lambda: p.port)
539
540 p = urllib.parse.urlparse(b"http://www.example.net:foo")
541 self.assertEqual(p.netloc, b"www.example.net:foo")
542 self.assertRaises(ValueError, lambda: p.port)
543
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000544 def test_attributes_without_netloc(self):
545 # This example is straight from RFC 3261. It looks like it
546 # should allow the username, hostname, and port to be filled
547 # in, but doesn't. Since it's a URI and doesn't use the
548 # scheme://netloc syntax, the netloc and related attributes
549 # should be left empty.
550 uri = "sip:alice@atlanta.com;maddr=239.255.255.1;ttl=15"
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000551 p = urllib.parse.urlsplit(uri)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000552 self.assertEqual(p.netloc, "")
553 self.assertEqual(p.username, None)
554 self.assertEqual(p.password, None)
555 self.assertEqual(p.hostname, None)
556 self.assertEqual(p.port, None)
557 self.assertEqual(p.geturl(), uri)
558
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000559 p = urllib.parse.urlparse(uri)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000560 self.assertEqual(p.netloc, "")
561 self.assertEqual(p.username, None)
562 self.assertEqual(p.password, None)
563 self.assertEqual(p.hostname, None)
564 self.assertEqual(p.port, None)
565 self.assertEqual(p.geturl(), uri)
566
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000567 # You guessed it, repeating the test with bytes input
568 uri = b"sip:alice@atlanta.com;maddr=239.255.255.1;ttl=15"
569 p = urllib.parse.urlsplit(uri)
570 self.assertEqual(p.netloc, b"")
571 self.assertEqual(p.username, None)
572 self.assertEqual(p.password, None)
573 self.assertEqual(p.hostname, None)
574 self.assertEqual(p.port, None)
575 self.assertEqual(p.geturl(), uri)
576
577 p = urllib.parse.urlparse(uri)
578 self.assertEqual(p.netloc, b"")
579 self.assertEqual(p.username, None)
580 self.assertEqual(p.password, None)
581 self.assertEqual(p.hostname, None)
582 self.assertEqual(p.port, None)
583 self.assertEqual(p.geturl(), uri)
584
Christian Heimesfaf2f632008-01-06 16:59:19 +0000585 def test_noslash(self):
586 # Issue 1637: http://foo.com?query is legal
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000587 self.assertEqual(urllib.parse.urlparse("http://example.com?blahblah=/foo"),
Christian Heimesfaf2f632008-01-06 16:59:19 +0000588 ('http', 'example.com', '', '', 'blahblah=/foo', ''))
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000589 self.assertEqual(urllib.parse.urlparse(b"http://example.com?blahblah=/foo"),
590 (b'http', b'example.com', b'', b'', b'blahblah=/foo', b''))
Christian Heimesfaf2f632008-01-06 16:59:19 +0000591
Senthil Kumaran84c7d9f2010-08-04 04:50:44 +0000592 def test_withoutscheme(self):
593 # Test urlparse without scheme
594 # Issue 754016: urlparse goes wrong with IP:port without scheme
595 # RFC 1808 specifies that netloc should start with //, urlparse expects
596 # the same, otherwise it classifies the portion of url as path.
597 self.assertEqual(urllib.parse.urlparse("path"),
598 ('','','path','','',''))
599 self.assertEqual(urllib.parse.urlparse("//www.python.org:80"),
600 ('','www.python.org:80','','','',''))
601 self.assertEqual(urllib.parse.urlparse("http://www.python.org:80"),
602 ('http','www.python.org:80','','','',''))
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000603 # Repeat for bytes input
604 self.assertEqual(urllib.parse.urlparse(b"path"),
605 (b'',b'',b'path',b'',b'',b''))
606 self.assertEqual(urllib.parse.urlparse(b"//www.python.org:80"),
607 (b'',b'www.python.org:80',b'',b'',b'',b''))
608 self.assertEqual(urllib.parse.urlparse(b"http://www.python.org:80"),
609 (b'http',b'www.python.org:80',b'',b'',b'',b''))
Senthil Kumaran84c7d9f2010-08-04 04:50:44 +0000610
611 def test_portseparator(self):
612 # Issue 754016 makes changes for port separator ':' from scheme separator
613 self.assertEqual(urllib.parse.urlparse("path:80"),
614 ('','','path:80','','',''))
615 self.assertEqual(urllib.parse.urlparse("http:"),('http','','','','',''))
616 self.assertEqual(urllib.parse.urlparse("https:"),('https','','','','',''))
617 self.assertEqual(urllib.parse.urlparse("http://www.python.org:80"),
618 ('http','www.python.org:80','','','',''))
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000619 # As usual, need to check bytes input as well
620 self.assertEqual(urllib.parse.urlparse(b"path:80"),
621 (b'',b'',b'path:80',b'',b'',b''))
622 self.assertEqual(urllib.parse.urlparse(b"http:"),(b'http',b'',b'',b'',b'',b''))
623 self.assertEqual(urllib.parse.urlparse(b"https:"),(b'https',b'',b'',b'',b'',b''))
624 self.assertEqual(urllib.parse.urlparse(b"http://www.python.org:80"),
625 (b'http',b'www.python.org:80',b'',b'',b'',b''))
Senthil Kumaran84c7d9f2010-08-04 04:50:44 +0000626
Facundo Batista2ac5de22008-07-07 18:24:11 +0000627 def test_usingsys(self):
628 # Issue 3314: sys module is used in the error
629 self.assertRaises(TypeError, urllib.parse.urlencode, "foo")
630
Senthil Kumaran6be85c52010-02-19 07:42:50 +0000631 def test_anyscheme(self):
632 # Issue 7904: s3://foo.com/stuff has netloc "foo.com".
Ezio Melotti5e15efa2010-02-19 14:49:02 +0000633 self.assertEqual(urllib.parse.urlparse("s3://foo.com/stuff"),
634 ('s3', 'foo.com', '/stuff', '', '', ''))
635 self.assertEqual(urllib.parse.urlparse("x-newscheme://foo.com/stuff"),
636 ('x-newscheme', 'foo.com', '/stuff', '', '', ''))
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000637 # And for bytes...
638 self.assertEqual(urllib.parse.urlparse(b"s3://foo.com/stuff"),
639 (b's3', b'foo.com', b'/stuff', b'', b'', b''))
640 self.assertEqual(urllib.parse.urlparse(b"x-newscheme://foo.com/stuff"),
641 (b'x-newscheme', b'foo.com', b'/stuff', b'', b'', b''))
642
643 def test_mixed_types_rejected(self):
644 # Several functions that process either strings or ASCII encoded bytes
645 # accept multiple arguments. Check they reject mixed type input
Ezio Melottied3a7d22010-12-01 02:32:32 +0000646 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000647 urllib.parse.urlparse("www.python.org", b"http")
Ezio Melottied3a7d22010-12-01 02:32:32 +0000648 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000649 urllib.parse.urlparse(b"www.python.org", "http")
Ezio Melottied3a7d22010-12-01 02:32:32 +0000650 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000651 urllib.parse.urlsplit("www.python.org", b"http")
Ezio Melottied3a7d22010-12-01 02:32:32 +0000652 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000653 urllib.parse.urlsplit(b"www.python.org", "http")
Ezio Melottied3a7d22010-12-01 02:32:32 +0000654 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000655 urllib.parse.urlunparse(( b"http", "www.python.org","","","",""))
Ezio Melottied3a7d22010-12-01 02:32:32 +0000656 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000657 urllib.parse.urlunparse(("http", b"www.python.org","","","",""))
Ezio Melottied3a7d22010-12-01 02:32:32 +0000658 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000659 urllib.parse.urlunsplit((b"http", "www.python.org","","",""))
Ezio Melottied3a7d22010-12-01 02:32:32 +0000660 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000661 urllib.parse.urlunsplit(("http", b"www.python.org","","",""))
Ezio Melottied3a7d22010-12-01 02:32:32 +0000662 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000663 urllib.parse.urljoin("http://python.org", b"http://python.org")
Ezio Melottied3a7d22010-12-01 02:32:32 +0000664 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000665 urllib.parse.urljoin(b"http://python.org", "http://python.org")
666
667 def _check_result_type(self, str_type):
668 num_args = len(str_type._fields)
669 bytes_type = str_type._encoded_counterpart
670 self.assertIs(bytes_type._decoded_counterpart, str_type)
671 str_args = ('',) * num_args
672 bytes_args = (b'',) * num_args
673 str_result = str_type(*str_args)
674 bytes_result = bytes_type(*bytes_args)
675 encoding = 'ascii'
676 errors = 'strict'
677 self.assertEqual(str_result, str_args)
678 self.assertEqual(bytes_result.decode(), str_args)
679 self.assertEqual(bytes_result.decode(), str_result)
680 self.assertEqual(bytes_result.decode(encoding), str_args)
681 self.assertEqual(bytes_result.decode(encoding), str_result)
682 self.assertEqual(bytes_result.decode(encoding, errors), str_args)
683 self.assertEqual(bytes_result.decode(encoding, errors), str_result)
684 self.assertEqual(bytes_result, bytes_args)
685 self.assertEqual(str_result.encode(), bytes_args)
686 self.assertEqual(str_result.encode(), bytes_result)
687 self.assertEqual(str_result.encode(encoding), bytes_args)
688 self.assertEqual(str_result.encode(encoding), bytes_result)
689 self.assertEqual(str_result.encode(encoding, errors), bytes_args)
690 self.assertEqual(str_result.encode(encoding, errors), bytes_result)
691
692 def test_result_pairs(self):
693 # Check encoding and decoding between result pairs
694 result_types = [
695 urllib.parse.DefragResult,
696 urllib.parse.SplitResult,
697 urllib.parse.ParseResult,
698 ]
699 for result_type in result_types:
700 self._check_result_type(result_type)
701
Victor Stinner1d87deb2011-01-14 13:05:19 +0000702 def test_parse_qs_encoding(self):
703 result = urllib.parse.parse_qs("key=\u0141%E9", encoding="latin-1")
704 self.assertEqual(result, {'key': ['\u0141\xE9']})
705 result = urllib.parse.parse_qs("key=\u0141%C3%A9", encoding="utf-8")
706 self.assertEqual(result, {'key': ['\u0141\xE9']})
707 result = urllib.parse.parse_qs("key=\u0141%C3%A9", encoding="ascii")
708 self.assertEqual(result, {'key': ['\u0141\ufffd\ufffd']})
709 result = urllib.parse.parse_qs("key=\u0141%E9-", encoding="ascii")
710 self.assertEqual(result, {'key': ['\u0141\ufffd-']})
711 result = urllib.parse.parse_qs("key=\u0141%E9-", encoding="ascii",
712 errors="ignore")
713 self.assertEqual(result, {'key': ['\u0141-']})
714
715 def test_parse_qsl_encoding(self):
716 result = urllib.parse.parse_qsl("key=\u0141%E9", encoding="latin-1")
717 self.assertEqual(result, [('key', '\u0141\xE9')])
718 result = urllib.parse.parse_qsl("key=\u0141%C3%A9", encoding="utf-8")
719 self.assertEqual(result, [('key', '\u0141\xE9')])
720 result = urllib.parse.parse_qsl("key=\u0141%C3%A9", encoding="ascii")
721 self.assertEqual(result, [('key', '\u0141\ufffd\ufffd')])
722 result = urllib.parse.parse_qsl("key=\u0141%E9-", encoding="ascii")
723 self.assertEqual(result, [('key', '\u0141\ufffd-')])
724 result = urllib.parse.parse_qsl("key=\u0141%E9-", encoding="ascii",
725 errors="ignore")
726 self.assertEqual(result, [('key', '\u0141-')])
727
Senthil Kumarande02a712011-07-23 18:27:45 +0800728 def test_splitnport(self):
729 # Normal cases are exercised by other tests; ensure that we also
730 # catch cases with no port specified. (testcase ensuring coverage)
731 result = urllib.parse.splitnport('parrot:88')
732 self.assertEqual(result, ('parrot', 88))
733 result = urllib.parse.splitnport('parrot')
734 self.assertEqual(result, ('parrot', -1))
735 result = urllib.parse.splitnport('parrot', 55)
736 self.assertEqual(result, ('parrot', 55))
737 result = urllib.parse.splitnport('parrot:')
738 self.assertEqual(result, ('parrot', None))
739
740 def test_splitquery(self):
741 # Normal cases are exercised by other tests; ensure that we also
742 # catch cases with no port specified (testcase ensuring coverage)
743 result = urllib.parse.splitquery('http://python.org/fake?foo=bar')
744 self.assertEqual(result, ('http://python.org/fake', 'foo=bar'))
745 result = urllib.parse.splitquery('http://python.org/fake?foo=bar?')
746 self.assertEqual(result, ('http://python.org/fake?foo=bar', ''))
747 result = urllib.parse.splitquery('http://python.org/fake')
748 self.assertEqual(result, ('http://python.org/fake', None))
749
750 def test_splitvalue(self):
751 # Normal cases are exercised by other tests; test pathological cases
752 # with no key/value pairs. (testcase ensuring coverage)
753 result = urllib.parse.splitvalue('foo=bar')
754 self.assertEqual(result, ('foo', 'bar'))
755 result = urllib.parse.splitvalue('foo=')
756 self.assertEqual(result, ('foo', ''))
757 result = urllib.parse.splitvalue('foobar')
758 self.assertEqual(result, ('foobar', None))
759
760 def test_to_bytes(self):
761 result = urllib.parse.to_bytes('http://www.python.org')
762 self.assertEqual(result, 'http://www.python.org')
763 self.assertRaises(UnicodeError, urllib.parse.to_bytes,
764 'http://www.python.org/medi\u00e6val')
765
766 def test_urlencode_sequences(self):
767 # Other tests incidentally urlencode things; test non-covered cases:
768 # Sequence and object values.
769 result = urllib.parse.urlencode({'a': [1, 2], 'b': (3, 4, 5)}, True)
770 self.assertEqual(result, 'a=1&a=2&b=3&b=4&b=5')
771
772 class Trivial:
773 def __str__(self):
774 return 'trivial'
775
776 result = urllib.parse.urlencode({'a': Trivial()}, True)
777 self.assertEqual(result, 'a=trivial')
778
779 def test_quote_from_bytes(self):
780 self.assertRaises(TypeError, urllib.parse.quote_from_bytes, 'foo')
781 result = urllib.parse.quote_from_bytes(b'archaeological arcana')
782 self.assertEqual(result, 'archaeological%20arcana')
783 result = urllib.parse.quote_from_bytes(b'')
784 self.assertEqual(result, '')
785
786 def test_unquote_to_bytes(self):
787 result = urllib.parse.unquote_to_bytes('abc%20def')
788 self.assertEqual(result, b'abc def')
789 result = urllib.parse.unquote_to_bytes('')
790 self.assertEqual(result, b'')
791
792 def test_quote_errors(self):
793 self.assertRaises(TypeError, urllib.parse.quote, b'foo',
794 encoding='utf-8')
795 self.assertRaises(TypeError, urllib.parse.quote, b'foo', errors='strict')
Victor Stinner1d87deb2011-01-14 13:05:19 +0000796
Senthil Kumaran6be85c52010-02-19 07:42:50 +0000797
def test_main():
    # Hand the UrlParseTestCase class to support.run_unittest().
    support.run_unittest(UrlParseTestCase)


# Run the tests when this file is executed as a script.
if __name__ == "__main__":
    test_main()