blob: 829997fd6a20564b630041c25e214bbbea171a8c [file] [log] [blame]
Skip Montanaro6ec967d2002-03-23 05:32:10 +00001import unittest
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002import urllib.parse
Fred Drakea4d18a02001-01-05 05:57:04 +00003
# Base URLs that the checkJoin()-driven tests resolve relative references
# against.  The RFC 1808 base carries a fragment; the other two RFC bases
# are the same URL without one.
RFC1808_BASE = 'http://a/b/c/d;p?q#f'
RFC2396_BASE = 'http://a/b/c/d;p?q'
RFC3986_BASE = 'http://a/b/c/d;p?q'
SIMPLE_BASE = 'http://a/b/c/d'
Fred Drakea4d18a02001-01-05 05:57:04 +00008
# Test cases for parse_qsl().  Each case is a two-tuple holding the query
# (str or bytes) and the expected result: a list of (name, value) pairs
# in the order they appear in the query.
parse_qsl_test_cases = [
    # str queries, '&' between fields
    ("", []),
    ("&", []),
    ("&&", []),
    ("=", [('', '')]),
    ("=a", [('', 'a')]),
    ("a", [('a', '')]),
    ("a=", [('a', '')]),
    ("&a=b", [('a', 'b')]),
    ("a=a+b&b=b+c", [('a', 'a b'), ('b', 'b c')]),
    ("a=1&a=2", [('a', '1'), ('a', '2')]),

    # bytes queries, '&' between fields
    (b"", []),
    (b"&", []),
    (b"&&", []),
    (b"=", [(b'', b'')]),
    (b"=a", [(b'', b'a')]),
    (b"a", [(b'a', b'')]),
    (b"a=", [(b'a', b'')]),
    (b"&a=b", [(b'a', b'b')]),
    (b"a=a+b&b=b+c", [(b'a', b'a b'), (b'b', b'b c')]),
    (b"a=1&a=2", [(b'a', b'1'), (b'a', b'2')]),

    # str queries, ';' between fields
    (";", []),
    (";;", []),
    (";a=b", [('a', 'b')]),
    ("a=a+b;b=b+c", [('a', 'a b'), ('b', 'b c')]),
    ("a=1;a=2", [('a', '1'), ('a', '2')]),

    # bytes queries, ';' between fields
    (b";", []),
    (b";;", []),
    (b";a=b", [(b'a', b'b')]),
    (b"a=a+b;b=b+c", [(b'a', b'a b'), (b'b', b'b c')]),
    (b"a=1;a=2", [(b'a', b'1'), (b'a', b'2')]),
]
44
# Test cases for parse_qs().  Each case is a two-tuple holding the query
# (str or bytes) and the expected result: a dictionary mapping each field
# name to the list of its values.
parse_qs_test_cases = [
    # str queries, '&' between fields
    ("", {}),
    ("&", {}),
    ("&&", {}),
    ("=", {'': ['']}),
    ("=a", {'': ['a']}),
    ("a", {'a': ['']}),
    ("a=", {'a': ['']}),
    ("&a=b", {'a': ['b']}),
    ("a=a+b&b=b+c", {'a': ['a b'], 'b': ['b c']}),
    ("a=1&a=2", {'a': ['1', '2']}),

    # bytes queries, '&' between fields
    (b"", {}),
    (b"&", {}),
    (b"&&", {}),
    (b"=", {b'': [b'']}),
    (b"=a", {b'': [b'a']}),
    (b"a", {b'a': [b'']}),
    (b"a=", {b'a': [b'']}),
    (b"&a=b", {b'a': [b'b']}),
    (b"a=a+b&b=b+c", {b'a': [b'a b'], b'b': [b'b c']}),
    (b"a=1&a=2", {b'a': [b'1', b'2']}),

    # str queries, ';' between fields
    (";", {}),
    (";;", {}),
    (";a=b", {'a': ['b']}),
    ("a=a+b;b=b+c", {'a': ['a b'], 'b': ['b c']}),
    ("a=1;a=2", {'a': ['1', '2']}),

    # bytes queries, ';' between fields
    (b";", {}),
    (b";;", {}),
    (b";a=b", {b'a': [b'b']}),
    (b"a=a+b;b=b+c", {b'a': [b'a b'], b'b': [b'b c']}),
    (b"a=1;a=2", {b'a': [b'1', b'2']}),
]
77
Skip Montanaro6ec967d2002-03-23 05:32:10 +000078class UrlParseTestCase(unittest.TestCase):
Johannes Gijsbers41e4faa2005-01-09 15:29:10 +000079
def checkRoundtrips(self, url, parsed, split):
    """Check that *url* survives a parse/unparse round trip.

    *parsed* is the 6-tuple expected from urlparse(url) and *split* the
    5-tuple expected from urlsplit(url).  For each of the two APIs the
    URL is taken apart, compared with the expectation, put back together
    and compared with *url*, and finally the geturl() output is parsed
    again to confirm it yields the very same result.
    """
    derived = ('username', 'password', 'hostname', 'port')
    plans = (
        (urllib.parse.urlparse, urllib.parse.urlunparse, parsed,
         ('scheme', 'netloc', 'path', 'params', 'query', 'fragment')),
        (urllib.parse.urlsplit, urllib.parse.urlunsplit, split,
         ('scheme', 'netloc', 'path', 'query', 'fragment')),
    )
    for take_apart, put_together, expected, fields in plans:
        first = take_apart(url)
        self.assertEqual(first, expected)
        # The named attributes must agree with the positional tuple.
        by_name = tuple(getattr(first, field) for field in fields)
        self.assertEqual(by_name, expected)

        # Put it back together and it should be the same.
        rebuilt = put_together(first)
        self.assertEqual(rebuilt, url)
        self.assertEqual(rebuilt, first.geturl())

        # The result of geturl() is a fixpoint; parsing it again must
        # give the same result, attribute by attribute.
        again = take_apart(first.geturl())
        self.assertEqual(again.geturl(), first.geturl())
        self.assertEqual(again, first)
        for attr in fields + derived:
            self.assertEqual(getattr(again, attr), getattr(first, attr))
Johannes Gijsbers41e4faa2005-01-09 15:29:10 +0000130
def test_qsl(self):
    # Run every parse_qsl() case twice: once keeping blank values, where
    # the listed pairs are expected verbatim, and once dropping them,
    # where pairs with an empty value must be missing from the result.
    for query, pairs in parse_qsl_test_cases:
        failure = "Error parsing %r" % query

        with_blanks = urllib.parse.parse_qsl(query, keep_blank_values=True)
        self.assertEqual(with_blanks, pairs, failure)

        non_blank_pairs = [pair for pair in pairs if pair[1]]
        without_blanks = urllib.parse.parse_qsl(query,
                                                keep_blank_values=False)
        self.assertEqual(without_blanks, non_blank_pairs, failure)
Facundo Batistac469d4c2008-09-03 22:49:01 +0000139
def test_qs(self):
    # Run every parse_qs() case twice: once keeping blank values, where
    # the listed dictionary is expected verbatim, and once dropping them.
    for orig, expect in parse_qs_test_cases:
        message = "Error parsing %r" % orig
        result = urllib.parse.parse_qs(orig, keep_blank_values=True)
        self.assertEqual(result, expect, message)

        # With keep_blank_values=False each blank value is dropped on its
        # own, and a name disappears only when none of its values
        # survive.  Filter value by value (as test_qsl does pair by pair)
        # instead of looking only at the first value of each name, so
        # cases mixing blank and non-blank values get a correct
        # expectation too.
        expect_without_blanks = {}
        for name, values in expect.items():
            kept = [value for value in values if len(value)]
            if kept:
                expect_without_blanks[name] = kept
        result = urllib.parse.parse_qs(orig, keep_blank_values=False)
        self.assertEqual(result, expect_without_blanks, message)
149
Johannes Gijsbers41e4faa2005-01-09 15:29:10 +0000150 def test_roundtrips(self):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000151 str_cases = [
Fred Drake70705652002-10-16 21:02:36 +0000152 ('file:///tmp/junk.txt',
153 ('file', '', '/tmp/junk.txt', '', '', ''),
154 ('file', '', '/tmp/junk.txt', '', '')),
Neal Norwitz68b539e2003-01-06 06:58:31 +0000155 ('imap://mail.python.org/mbox1',
156 ('imap', 'mail.python.org', '/mbox1', '', '', ''),
157 ('imap', 'mail.python.org', '/mbox1', '', '')),
Skip Montanarof09b88e2003-01-06 20:27:03 +0000158 ('mms://wms.sys.hinet.net/cts/Drama/09006251100.asf',
Johannes Gijsbers41e4faa2005-01-09 15:29:10 +0000159 ('mms', 'wms.sys.hinet.net', '/cts/Drama/09006251100.asf',
160 '', '', ''),
161 ('mms', 'wms.sys.hinet.net', '/cts/Drama/09006251100.asf',
162 '', '')),
Senthil Kumaraneaaec272009-03-30 21:54:41 +0000163 ('nfs://server/path/to/file.txt',
164 ('nfs', 'server', '/path/to/file.txt', '', '', ''),
165 ('nfs', 'server', '/path/to/file.txt', '', '')),
Fred Drake50747fc2005-07-29 15:56:32 +0000166 ('svn+ssh://svn.zope.org/repos/main/ZConfig/trunk/',
167 ('svn+ssh', 'svn.zope.org', '/repos/main/ZConfig/trunk/',
168 '', '', ''),
169 ('svn+ssh', 'svn.zope.org', '/repos/main/ZConfig/trunk/',
Senthil Kumaranead169d2010-05-13 03:37:23 +0000170 '', '')),
171 ('git+ssh://git@github.com/user/project.git',
172 ('git+ssh', 'git@github.com','/user/project.git',
173 '','',''),
174 ('git+ssh', 'git@github.com','/user/project.git',
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000175 '', '')),
Johannes Gijsbers41e4faa2005-01-09 15:29:10 +0000176 ]
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000177 def _encode(t):
178 return (t[0].encode('ascii'),
179 tuple(x.encode('ascii') for x in t[1]),
180 tuple(x.encode('ascii') for x in t[2]))
181 bytes_cases = [_encode(x) for x in str_cases]
182 for url, parsed, split in str_cases + bytes_cases:
Johannes Gijsbers41e4faa2005-01-09 15:29:10 +0000183 self.checkRoundtrips(url, parsed, split)
Michael W. Hudsonbd3e7712002-03-18 13:06:00 +0000184
Johannes Gijsbers41e4faa2005-01-09 15:29:10 +0000185 def test_http_roundtrips(self):
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000186 # urllib.parse.urlsplit treats 'http:' as an optimized special case,
Johannes Gijsbers41e4faa2005-01-09 15:29:10 +0000187 # so we test both 'http:' and 'https:' in all the following.
188 # Three cheers for white box knowledge!
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000189 str_cases = [
Johannes Gijsbers41e4faa2005-01-09 15:29:10 +0000190 ('://www.python.org',
191 ('www.python.org', '', '', '', ''),
192 ('www.python.org', '', '', '')),
193 ('://www.python.org#abc',
194 ('www.python.org', '', '', '', 'abc'),
195 ('www.python.org', '', '', 'abc')),
196 ('://www.python.org?q=abc',
197 ('www.python.org', '', '', 'q=abc', ''),
198 ('www.python.org', '', 'q=abc', '')),
199 ('://www.python.org/#abc',
200 ('www.python.org', '/', '', '', 'abc'),
201 ('www.python.org', '/', '', 'abc')),
202 ('://a/b/c/d;p?q#f',
203 ('a', '/b/c/d', 'p', 'q', 'f'),
204 ('a', '/b/c/d;p', 'q', 'f')),
205 ]
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000206 def _encode(t):
207 return (t[0].encode('ascii'),
208 tuple(x.encode('ascii') for x in t[1]),
209 tuple(x.encode('ascii') for x in t[2]))
210 bytes_cases = [_encode(x) for x in str_cases]
211 str_schemes = ('http', 'https')
212 bytes_schemes = (b'http', b'https')
213 str_tests = str_schemes, str_cases
214 bytes_tests = bytes_schemes, bytes_cases
215 for schemes, test_cases in (str_tests, bytes_tests):
216 for scheme in schemes:
217 for url, parsed, split in test_cases:
218 url = scheme + url
219 parsed = (scheme,) + parsed
220 split = (scheme,) + split
221 self.checkRoundtrips(url, parsed, split)
Fred Drake70705652002-10-16 21:02:36 +0000222
def checkJoin(self, base, relurl, expected):
    """Check that urljoin(base, relurl) gives *expected*.

    All three arguments are str.  The join is checked on them as given
    and then once more on their ASCII-encoded bytes counterparts.
    """
    self.assertEqual(urllib.parse.urljoin(base, relurl), expected)

    base_bytes = base.encode('ascii')
    relurl_bytes = relurl.encode('ascii')
    expected_bytes = expected.encode('ascii')
    self.assertEqual(urllib.parse.urljoin(base_bytes, relurl_bytes),
                     expected_bytes)
Guido van Rossumbbc05682002-10-14 19:59:54 +0000229
230 def test_unparse_parse(self):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000231 str_cases = ['Python', './Python','x-newscheme://foo.com/stuff','x://y','x:/y','x:/','/',]
232 bytes_cases = [x.encode('ascii') for x in str_cases]
233 for u in str_cases + bytes_cases:
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000234 self.assertEqual(urllib.parse.urlunsplit(urllib.parse.urlsplit(u)), u)
235 self.assertEqual(urllib.parse.urlunparse(urllib.parse.urlparse(u)), u)
Fred Drakea4d18a02001-01-05 05:57:04 +0000236
def test_RFC1808(self):
    # Each entry pairs a relative URL with the result expected from
    # joining it onto RFC1808_BASE.
    normal_cases = [
        # "normal" cases from RFC 1808:
        ('g:h', 'g:h'),
        ('g', 'http://a/b/c/g'),
        ('./g', 'http://a/b/c/g'),
        ('g/', 'http://a/b/c/g/'),
        ('/g', 'http://a/g'),
        ('//g', 'http://g'),
        ('g?y', 'http://a/b/c/g?y'),
        ('g?y/./x', 'http://a/b/c/g?y/./x'),
        ('#s', 'http://a/b/c/d;p?q#s'),
        ('g#s', 'http://a/b/c/g#s'),
        ('g#s/./x', 'http://a/b/c/g#s/./x'),
        ('g?y#s', 'http://a/b/c/g?y#s'),
        ('g;x', 'http://a/b/c/g;x'),
        ('g;x?y#s', 'http://a/b/c/g;x?y#s'),
        ('.', 'http://a/b/c/'),
        ('./', 'http://a/b/c/'),
        ('..', 'http://a/b/'),
        ('../', 'http://a/b/'),
        ('../g', 'http://a/b/g'),
        ('../..', 'http://a/'),
        ('../../', 'http://a/'),
        ('../../g', 'http://a/g'),
    ]
    abnormal_cases = [
        # "abnormal" cases from RFC 1808:
        ('', 'http://a/b/c/d;p?q#f'),
        ('g.', 'http://a/b/c/g.'),
        ('.g', 'http://a/b/c/.g'),
        ('g..', 'http://a/b/c/g..'),
        ('..g', 'http://a/b/c/..g'),
        ('./../g', 'http://a/b/g'),
        ('./g/.', 'http://a/b/c/g/'),
        ('g/./h', 'http://a/b/c/g/h'),
        ('g/../h', 'http://a/b/c/h'),
    ]
    for relative, joined in normal_cases + abnormal_cases:
        self.checkJoin(RFC1808_BASE, relative, joined)

    # RFC 1808 and RFC 1630 disagree on these (according to RFC 1808),
    # so these cases (which expect 1808 behavior) are not run:
    #   ('http:g', 'http:g')
    #   ('http:', 'http:')

    # XXX: The following cases are no longer compatible with RFC3986
    # and are not run either:
    #   ('../../../g', 'http://a/../g')
    #   ('../../../../g', 'http://a/../../g')
    #   ('/./g', 'http://a/./g')
    #   ('/../g', 'http://a/../g')
283
284
Senthil Kumaran397eb442011-04-15 18:20:24 +0800285 def test_RFC2368(self):
286 # Issue 11467: path that starts with a number is not parsed correctly
287 self.assertEqual(urllib.parse.urlparse('mailto:1337@example.org'),
288 ('mailto', '', '1337@example.org', '', '', ''))
289
def test_RFC2396(self):
    # Cases from RFC 2396: each entry pairs a relative URL with the
    # result expected from joining it onto RFC2396_BASE.
    cases = [
        ('g:h', 'g:h'),
        ('g', 'http://a/b/c/g'),
        ('./g', 'http://a/b/c/g'),
        ('g/', 'http://a/b/c/g/'),
        ('/g', 'http://a/g'),
        ('//g', 'http://g'),
        ('g?y', 'http://a/b/c/g?y'),
        ('#s', 'http://a/b/c/d;p?q#s'),
        ('g#s', 'http://a/b/c/g#s'),
        ('g?y#s', 'http://a/b/c/g?y#s'),
        ('g;x', 'http://a/b/c/g;x'),
        ('g;x?y#s', 'http://a/b/c/g;x?y#s'),
        ('.', 'http://a/b/c/'),
        ('./', 'http://a/b/c/'),
        ('..', 'http://a/b/'),
        ('../', 'http://a/b/'),
        ('../g', 'http://a/b/g'),
        ('../..', 'http://a/'),
        ('../../', 'http://a/'),
        ('../../g', 'http://a/g'),
        ('', RFC2396_BASE),
        ('g.', 'http://a/b/c/g.'),
        ('.g', 'http://a/b/c/.g'),
        ('g..', 'http://a/b/c/g..'),
        ('..g', 'http://a/b/c/..g'),
        ('./../g', 'http://a/b/g'),
        ('./g/.', 'http://a/b/c/g/'),
        ('g/./h', 'http://a/b/c/g/h'),
        ('g/../h', 'http://a/b/c/h'),
        ('g;x=1/./y', 'http://a/b/c/g;x=1/y'),
        ('g;x=1/../y', 'http://a/b/c/y'),
        ('g?y/./x', 'http://a/b/c/g?y/./x'),
        ('g?y/../x', 'http://a/b/c/g?y/../x'),
        ('g#s/./x', 'http://a/b/c/g#s/./x'),
        ('g#s/../x', 'http://a/b/c/g#s/../x'),
    ]
    for relative, joined in cases:
        self.checkJoin(RFC2396_BASE, relative, joined)

    # XXX: The following cases are no longer compatible with RFC3986
    # and are not run:
    #   ('../../../g', 'http://a/../g')
    #   ('../../../../g', 'http://a/../../g')
    #   ('/./g', 'http://a/./g')
    #   ('/../g', 'http://a/../g')
335
336
def test_RFC3986(self):
    # Test cases from RFC3986: each entry pairs a relative URL with the
    # result expected from joining it onto RFC3986_BASE.  A few entries
    # occur more than once; the original sequence is kept as is.
    normal_cases = [
        ('?y', 'http://a/b/c/d;p?y'),
        (';x', 'http://a/b/c/;x'),
        ('g:h', 'g:h'),
        ('g', 'http://a/b/c/g'),
        ('./g', 'http://a/b/c/g'),
        ('g/', 'http://a/b/c/g/'),
        ('/g', 'http://a/g'),
        ('//g', 'http://g'),
        ('?y', 'http://a/b/c/d;p?y'),
        ('g?y', 'http://a/b/c/g?y'),
        ('#s', 'http://a/b/c/d;p?q#s'),
        ('g#s', 'http://a/b/c/g#s'),
        ('g?y#s', 'http://a/b/c/g?y#s'),
        (';x', 'http://a/b/c/;x'),
        ('g;x', 'http://a/b/c/g;x'),
        ('g;x?y#s', 'http://a/b/c/g;x?y#s'),
        ('', 'http://a/b/c/d;p?q'),
        ('.', 'http://a/b/c/'),
        ('./', 'http://a/b/c/'),
        ('..', 'http://a/b/'),
        ('../', 'http://a/b/'),
        ('../g', 'http://a/b/g'),
        ('../..', 'http://a/'),
        ('../../', 'http://a/'),
        ('../../g', 'http://a/g'),
        ('../../../g', 'http://a/g'),
    ]
    # Abnormal examples.  The expected values are what RFC 3986
    # resolution produces: surplus '..' segments are dropped rather than
    # kept in the path.
    abnormal_cases = [
        ('../../../g', 'http://a/g'),
        ('../../../../g', 'http://a/g'),
        ('/./g', 'http://a/g'),
        ('/../g', 'http://a/g'),
        ('g.', 'http://a/b/c/g.'),
        ('.g', 'http://a/b/c/.g'),
        ('g..', 'http://a/b/c/g..'),
        ('..g', 'http://a/b/c/..g'),
        ('./../g', 'http://a/b/g'),
        ('./g/.', 'http://a/b/c/g/'),
        ('g/./h', 'http://a/b/c/g/h'),
        ('g/../h', 'http://a/b/c/h'),
        ('g;x=1/./y', 'http://a/b/c/g;x=1/y'),
        ('g;x=1/../y', 'http://a/b/c/y'),
        ('g?y/./x', 'http://a/b/c/g?y/./x'),
        ('g?y/../x', 'http://a/b/c/g?y/../x'),
        ('g#s/./x', 'http://a/b/c/g#s/./x'),
        ('g#s/../x', 'http://a/b/c/g#s/../x'),
        # A strict parser would give 'http:g' here; this is the result
        # of the relaxed parser.
        ('http:g', 'http://a/b/c/g'),
    ]
    for relative, joined in normal_cases + abnormal_cases:
        self.checkJoin(RFC3986_BASE, relative, joined)

    # Test for issue9721
    self.checkJoin('http://a/b/c/de', ';x', 'http://a/b/c/;x')
394
def test_urljoins(self):
    # Relative URLs joined onto SIMPLE_BASE, with the expected result.
    # A few entries occur more than once; the original sequence is kept.
    simple_base_cases = [
        ('g:h', 'g:h'),
        ('http:g', 'http://a/b/c/g'),
        ('http:', 'http://a/b/c/d'),
        ('g', 'http://a/b/c/g'),
        ('./g', 'http://a/b/c/g'),
        ('g/', 'http://a/b/c/g/'),
        ('/g', 'http://a/g'),
        ('//g', 'http://g'),
        ('?y', 'http://a/b/c/d?y'),
        ('g?y', 'http://a/b/c/g?y'),
        ('g?y/./x', 'http://a/b/c/g?y/./x'),
        ('.', 'http://a/b/c/'),
        ('./', 'http://a/b/c/'),
        ('..', 'http://a/b/'),
        ('../', 'http://a/b/'),
        ('../g', 'http://a/b/g'),
        ('../..', 'http://a/'),
        ('../../g', 'http://a/g'),
        ('./../g', 'http://a/b/g'),
        ('./g/.', 'http://a/b/c/g/'),
        ('g/./h', 'http://a/b/c/g/h'),
        ('g/../h', 'http://a/b/c/h'),
        ('http:g', 'http://a/b/c/g'),
        ('http:', 'http://a/b/c/d'),
        ('http:?y', 'http://a/b/c/d?y'),
        ('http:g?y', 'http://a/b/c/g?y'),
        ('http:g?y/./x', 'http://a/b/c/g?y/./x'),
    ]
    for relative, joined in simple_base_cases:
        self.checkJoin(SIMPLE_BASE, relative, joined)

    # XXX: The following SIMPLE_BASE cases are no longer compatible with
    # RFC3986 and are not run:
    #   ('../../../g', 'http://a/../g')
    #   ('/./g', 'http://a/./g')

    # Cases that bring their own base: (base, relative URL, expected).
    other_base_cases = [
        ('http:///', '..', 'http:///'),
        ('', 'http://a/b/c/g?y/./x', 'http://a/b/c/g?y/./x'),
        ('', 'http://a/./g', 'http://a/./g'),
        ('svn://pathtorepo/dir1', 'dir2', 'svn://pathtorepo/dir2'),
        ('svn+ssh://pathtorepo/dir1', 'dir2',
         'svn+ssh://pathtorepo/dir2'),

        # test for issue22118 duplicate slashes
        (SIMPLE_BASE + '/', 'foo', SIMPLE_BASE + '/foo'),

        # Non-RFC-defined tests, covering variations of base and
        # trailing slashes
        ('http://a/b/c/d/e/', '../../f/g/', 'http://a/b/c/f/g/'),
        ('http://a/b/c/d/e', '../../f/g/', 'http://a/b/f/g/'),
        ('http://a/b/c/d/e/', '/../../f/g/', 'http://a/f/g/'),
        ('http://a/b/c/d/e', '/../../f/g/', 'http://a/f/g/'),
        ('http://a/b/c/d/e/', '../../f/g', 'http://a/b/c/f/g'),
        ('http://a/b/', '../../f/g/', 'http://a/f/g/'),

        # issue 23703: don't duplicate filename
        ('a', 'b', 'b'),
    ]
    for base, relative, joined in other_base_cases:
        self.checkJoin(base, relative, joined)
447
Senthil Kumaranad02d232010-04-16 03:02:13 +0000448 def test_RFC2732(self):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000449 str_cases = [
Senthil Kumaranad02d232010-04-16 03:02:13 +0000450 ('http://Test.python.org:5432/foo/', 'test.python.org', 5432),
451 ('http://12.34.56.78:5432/foo/', '12.34.56.78', 5432),
452 ('http://[::1]:5432/foo/', '::1', 5432),
453 ('http://[dead:beef::1]:5432/foo/', 'dead:beef::1', 5432),
454 ('http://[dead:beef::]:5432/foo/', 'dead:beef::', 5432),
455 ('http://[dead:beef:cafe:5417:affe:8FA3:deaf:feed]:5432/foo/',
456 'dead:beef:cafe:5417:affe:8fa3:deaf:feed', 5432),
457 ('http://[::12.34.56.78]:5432/foo/', '::12.34.56.78', 5432),
458 ('http://[::ffff:12.34.56.78]:5432/foo/',
459 '::ffff:12.34.56.78', 5432),
460 ('http://Test.python.org/foo/', 'test.python.org', None),
461 ('http://12.34.56.78/foo/', '12.34.56.78', None),
462 ('http://[::1]/foo/', '::1', None),
463 ('http://[dead:beef::1]/foo/', 'dead:beef::1', None),
464 ('http://[dead:beef::]/foo/', 'dead:beef::', None),
465 ('http://[dead:beef:cafe:5417:affe:8FA3:deaf:feed]/foo/',
466 'dead:beef:cafe:5417:affe:8fa3:deaf:feed', None),
467 ('http://[::12.34.56.78]/foo/', '::12.34.56.78', None),
468 ('http://[::ffff:12.34.56.78]/foo/',
469 '::ffff:12.34.56.78', None),
Serhiy Storchakaff97b082014-01-18 18:30:33 +0200470 ('http://Test.python.org:/foo/', 'test.python.org', None),
471 ('http://12.34.56.78:/foo/', '12.34.56.78', None),
472 ('http://[::1]:/foo/', '::1', None),
473 ('http://[dead:beef::1]:/foo/', 'dead:beef::1', None),
474 ('http://[dead:beef::]:/foo/', 'dead:beef::', None),
475 ('http://[dead:beef:cafe:5417:affe:8FA3:deaf:feed]:/foo/',
476 'dead:beef:cafe:5417:affe:8fa3:deaf:feed', None),
477 ('http://[::12.34.56.78]:/foo/', '::12.34.56.78', None),
478 ('http://[::ffff:12.34.56.78]:/foo/',
479 '::ffff:12.34.56.78', None),
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000480 ]
481 def _encode(t):
482 return t[0].encode('ascii'), t[1].encode('ascii'), t[2]
483 bytes_cases = [_encode(x) for x in str_cases]
484 for url, hostname, port in str_cases + bytes_cases:
Senthil Kumaranad02d232010-04-16 03:02:13 +0000485 urlparsed = urllib.parse.urlparse(url)
486 self.assertEqual((urlparsed.hostname, urlparsed.port) , (hostname, port))
487
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000488 str_cases = [
Senthil Kumaranad02d232010-04-16 03:02:13 +0000489 'http://::12.34.56.78]/',
490 'http://[::1/foo/',
Senthil Kumaran7a1e09f2010-04-22 12:19:46 +0000491 'ftp://[::1/foo/bad]/bad',
Senthil Kumaran2eaef052010-04-20 20:42:50 +0000492 'http://[::1/foo/bad]/bad',
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000493 'http://[::ffff:12.34.56.78']
494 bytes_cases = [x.encode('ascii') for x in str_cases]
495 for invalid_url in str_cases + bytes_cases:
Senthil Kumaran7a1e09f2010-04-22 12:19:46 +0000496 self.assertRaises(ValueError, urllib.parse.urlparse, invalid_url)
Senthil Kumaranad02d232010-04-16 03:02:13 +0000497
def test_urldefrag(self):
    # Each case lists the URL, the URL expected once the fragment is
    # removed, and the expected fragment.  Every case is run as str and
    # again as ASCII-encoded bytes.
    text_cases = [
        ('http://python.org#frag', 'http://python.org', 'frag'),
        ('http://python.org', 'http://python.org', ''),
        ('http://python.org/#frag', 'http://python.org/', 'frag'),
        ('http://python.org/', 'http://python.org/', ''),
        ('http://python.org/?q#frag', 'http://python.org/?q', 'frag'),
        ('http://python.org/?q', 'http://python.org/?q', ''),
        ('http://python.org/p#frag', 'http://python.org/p', 'frag'),
        ('http://python.org/p?q', 'http://python.org/p?q', ''),
        (RFC1808_BASE, 'http://a/b/c/d;p?q', 'f'),
        (RFC2396_BASE, 'http://a/b/c/d;p?q', ''),
    ]
    binary_cases = [
        tuple(item.encode('ascii') for item in case)
        for case in text_cases
    ]
    for url, defrag, frag in text_cases + binary_cases:
        outcome = urllib.parse.urldefrag(url)
        # geturl() must reproduce the input, and the result must work
        # both as a plain 2-tuple and through its named attributes.
        self.assertEqual(outcome.geturl(), url)
        self.assertEqual(outcome, (defrag, frag))
        self.assertEqual(outcome.url, defrag)
        self.assertEqual(outcome.fragment, frag)
Fred Drake70705652002-10-16 21:02:36 +0000520
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000521 def test_urlsplit_attributes(self):
522 url = "HTTP://WWW.PYTHON.ORG/doc/#frag"
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000523 p = urllib.parse.urlsplit(url)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000524 self.assertEqual(p.scheme, "http")
525 self.assertEqual(p.netloc, "WWW.PYTHON.ORG")
526 self.assertEqual(p.path, "/doc/")
527 self.assertEqual(p.query, "")
528 self.assertEqual(p.fragment, "frag")
529 self.assertEqual(p.username, None)
530 self.assertEqual(p.password, None)
531 self.assertEqual(p.hostname, "www.python.org")
532 self.assertEqual(p.port, None)
533 # geturl() won't return exactly the original URL in this case
534 # since the scheme is always case-normalized
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000535 # We handle this by ignoring the first 4 characters of the URL
536 self.assertEqual(p.geturl()[4:], url[4:])
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000537
538 url = "http://User:Pass@www.python.org:080/doc/?query=yes#frag"
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000539 p = urllib.parse.urlsplit(url)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000540 self.assertEqual(p.scheme, "http")
541 self.assertEqual(p.netloc, "User:Pass@www.python.org:080")
542 self.assertEqual(p.path, "/doc/")
543 self.assertEqual(p.query, "query=yes")
544 self.assertEqual(p.fragment, "frag")
545 self.assertEqual(p.username, "User")
546 self.assertEqual(p.password, "Pass")
547 self.assertEqual(p.hostname, "www.python.org")
548 self.assertEqual(p.port, 80)
549 self.assertEqual(p.geturl(), url)
550
Christian Heimesfaf2f632008-01-06 16:59:19 +0000551 # Addressing issue1698, which suggests Username can contain
552 # "@" characters. Though not RFC compliant, many ftp sites allow
553 # and request email addresses as usernames.
554
555 url = "http://User@example.com:Pass@www.python.org:080/doc/?query=yes#frag"
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000556 p = urllib.parse.urlsplit(url)
Christian Heimesfaf2f632008-01-06 16:59:19 +0000557 self.assertEqual(p.scheme, "http")
558 self.assertEqual(p.netloc, "User@example.com:Pass@www.python.org:080")
559 self.assertEqual(p.path, "/doc/")
560 self.assertEqual(p.query, "query=yes")
561 self.assertEqual(p.fragment, "frag")
562 self.assertEqual(p.username, "User@example.com")
563 self.assertEqual(p.password, "Pass")
564 self.assertEqual(p.hostname, "www.python.org")
565 self.assertEqual(p.port, 80)
566 self.assertEqual(p.geturl(), url)
567
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000568 # And check them all again, only with bytes this time
569 url = b"HTTP://WWW.PYTHON.ORG/doc/#frag"
570 p = urllib.parse.urlsplit(url)
571 self.assertEqual(p.scheme, b"http")
572 self.assertEqual(p.netloc, b"WWW.PYTHON.ORG")
573 self.assertEqual(p.path, b"/doc/")
574 self.assertEqual(p.query, b"")
575 self.assertEqual(p.fragment, b"frag")
576 self.assertEqual(p.username, None)
577 self.assertEqual(p.password, None)
578 self.assertEqual(p.hostname, b"www.python.org")
579 self.assertEqual(p.port, None)
580 self.assertEqual(p.geturl()[4:], url[4:])
581
582 url = b"http://User:Pass@www.python.org:080/doc/?query=yes#frag"
583 p = urllib.parse.urlsplit(url)
584 self.assertEqual(p.scheme, b"http")
585 self.assertEqual(p.netloc, b"User:Pass@www.python.org:080")
586 self.assertEqual(p.path, b"/doc/")
587 self.assertEqual(p.query, b"query=yes")
588 self.assertEqual(p.fragment, b"frag")
589 self.assertEqual(p.username, b"User")
590 self.assertEqual(p.password, b"Pass")
591 self.assertEqual(p.hostname, b"www.python.org")
592 self.assertEqual(p.port, 80)
593 self.assertEqual(p.geturl(), url)
594
595 url = b"http://User@example.com:Pass@www.python.org:080/doc/?query=yes#frag"
596 p = urllib.parse.urlsplit(url)
597 self.assertEqual(p.scheme, b"http")
598 self.assertEqual(p.netloc, b"User@example.com:Pass@www.python.org:080")
599 self.assertEqual(p.path, b"/doc/")
600 self.assertEqual(p.query, b"query=yes")
601 self.assertEqual(p.fragment, b"frag")
602 self.assertEqual(p.username, b"User@example.com")
603 self.assertEqual(p.password, b"Pass")
604 self.assertEqual(p.hostname, b"www.python.org")
605 self.assertEqual(p.port, 80)
606 self.assertEqual(p.geturl(), url)
Christian Heimesfaf2f632008-01-06 16:59:19 +0000607
Senthil Kumaran2fc5a502012-05-24 21:56:17 +0800608 # Verify an illegal port is returned as None
609 url = b"HTTP://WWW.PYTHON.ORG:65536/doc/#frag"
610 p = urllib.parse.urlsplit(url)
611 self.assertEqual(p.port, None)
612
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000613 def test_attributes_bad_port(self):
614 """Check handling of non-integer ports."""
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000615 p = urllib.parse.urlsplit("http://www.example.net:foo")
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000616 self.assertEqual(p.netloc, "www.example.net:foo")
617 self.assertRaises(ValueError, lambda: p.port)
618
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000619 p = urllib.parse.urlparse("http://www.example.net:foo")
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000620 self.assertEqual(p.netloc, "www.example.net:foo")
621 self.assertRaises(ValueError, lambda: p.port)
622
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000623 # Once again, repeat ourselves to test bytes
624 p = urllib.parse.urlsplit(b"http://www.example.net:foo")
625 self.assertEqual(p.netloc, b"www.example.net:foo")
626 self.assertRaises(ValueError, lambda: p.port)
627
628 p = urllib.parse.urlparse(b"http://www.example.net:foo")
629 self.assertEqual(p.netloc, b"www.example.net:foo")
630 self.assertRaises(ValueError, lambda: p.port)
631
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000632 def test_attributes_without_netloc(self):
633 # This example is straight from RFC 3261. It looks like it
634 # should allow the username, hostname, and port to be filled
635 # in, but doesn't. Since it's a URI and doesn't use the
636 # scheme://netloc syntax, the netloc and related attributes
637 # should be left empty.
638 uri = "sip:alice@atlanta.com;maddr=239.255.255.1;ttl=15"
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000639 p = urllib.parse.urlsplit(uri)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000640 self.assertEqual(p.netloc, "")
641 self.assertEqual(p.username, None)
642 self.assertEqual(p.password, None)
643 self.assertEqual(p.hostname, None)
644 self.assertEqual(p.port, None)
645 self.assertEqual(p.geturl(), uri)
646
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000647 p = urllib.parse.urlparse(uri)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000648 self.assertEqual(p.netloc, "")
649 self.assertEqual(p.username, None)
650 self.assertEqual(p.password, None)
651 self.assertEqual(p.hostname, None)
652 self.assertEqual(p.port, None)
653 self.assertEqual(p.geturl(), uri)
654
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000655 # You guessed it, repeating the test with bytes input
656 uri = b"sip:alice@atlanta.com;maddr=239.255.255.1;ttl=15"
657 p = urllib.parse.urlsplit(uri)
658 self.assertEqual(p.netloc, b"")
659 self.assertEqual(p.username, None)
660 self.assertEqual(p.password, None)
661 self.assertEqual(p.hostname, None)
662 self.assertEqual(p.port, None)
663 self.assertEqual(p.geturl(), uri)
664
665 p = urllib.parse.urlparse(uri)
666 self.assertEqual(p.netloc, b"")
667 self.assertEqual(p.username, None)
668 self.assertEqual(p.password, None)
669 self.assertEqual(p.hostname, None)
670 self.assertEqual(p.port, None)
671 self.assertEqual(p.geturl(), uri)
672
Christian Heimesfaf2f632008-01-06 16:59:19 +0000673 def test_noslash(self):
674 # Issue 1637: http://foo.com?query is legal
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000675 self.assertEqual(urllib.parse.urlparse("http://example.com?blahblah=/foo"),
Christian Heimesfaf2f632008-01-06 16:59:19 +0000676 ('http', 'example.com', '', '', 'blahblah=/foo', ''))
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000677 self.assertEqual(urllib.parse.urlparse(b"http://example.com?blahblah=/foo"),
678 (b'http', b'example.com', b'', b'', b'blahblah=/foo', b''))
Christian Heimesfaf2f632008-01-06 16:59:19 +0000679
Senthil Kumaran84c7d9f2010-08-04 04:50:44 +0000680 def test_withoutscheme(self):
681 # Test urlparse without scheme
682 # Issue 754016: urlparse goes wrong with IP:port without scheme
683 # RFC 1808 specifies that netloc should start with //, urlparse expects
684 # the same, otherwise it classifies the portion of url as path.
685 self.assertEqual(urllib.parse.urlparse("path"),
686 ('','','path','','',''))
687 self.assertEqual(urllib.parse.urlparse("//www.python.org:80"),
688 ('','www.python.org:80','','','',''))
689 self.assertEqual(urllib.parse.urlparse("http://www.python.org:80"),
690 ('http','www.python.org:80','','','',''))
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000691 # Repeat for bytes input
692 self.assertEqual(urllib.parse.urlparse(b"path"),
693 (b'',b'',b'path',b'',b'',b''))
694 self.assertEqual(urllib.parse.urlparse(b"//www.python.org:80"),
695 (b'',b'www.python.org:80',b'',b'',b'',b''))
696 self.assertEqual(urllib.parse.urlparse(b"http://www.python.org:80"),
697 (b'http',b'www.python.org:80',b'',b'',b'',b''))
Senthil Kumaran84c7d9f2010-08-04 04:50:44 +0000698
def test_portseparator(self):
    # Issue 754016 makes changes for port separator ':' from scheme
    # separator.
    expectations = (
        ("path:80",
         ('', '', 'path:80', '', '', '')),
        ("http:",
         ('http', '', '', '', '', '')),
        ("https:",
         ('https', '', '', '', '', '')),
        ("http://www.python.org:80",
         ('http', 'www.python.org:80', '', '', '', '')),
        # As usual, bytes input needs checking as well.
        (b"path:80",
         (b'', b'', b'path:80', b'', b'', b'')),
        (b"http:",
         (b'http', b'', b'', b'', b'', b'')),
        (b"https:",
         (b'https', b'', b'', b'', b'', b'')),
        (b"http://www.python.org:80",
         (b'http', b'www.python.org:80', b'', b'', b'', b'')),
    )
    for url, parts in expectations:
        self.assertEqual(urllib.parse.urlparse(url), parts)
Senthil Kumaran84c7d9f2010-08-04 04:50:44 +0000714
Facundo Batista2ac5de22008-07-07 18:24:11 +0000715 def test_usingsys(self):
716 # Issue 3314: sys module is used in the error
717 self.assertRaises(TypeError, urllib.parse.urlencode, "foo")
718
Senthil Kumaran6be85c52010-02-19 07:42:50 +0000719 def test_anyscheme(self):
720 # Issue 7904: s3://foo.com/stuff has netloc "foo.com".
Ezio Melotti5e15efa2010-02-19 14:49:02 +0000721 self.assertEqual(urllib.parse.urlparse("s3://foo.com/stuff"),
722 ('s3', 'foo.com', '/stuff', '', '', ''))
723 self.assertEqual(urllib.parse.urlparse("x-newscheme://foo.com/stuff"),
724 ('x-newscheme', 'foo.com', '/stuff', '', '', ''))
Senthil Kumaran1be320e2012-05-19 08:12:00 +0800725 self.assertEqual(urllib.parse.urlparse("x-newscheme://foo.com/stuff?query#fragment"),
726 ('x-newscheme', 'foo.com', '/stuff', '', 'query', 'fragment'))
727 self.assertEqual(urllib.parse.urlparse("x-newscheme://foo.com/stuff?query"),
728 ('x-newscheme', 'foo.com', '/stuff', '', 'query', ''))
729
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000730 # And for bytes...
731 self.assertEqual(urllib.parse.urlparse(b"s3://foo.com/stuff"),
732 (b's3', b'foo.com', b'/stuff', b'', b'', b''))
733 self.assertEqual(urllib.parse.urlparse(b"x-newscheme://foo.com/stuff"),
734 (b'x-newscheme', b'foo.com', b'/stuff', b'', b'', b''))
Senthil Kumaran1be320e2012-05-19 08:12:00 +0800735 self.assertEqual(urllib.parse.urlparse(b"x-newscheme://foo.com/stuff?query#fragment"),
736 (b'x-newscheme', b'foo.com', b'/stuff', b'', b'query', b'fragment'))
737 self.assertEqual(urllib.parse.urlparse(b"x-newscheme://foo.com/stuff?query"),
738 (b'x-newscheme', b'foo.com', b'/stuff', b'', b'query', b''))
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000739
Berker Peksag89584c92015-06-25 23:38:48 +0300740 def test_default_scheme(self):
741 # Exercise the scheme parameter of urlparse() and urlsplit()
742 for func in (urllib.parse.urlparse, urllib.parse.urlsplit):
743 with self.subTest(function=func):
744 result = func("http://example.net/", "ftp")
745 self.assertEqual(result.scheme, "http")
746 result = func(b"http://example.net/", b"ftp")
747 self.assertEqual(result.scheme, b"http")
748 self.assertEqual(func("path", "ftp").scheme, "ftp")
749 self.assertEqual(func("path", scheme="ftp").scheme, "ftp")
750 self.assertEqual(func(b"path", scheme=b"ftp").scheme, b"ftp")
751 self.assertEqual(func("path").scheme, "")
752 self.assertEqual(func(b"path").scheme, b"")
753 self.assertEqual(func(b"path", "").scheme, b"")
754
755 def test_parse_fragments(self):
756 # Exercise the allow_fragments parameter of urlparse() and urlsplit()
757 tests = (
758 ("http:#frag", "path"),
759 ("//example.net#frag", "path"),
760 ("index.html#frag", "path"),
761 (";a=b#frag", "params"),
762 ("?a=b#frag", "query"),
763 ("#frag", "path"),
764 )
765 for url, attr in tests:
766 for func in (urllib.parse.urlparse, urllib.parse.urlsplit):
767 if attr == "params" and func is urllib.parse.urlsplit:
768 attr = "path"
769 with self.subTest(url=url, function=func):
770 result = func(url, allow_fragments=False)
771 self.assertEqual(result.fragment, "")
772 self.assertTrue(getattr(result, attr).endswith("#frag"))
773 self.assertEqual(func(url, "", False).fragment, "")
774
775 result = func(url, allow_fragments=True)
776 self.assertEqual(result.fragment, "frag")
777 self.assertFalse(getattr(result, attr).endswith("frag"))
778 self.assertEqual(func(url, "", True).fragment, "frag")
779 self.assertEqual(func(url).fragment, "frag")
780
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000781 def test_mixed_types_rejected(self):
782 # Several functions that process either strings or ASCII encoded bytes
783 # accept multiple arguments. Check they reject mixed type input
Ezio Melottied3a7d22010-12-01 02:32:32 +0000784 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000785 urllib.parse.urlparse("www.python.org", b"http")
Ezio Melottied3a7d22010-12-01 02:32:32 +0000786 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000787 urllib.parse.urlparse(b"www.python.org", "http")
Ezio Melottied3a7d22010-12-01 02:32:32 +0000788 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000789 urllib.parse.urlsplit("www.python.org", b"http")
Ezio Melottied3a7d22010-12-01 02:32:32 +0000790 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000791 urllib.parse.urlsplit(b"www.python.org", "http")
Ezio Melottied3a7d22010-12-01 02:32:32 +0000792 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000793 urllib.parse.urlunparse(( b"http", "www.python.org","","","",""))
Ezio Melottied3a7d22010-12-01 02:32:32 +0000794 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000795 urllib.parse.urlunparse(("http", b"www.python.org","","","",""))
Ezio Melottied3a7d22010-12-01 02:32:32 +0000796 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000797 urllib.parse.urlunsplit((b"http", "www.python.org","","",""))
Ezio Melottied3a7d22010-12-01 02:32:32 +0000798 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000799 urllib.parse.urlunsplit(("http", b"www.python.org","","",""))
Ezio Melottied3a7d22010-12-01 02:32:32 +0000800 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000801 urllib.parse.urljoin("http://python.org", b"http://python.org")
Ezio Melottied3a7d22010-12-01 02:32:32 +0000802 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000803 urllib.parse.urljoin(b"http://python.org", "http://python.org")
804
805 def _check_result_type(self, str_type):
806 num_args = len(str_type._fields)
807 bytes_type = str_type._encoded_counterpart
808 self.assertIs(bytes_type._decoded_counterpart, str_type)
809 str_args = ('',) * num_args
810 bytes_args = (b'',) * num_args
811 str_result = str_type(*str_args)
812 bytes_result = bytes_type(*bytes_args)
813 encoding = 'ascii'
814 errors = 'strict'
815 self.assertEqual(str_result, str_args)
816 self.assertEqual(bytes_result.decode(), str_args)
817 self.assertEqual(bytes_result.decode(), str_result)
818 self.assertEqual(bytes_result.decode(encoding), str_args)
819 self.assertEqual(bytes_result.decode(encoding), str_result)
820 self.assertEqual(bytes_result.decode(encoding, errors), str_args)
821 self.assertEqual(bytes_result.decode(encoding, errors), str_result)
822 self.assertEqual(bytes_result, bytes_args)
823 self.assertEqual(str_result.encode(), bytes_args)
824 self.assertEqual(str_result.encode(), bytes_result)
825 self.assertEqual(str_result.encode(encoding), bytes_args)
826 self.assertEqual(str_result.encode(encoding), bytes_result)
827 self.assertEqual(str_result.encode(encoding, errors), bytes_args)
828 self.assertEqual(str_result.encode(encoding, errors), bytes_result)
829
def test_result_pairs(self):
    # Check encoding and decoding between result pairs: each str result
    # type is run through the shared _check_result_type() helper.
    for str_result_type in (urllib.parse.DefragResult,
                            urllib.parse.SplitResult,
                            urllib.parse.ParseResult):
        self._check_result_type(str_result_type)
839
Victor Stinner1d87deb2011-01-14 13:05:19 +0000840 def test_parse_qs_encoding(self):
841 result = urllib.parse.parse_qs("key=\u0141%E9", encoding="latin-1")
842 self.assertEqual(result, {'key': ['\u0141\xE9']})
843 result = urllib.parse.parse_qs("key=\u0141%C3%A9", encoding="utf-8")
844 self.assertEqual(result, {'key': ['\u0141\xE9']})
845 result = urllib.parse.parse_qs("key=\u0141%C3%A9", encoding="ascii")
846 self.assertEqual(result, {'key': ['\u0141\ufffd\ufffd']})
847 result = urllib.parse.parse_qs("key=\u0141%E9-", encoding="ascii")
848 self.assertEqual(result, {'key': ['\u0141\ufffd-']})
849 result = urllib.parse.parse_qs("key=\u0141%E9-", encoding="ascii",
850 errors="ignore")
851 self.assertEqual(result, {'key': ['\u0141-']})
852
853 def test_parse_qsl_encoding(self):
854 result = urllib.parse.parse_qsl("key=\u0141%E9", encoding="latin-1")
855 self.assertEqual(result, [('key', '\u0141\xE9')])
856 result = urllib.parse.parse_qsl("key=\u0141%C3%A9", encoding="utf-8")
857 self.assertEqual(result, [('key', '\u0141\xE9')])
858 result = urllib.parse.parse_qsl("key=\u0141%C3%A9", encoding="ascii")
859 self.assertEqual(result, [('key', '\u0141\ufffd\ufffd')])
860 result = urllib.parse.parse_qsl("key=\u0141%E9-", encoding="ascii")
861 self.assertEqual(result, [('key', '\u0141\ufffd-')])
862 result = urllib.parse.parse_qsl("key=\u0141%E9-", encoding="ascii",
863 errors="ignore")
864 self.assertEqual(result, [('key', '\u0141-')])
865
Senthil Kumarande02a712011-07-23 18:27:45 +0800866 def test_urlencode_sequences(self):
867 # Other tests incidentally urlencode things; test non-covered cases:
868 # Sequence and object values.
869 result = urllib.parse.urlencode({'a': [1, 2], 'b': (3, 4, 5)}, True)
Georg Brandl09a7c722012-02-20 21:31:46 +0100870 # we cannot rely on ordering here
871 assert set(result.split('&')) == {'a=1', 'a=2', 'b=3', 'b=4', 'b=5'}
Senthil Kumarande02a712011-07-23 18:27:45 +0800872
873 class Trivial:
874 def __str__(self):
875 return 'trivial'
876
877 result = urllib.parse.urlencode({'a': Trivial()}, True)
878 self.assertEqual(result, 'a=trivial')
879
R David Murrayc17686f2015-05-17 20:44:50 -0400880 def test_urlencode_quote_via(self):
881 result = urllib.parse.urlencode({'a': 'some value'})
882 self.assertEqual(result, "a=some+value")
883 result = urllib.parse.urlencode({'a': 'some value/another'},
884 quote_via=urllib.parse.quote)
885 self.assertEqual(result, "a=some%20value%2Fanother")
886 result = urllib.parse.urlencode({'a': 'some value/another'},
887 safe='/', quote_via=urllib.parse.quote)
888 self.assertEqual(result, "a=some%20value/another")
889
Senthil Kumarande02a712011-07-23 18:27:45 +0800890 def test_quote_from_bytes(self):
891 self.assertRaises(TypeError, urllib.parse.quote_from_bytes, 'foo')
892 result = urllib.parse.quote_from_bytes(b'archaeological arcana')
893 self.assertEqual(result, 'archaeological%20arcana')
894 result = urllib.parse.quote_from_bytes(b'')
895 self.assertEqual(result, '')
896
897 def test_unquote_to_bytes(self):
898 result = urllib.parse.unquote_to_bytes('abc%20def')
899 self.assertEqual(result, b'abc def')
900 result = urllib.parse.unquote_to_bytes('')
901 self.assertEqual(result, b'')
902
903 def test_quote_errors(self):
904 self.assertRaises(TypeError, urllib.parse.quote, b'foo',
905 encoding='utf-8')
906 self.assertRaises(TypeError, urllib.parse.quote, b'foo', errors='strict')
Victor Stinner1d87deb2011-01-14 13:05:19 +0000907
Ezio Melotti6709b7d2012-05-19 17:15:19 +0300908 def test_issue14072(self):
909 p1 = urllib.parse.urlsplit('tel:+31-641044153')
910 self.assertEqual(p1.scheme, 'tel')
911 self.assertEqual(p1.path, '+31-641044153')
912 p2 = urllib.parse.urlsplit('tel:+31641044153')
913 self.assertEqual(p2.scheme, 'tel')
914 self.assertEqual(p2.path, '+31641044153')
Senthil Kumaraned301992012-12-24 14:00:20 -0800915 # assert the behavior for urlparse
916 p1 = urllib.parse.urlparse('tel:+31-641044153')
917 self.assertEqual(p1.scheme, 'tel')
918 self.assertEqual(p1.path, '+31-641044153')
919 p2 = urllib.parse.urlparse('tel:+31641044153')
920 self.assertEqual(p2.scheme, 'tel')
921 self.assertEqual(p2.path, '+31641044153')
922
923 def test_telurl_params(self):
924 p1 = urllib.parse.urlparse('tel:123-4;phone-context=+1-650-516')
925 self.assertEqual(p1.scheme, 'tel')
926 self.assertEqual(p1.path, '123-4')
927 self.assertEqual(p1.params, 'phone-context=+1-650-516')
928
929 p1 = urllib.parse.urlparse('tel:+1-201-555-0123')
930 self.assertEqual(p1.scheme, 'tel')
931 self.assertEqual(p1.path, '+1-201-555-0123')
932 self.assertEqual(p1.params, '')
933
934 p1 = urllib.parse.urlparse('tel:7042;phone-context=example.com')
935 self.assertEqual(p1.scheme, 'tel')
936 self.assertEqual(p1.path, '7042')
937 self.assertEqual(p1.params, 'phone-context=example.com')
938
939 p1 = urllib.parse.urlparse('tel:863-1234;phone-context=+1-914-555')
940 self.assertEqual(p1.scheme, 'tel')
941 self.assertEqual(p1.path, '863-1234')
942 self.assertEqual(p1.params, 'phone-context=+1-914-555')
943
R David Murrayf5163882013-03-21 20:56:51 -0400944 def test_Quoter_repr(self):
945 quoter = urllib.parse.Quoter(urllib.parse._ALWAYS_SAFE)
946 self.assertIn('Quoter', repr(quoter))
947
Serhiy Storchaka15154502015-04-07 19:09:01 +0300948 def test_all(self):
949 expected = []
950 undocumented = {
951 'splitattr', 'splithost', 'splitnport', 'splitpasswd',
952 'splitport', 'splitquery', 'splittag', 'splittype', 'splituser',
953 'splitvalue',
954 'Quoter', 'ResultBase', 'clear_cache', 'to_bytes', 'unwrap',
955 }
956 for name in dir(urllib.parse):
957 if name.startswith('_') or name in undocumented:
958 continue
959 object = getattr(urllib.parse, name)
960 if getattr(object, '__module__', None) == 'urllib.parse':
961 expected.append(name)
962 self.assertCountEqual(urllib.parse.__all__, expected)
963
Senthil Kumaran6be85c52010-02-19 07:42:50 +0000964
class Utility_Tests(unittest.TestCase):
    """Testcase to test the various utility functions in the urllib."""
    # In Python 2 this test class was in test_urllib.
    #
    # Most tests below are table driven: each row pairs the argument(s)
    # passed to the helper with the exact value it must return.

    def test_splittype(self):
        rows = (
            ('type:opaquestring', ('type', 'opaquestring')),
            ('opaquestring', (None, 'opaquestring')),
            (':opaquestring', (None, ':opaquestring')),
            ('type:', ('type', '')),
            ('type:opaque:string', ('type', 'opaque:string')),
        )
        for url, expected in rows:
            self.assertEqual(urllib.parse.splittype(url), expected)

    def test_splithost(self):
        rows = (
            ('//www.example.org:80/foo/bar/baz.html',
             ('www.example.org:80', '/foo/bar/baz.html')),
            ('//www.example.org:80',
             ('www.example.org:80', '')),
            ('/foo/bar/baz.html',
             (None, '/foo/bar/baz.html')),
        )
        for url, expected in rows:
            self.assertEqual(urllib.parse.splithost(url), expected)

    def test_splituser(self):
        rows = (
            ('User:Pass@www.python.org:080',
             ('User:Pass', 'www.python.org:080')),
            ('@www.python.org:080',
             ('', 'www.python.org:080')),
            ('www.python.org:080',
             (None, 'www.python.org:080')),
            ('User:Pass@',
             ('User:Pass', '')),
            ('User@example.com:Pass@www.python.org:080',
             ('User@example.com:Pass', 'www.python.org:080')),
        )
        for host, expected in rows:
            self.assertEqual(urllib.parse.splituser(host), expected)

    def test_splitpasswd(self):
        # Some of the password examples are not sensible, but they are
        # included to conform to RFC2617 and to address issue4675.
        rows = (
            ('user:ab', ('user', 'ab')),
            ('user:a\nb', ('user', 'a\nb')),
            ('user:a\tb', ('user', 'a\tb')),
            ('user:a\rb', ('user', 'a\rb')),
            ('user:a\fb', ('user', 'a\fb')),
            ('user:a\vb', ('user', 'a\vb')),
            ('user:a:b', ('user', 'a:b')),
            ('user:a b', ('user', 'a b')),
            ('user 2:ab', ('user 2', 'ab')),
            ('user+1:a+b', ('user+1', 'a+b')),
            ('user:', ('user', '')),
            ('user', ('user', None)),
            (':ab', ('', 'ab')),
        )
        for user, expected in rows:
            self.assertEqual(urllib.parse.splitpasswd(user), expected)

    def test_splitport(self):
        rows = (
            ('parrot:88', ('parrot', '88')),
            ('parrot', ('parrot', None)),
            ('parrot:', ('parrot', None)),
            ('127.0.0.1', ('127.0.0.1', None)),
            ('parrot:cheese', ('parrot:cheese', None)),
            ('[::1]:88', ('[::1]', '88')),
            ('[::1]', ('[::1]', None)),
            (':88', ('', '88')),
        )
        for host, expected in rows:
            self.assertEqual(urllib.parse.splitport(host), expected)

    def test_splitnport(self):
        # Each row holds the full positional argument tuple, because some
        # calls pass a second argument (55) and some do not.
        rows = (
            (('parrot:88',), ('parrot', 88)),
            (('parrot',), ('parrot', -1)),
            (('parrot', 55), ('parrot', 55)),
            (('parrot:',), ('parrot', -1)),
            (('parrot:', 55), ('parrot', 55)),
            (('127.0.0.1',), ('127.0.0.1', -1)),
            (('127.0.0.1', 55), ('127.0.0.1', 55)),
            (('parrot:cheese',), ('parrot', None)),
            (('parrot:cheese', 55), ('parrot', None)),
        )
        for args, expected in rows:
            self.assertEqual(urllib.parse.splitnport(*args), expected)

    def test_splitquery(self):
        # Normal cases are exercised by other tests; ensure that we also
        # catch cases with no port specified (testcase ensuring coverage).
        rows = (
            ('http://python.org/fake?foo=bar',
             ('http://python.org/fake', 'foo=bar')),
            ('http://python.org/fake?foo=bar?',
             ('http://python.org/fake?foo=bar', '')),
            ('http://python.org/fake',
             ('http://python.org/fake', None)),
            ('?foo=bar', ('', 'foo=bar')),
        )
        for url, expected in rows:
            self.assertEqual(urllib.parse.splitquery(url), expected)

    def test_splittag(self):
        rows = (
            ('http://example.com?foo=bar#baz',
             ('http://example.com?foo=bar', 'baz')),
            ('http://example.com?foo=bar#',
             ('http://example.com?foo=bar', '')),
            ('#baz', ('', 'baz')),
            ('http://example.com?foo=bar',
             ('http://example.com?foo=bar', None)),
            ('http://example.com?foo=bar#baz#boo',
             ('http://example.com?foo=bar#baz', 'boo')),
        )
        for url, expected in rows:
            self.assertEqual(urllib.parse.splittag(url), expected)

    def test_splitattr(self):
        rows = (
            ('/path;attr1=value1;attr2=value2',
             ('/path', ['attr1=value1', 'attr2=value2'])),
            ('/path;', ('/path', [''])),
            (';attr1=value1;attr2=value2',
             ('', ['attr1=value1', 'attr2=value2'])),
            ('/path', ('/path', [])),
        )
        for url, expected in rows:
            self.assertEqual(urllib.parse.splitattr(url), expected)

    def test_splitvalue(self):
        # Normal cases are exercised by other tests; test pathological
        # cases with no key/value pairs (testcase ensuring coverage).
        rows = (
            ('foo=bar', ('foo', 'bar')),
            ('foo=', ('foo', '')),
            ('=bar', ('', 'bar')),
            ('foobar', ('foobar', None)),
            ('foo=bar=baz', ('foo', 'bar=baz')),
        )
        for attr, expected in rows:
            self.assertEqual(urllib.parse.splitvalue(attr), expected)

    def test_to_bytes(self):
        # An ASCII-only URL comes back equal to the input string; a URL
        # with a non-ASCII character raises UnicodeError.
        ascii_url = urllib.parse.to_bytes('http://www.python.org')
        self.assertEqual(ascii_url, 'http://www.python.org')
        with self.assertRaises(UnicodeError):
            urllib.parse.to_bytes('http://www.python.org/medi\u00e6val')

    def test_unwrap(self):
        # The "<URL:...>" wrapper is stripped, leaving the bare URL.
        unwrapped = urllib.parse.unwrap('<URL:type://host/path>')
        self.assertEqual(unwrapped, 'type://host/path')
1092
Skip Montanaro6ec967d2002-03-23 05:32:10 +00001093
# Run the whole test module when it is executed directly as a script.
if __name__ == "__main__":
    unittest.main()