blob: 762500789f73acfd4ee38d77ce82188f08e894fc [file] [log] [blame]
Steve Dower16e6f7d2019-03-07 08:02:26 -08001import sys
2import unicodedata
Skip Montanaro6ec967d2002-03-23 05:32:10 +00003import unittest
Jeremy Hylton1afc1692008-06-18 20:49:58 +00004import urllib.parse
Fred Drakea4d18a02001-01-05 05:57:04 +00005
# Base URLs against which the urljoin() tests resolve relative references.
RFC1808_BASE = 'http://a/b/c/d;p?q#f'
RFC2396_BASE = 'http://a/b/c/d;p?q'
RFC3986_BASE = 'http://a/b/c/d;p?q'
SIMPLE_BASE = 'http://a/b/c/d'
Fred Drakea4d18a02001-01-05 05:57:04 +000010
# Each parse_qsl testcase is a two-tuple that contains
# a string with the query and a list with the expected result.

# Queries whose fields are separated by '&'.
_QSL_AMPERSAND_CASES = [
    ("", []),
    ("&", []),
    ("&&", []),
    ("=", [('', '')]),
    ("=a", [('', 'a')]),
    ("a", [('a', '')]),
    ("a=", [('a', '')]),
    ("&a=b", [('a', 'b')]),
    ("a=a+b&b=b+c", [('a', 'a b'), ('b', 'b c')]),
    ("a=1&a=2", [('a', '1'), ('a', '2')]),
]

# Queries whose fields are separated by ';'.
_QSL_SEMICOLON_CASES = [
    (";", []),
    (";;", []),
    (";a=b", [('a', 'b')]),
    ("a=a+b;b=b+c", [('a', 'a b'), ('b', 'b c')]),
    ("a=1;a=2", [('a', '1'), ('a', '2')]),
]


def _qsl_case_as_bytes(case):
    # Return the bytes twin of a (query, pairs) str test case.
    query, pairs = case
    return (query.encode('ascii'),
            [(name.encode('ascii'), value.encode('ascii'))
             for name, value in pairs])


# Every str case is followed (per separator) by its bytes equivalent.
parse_qsl_test_cases = (
    _QSL_AMPERSAND_CASES
    + [_qsl_case_as_bytes(case) for case in _QSL_AMPERSAND_CASES]
    + _QSL_SEMICOLON_CASES
    + [_qsl_case_as_bytes(case) for case in _QSL_SEMICOLON_CASES]
)
46
# Each parse_qs testcase is a two-tuple that contains
# a string with the query and a dictionary with the expected result.

# Queries whose fields are separated by '&'.
_QS_AMPERSAND_CASES = [
    ("", {}),
    ("&", {}),
    ("&&", {}),
    ("=", {'': ['']}),
    ("=a", {'': ['a']}),
    ("a", {'a': ['']}),
    ("a=", {'a': ['']}),
    ("&a=b", {'a': ['b']}),
    ("a=a+b&b=b+c", {'a': ['a b'], 'b': ['b c']}),
    ("a=1&a=2", {'a': ['1', '2']}),
]

# Queries whose fields are separated by ';'.
_QS_SEMICOLON_CASES = [
    (";", {}),
    (";;", {}),
    (";a=b", {'a': ['b']}),
    ("a=a+b;b=b+c", {'a': ['a b'], 'b': ['b c']}),
    ("a=1;a=2", {'a': ['1', '2']}),
]


def _qs_case_as_bytes(case):
    # Return the bytes twin of a (query, mapping) str test case.
    query, mapping = case
    return (query.encode('ascii'),
            {name.encode('ascii'): [value.encode('ascii') for value in values]
             for name, values in mapping.items()})


# Every str case is followed (per separator) by its bytes equivalent.
parse_qs_test_cases = (
    _QS_AMPERSAND_CASES
    + [_qs_case_as_bytes(case) for case in _QS_AMPERSAND_CASES]
    + _QS_SEMICOLON_CASES
    + [_qs_case_as_bytes(case) for case in _QS_SEMICOLON_CASES]
)
82
Skip Montanaro6ec967d2002-03-23 05:32:10 +000083class UrlParseTestCase(unittest.TestCase):
Johannes Gijsbers41e4faa2005-01-09 15:29:10 +000084
85 def checkRoundtrips(self, url, parsed, split):
Jeremy Hylton1afc1692008-06-18 20:49:58 +000086 result = urllib.parse.urlparse(url)
Johannes Gijsbers41e4faa2005-01-09 15:29:10 +000087 self.assertEqual(result, parsed)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +000088 t = (result.scheme, result.netloc, result.path,
89 result.params, result.query, result.fragment)
90 self.assertEqual(t, parsed)
Johannes Gijsbers41e4faa2005-01-09 15:29:10 +000091 # put it back together and it should be the same
Jeremy Hylton1afc1692008-06-18 20:49:58 +000092 result2 = urllib.parse.urlunparse(result)
Johannes Gijsbers41e4faa2005-01-09 15:29:10 +000093 self.assertEqual(result2, url)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +000094 self.assertEqual(result2, result.geturl())
95
96 # the result of geturl() is a fixpoint; we can always parse it
97 # again to get the same result:
Jeremy Hylton1afc1692008-06-18 20:49:58 +000098 result3 = urllib.parse.urlparse(result.geturl())
Thomas Wouters49fd7fa2006-04-21 10:40:58 +000099 self.assertEqual(result3.geturl(), result.geturl())
100 self.assertEqual(result3, result)
101 self.assertEqual(result3.scheme, result.scheme)
102 self.assertEqual(result3.netloc, result.netloc)
103 self.assertEqual(result3.path, result.path)
104 self.assertEqual(result3.params, result.params)
105 self.assertEqual(result3.query, result.query)
106 self.assertEqual(result3.fragment, result.fragment)
107 self.assertEqual(result3.username, result.username)
108 self.assertEqual(result3.password, result.password)
109 self.assertEqual(result3.hostname, result.hostname)
110 self.assertEqual(result3.port, result.port)
Johannes Gijsbers41e4faa2005-01-09 15:29:10 +0000111
112 # check the roundtrip using urlsplit() as well
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000113 result = urllib.parse.urlsplit(url)
Johannes Gijsbers41e4faa2005-01-09 15:29:10 +0000114 self.assertEqual(result, split)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000115 t = (result.scheme, result.netloc, result.path,
116 result.query, result.fragment)
117 self.assertEqual(t, split)
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000118 result2 = urllib.parse.urlunsplit(result)
Johannes Gijsbers41e4faa2005-01-09 15:29:10 +0000119 self.assertEqual(result2, url)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000120 self.assertEqual(result2, result.geturl())
121
122 # check the fixpoint property of re-parsing the result of geturl()
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000123 result3 = urllib.parse.urlsplit(result.geturl())
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000124 self.assertEqual(result3.geturl(), result.geturl())
125 self.assertEqual(result3, result)
126 self.assertEqual(result3.scheme, result.scheme)
127 self.assertEqual(result3.netloc, result.netloc)
128 self.assertEqual(result3.path, result.path)
129 self.assertEqual(result3.query, result.query)
130 self.assertEqual(result3.fragment, result.fragment)
131 self.assertEqual(result3.username, result.username)
132 self.assertEqual(result3.password, result.password)
133 self.assertEqual(result3.hostname, result.hostname)
134 self.assertEqual(result3.port, result.port)
Johannes Gijsbers41e4faa2005-01-09 15:29:10 +0000135
Facundo Batistac469d4c2008-09-03 22:49:01 +0000136 def test_qsl(self):
137 for orig, expect in parse_qsl_test_cases:
138 result = urllib.parse.parse_qsl(orig, keep_blank_values=True)
Senthil Kumarande02a712011-07-23 18:27:45 +0800139 self.assertEqual(result, expect, "Error parsing %r" % orig)
140 expect_without_blanks = [v for v in expect if len(v[1])]
141 result = urllib.parse.parse_qsl(orig, keep_blank_values=False)
142 self.assertEqual(result, expect_without_blanks,
143 "Error parsing %r" % orig)
Facundo Batistac469d4c2008-09-03 22:49:01 +0000144
Senthil Kumarane38415e2016-04-16 07:33:15 -0700145 def test_qs(self):
146 for orig, expect in parse_qs_test_cases:
147 result = urllib.parse.parse_qs(orig, keep_blank_values=True)
148 self.assertEqual(result, expect, "Error parsing %r" % orig)
149 expect_without_blanks = {v: expect[v]
150 for v in expect if len(expect[v][0])}
151 result = urllib.parse.parse_qs(orig, keep_blank_values=False)
152 self.assertEqual(result, expect_without_blanks,
153 "Error parsing %r" % orig)
154
Johannes Gijsbers41e4faa2005-01-09 15:29:10 +0000155 def test_roundtrips(self):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000156 str_cases = [
Fred Drake70705652002-10-16 21:02:36 +0000157 ('file:///tmp/junk.txt',
158 ('file', '', '/tmp/junk.txt', '', '', ''),
159 ('file', '', '/tmp/junk.txt', '', '')),
Neal Norwitz68b539e2003-01-06 06:58:31 +0000160 ('imap://mail.python.org/mbox1',
161 ('imap', 'mail.python.org', '/mbox1', '', '', ''),
162 ('imap', 'mail.python.org', '/mbox1', '', '')),
Skip Montanarof09b88e2003-01-06 20:27:03 +0000163 ('mms://wms.sys.hinet.net/cts/Drama/09006251100.asf',
Johannes Gijsbers41e4faa2005-01-09 15:29:10 +0000164 ('mms', 'wms.sys.hinet.net', '/cts/Drama/09006251100.asf',
165 '', '', ''),
166 ('mms', 'wms.sys.hinet.net', '/cts/Drama/09006251100.asf',
167 '', '')),
Senthil Kumaraneaaec272009-03-30 21:54:41 +0000168 ('nfs://server/path/to/file.txt',
169 ('nfs', 'server', '/path/to/file.txt', '', '', ''),
170 ('nfs', 'server', '/path/to/file.txt', '', '')),
Fred Drake50747fc2005-07-29 15:56:32 +0000171 ('svn+ssh://svn.zope.org/repos/main/ZConfig/trunk/',
172 ('svn+ssh', 'svn.zope.org', '/repos/main/ZConfig/trunk/',
173 '', '', ''),
174 ('svn+ssh', 'svn.zope.org', '/repos/main/ZConfig/trunk/',
Senthil Kumaranead169d2010-05-13 03:37:23 +0000175 '', '')),
176 ('git+ssh://git@github.com/user/project.git',
177 ('git+ssh', 'git@github.com','/user/project.git',
178 '','',''),
179 ('git+ssh', 'git@github.com','/user/project.git',
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000180 '', '')),
Johannes Gijsbers41e4faa2005-01-09 15:29:10 +0000181 ]
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000182 def _encode(t):
183 return (t[0].encode('ascii'),
184 tuple(x.encode('ascii') for x in t[1]),
185 tuple(x.encode('ascii') for x in t[2]))
186 bytes_cases = [_encode(x) for x in str_cases]
187 for url, parsed, split in str_cases + bytes_cases:
Johannes Gijsbers41e4faa2005-01-09 15:29:10 +0000188 self.checkRoundtrips(url, parsed, split)
Michael W. Hudsonbd3e7712002-03-18 13:06:00 +0000189
Johannes Gijsbers41e4faa2005-01-09 15:29:10 +0000190 def test_http_roundtrips(self):
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000191 # urllib.parse.urlsplit treats 'http:' as an optimized special case,
Johannes Gijsbers41e4faa2005-01-09 15:29:10 +0000192 # so we test both 'http:' and 'https:' in all the following.
193 # Three cheers for white box knowledge!
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000194 str_cases = [
Johannes Gijsbers41e4faa2005-01-09 15:29:10 +0000195 ('://www.python.org',
196 ('www.python.org', '', '', '', ''),
197 ('www.python.org', '', '', '')),
198 ('://www.python.org#abc',
199 ('www.python.org', '', '', '', 'abc'),
200 ('www.python.org', '', '', 'abc')),
201 ('://www.python.org?q=abc',
202 ('www.python.org', '', '', 'q=abc', ''),
203 ('www.python.org', '', 'q=abc', '')),
204 ('://www.python.org/#abc',
205 ('www.python.org', '/', '', '', 'abc'),
206 ('www.python.org', '/', '', 'abc')),
207 ('://a/b/c/d;p?q#f',
208 ('a', '/b/c/d', 'p', 'q', 'f'),
209 ('a', '/b/c/d;p', 'q', 'f')),
210 ]
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000211 def _encode(t):
212 return (t[0].encode('ascii'),
213 tuple(x.encode('ascii') for x in t[1]),
214 tuple(x.encode('ascii') for x in t[2]))
215 bytes_cases = [_encode(x) for x in str_cases]
216 str_schemes = ('http', 'https')
217 bytes_schemes = (b'http', b'https')
218 str_tests = str_schemes, str_cases
219 bytes_tests = bytes_schemes, bytes_cases
220 for schemes, test_cases in (str_tests, bytes_tests):
221 for scheme in schemes:
222 for url, parsed, split in test_cases:
223 url = scheme + url
224 parsed = (scheme,) + parsed
225 split = (scheme,) + split
226 self.checkRoundtrips(url, parsed, split)
Fred Drake70705652002-10-16 21:02:36 +0000227
Skip Montanaro6ec967d2002-03-23 05:32:10 +0000228 def checkJoin(self, base, relurl, expected):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000229 str_components = (base, relurl, expected)
230 self.assertEqual(urllib.parse.urljoin(base, relurl), expected)
231 bytes_components = baseb, relurlb, expectedb = [
232 x.encode('ascii') for x in str_components]
233 self.assertEqual(urllib.parse.urljoin(baseb, relurlb), expectedb)
Guido van Rossumbbc05682002-10-14 19:59:54 +0000234
235 def test_unparse_parse(self):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000236 str_cases = ['Python', './Python','x-newscheme://foo.com/stuff','x://y','x:/y','x:/','/',]
237 bytes_cases = [x.encode('ascii') for x in str_cases]
238 for u in str_cases + bytes_cases:
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000239 self.assertEqual(urllib.parse.urlunsplit(urllib.parse.urlsplit(u)), u)
240 self.assertEqual(urllib.parse.urlunparse(urllib.parse.urlparse(u)), u)
Fred Drakea4d18a02001-01-05 05:57:04 +0000241
Skip Montanaro6ec967d2002-03-23 05:32:10 +0000242 def test_RFC1808(self):
243 # "normal" cases from RFC 1808:
244 self.checkJoin(RFC1808_BASE, 'g:h', 'g:h')
245 self.checkJoin(RFC1808_BASE, 'g', 'http://a/b/c/g')
246 self.checkJoin(RFC1808_BASE, './g', 'http://a/b/c/g')
247 self.checkJoin(RFC1808_BASE, 'g/', 'http://a/b/c/g/')
248 self.checkJoin(RFC1808_BASE, '/g', 'http://a/g')
249 self.checkJoin(RFC1808_BASE, '//g', 'http://g')
Skip Montanaro6ec967d2002-03-23 05:32:10 +0000250 self.checkJoin(RFC1808_BASE, 'g?y', 'http://a/b/c/g?y')
251 self.checkJoin(RFC1808_BASE, 'g?y/./x', 'http://a/b/c/g?y/./x')
252 self.checkJoin(RFC1808_BASE, '#s', 'http://a/b/c/d;p?q#s')
253 self.checkJoin(RFC1808_BASE, 'g#s', 'http://a/b/c/g#s')
254 self.checkJoin(RFC1808_BASE, 'g#s/./x', 'http://a/b/c/g#s/./x')
255 self.checkJoin(RFC1808_BASE, 'g?y#s', 'http://a/b/c/g?y#s')
Skip Montanaro6ec967d2002-03-23 05:32:10 +0000256 self.checkJoin(RFC1808_BASE, 'g;x', 'http://a/b/c/g;x')
257 self.checkJoin(RFC1808_BASE, 'g;x?y#s', 'http://a/b/c/g;x?y#s')
258 self.checkJoin(RFC1808_BASE, '.', 'http://a/b/c/')
259 self.checkJoin(RFC1808_BASE, './', 'http://a/b/c/')
260 self.checkJoin(RFC1808_BASE, '..', 'http://a/b/')
261 self.checkJoin(RFC1808_BASE, '../', 'http://a/b/')
262 self.checkJoin(RFC1808_BASE, '../g', 'http://a/b/g')
263 self.checkJoin(RFC1808_BASE, '../..', 'http://a/')
264 self.checkJoin(RFC1808_BASE, '../../', 'http://a/')
265 self.checkJoin(RFC1808_BASE, '../../g', 'http://a/g')
Fred Drakea4d18a02001-01-05 05:57:04 +0000266
Skip Montanaro6ec967d2002-03-23 05:32:10 +0000267 # "abnormal" cases from RFC 1808:
268 self.checkJoin(RFC1808_BASE, '', 'http://a/b/c/d;p?q#f')
Skip Montanaro6ec967d2002-03-23 05:32:10 +0000269 self.checkJoin(RFC1808_BASE, 'g.', 'http://a/b/c/g.')
270 self.checkJoin(RFC1808_BASE, '.g', 'http://a/b/c/.g')
271 self.checkJoin(RFC1808_BASE, 'g..', 'http://a/b/c/g..')
272 self.checkJoin(RFC1808_BASE, '..g', 'http://a/b/c/..g')
273 self.checkJoin(RFC1808_BASE, './../g', 'http://a/b/g')
274 self.checkJoin(RFC1808_BASE, './g/.', 'http://a/b/c/g/')
275 self.checkJoin(RFC1808_BASE, 'g/./h', 'http://a/b/c/g/h')
276 self.checkJoin(RFC1808_BASE, 'g/../h', 'http://a/b/c/h')
Fred Drakea4d18a02001-01-05 05:57:04 +0000277
Skip Montanaro6ec967d2002-03-23 05:32:10 +0000278 # RFC 1808 and RFC 1630 disagree on these (according to RFC 1808),
279 # so we'll not actually run these tests (which expect 1808 behavior).
280 #self.checkJoin(RFC1808_BASE, 'http:g', 'http:g')
281 #self.checkJoin(RFC1808_BASE, 'http:', 'http:')
Fred Drakea4d18a02001-01-05 05:57:04 +0000282
Antoine Pitrou55ac5b32014-08-21 19:16:17 -0400283 # XXX: The following tests are no longer compatible with RFC3986
284 # self.checkJoin(RFC1808_BASE, '../../../g', 'http://a/../g')
285 # self.checkJoin(RFC1808_BASE, '../../../../g', 'http://a/../../g')
286 # self.checkJoin(RFC1808_BASE, '/./g', 'http://a/./g')
287 # self.checkJoin(RFC1808_BASE, '/../g', 'http://a/../g')
288
289
Senthil Kumaran397eb442011-04-15 18:20:24 +0800290 def test_RFC2368(self):
291 # Issue 11467: path that starts with a number is not parsed correctly
292 self.assertEqual(urllib.parse.urlparse('mailto:1337@example.org'),
293 ('mailto', '', '1337@example.org', '', '', ''))
294
Skip Montanaro6ec967d2002-03-23 05:32:10 +0000295 def test_RFC2396(self):
296 # cases from RFC 2396
Fred Drakea4d18a02001-01-05 05:57:04 +0000297
Skip Montanaro6ec967d2002-03-23 05:32:10 +0000298 self.checkJoin(RFC2396_BASE, 'g:h', 'g:h')
299 self.checkJoin(RFC2396_BASE, 'g', 'http://a/b/c/g')
300 self.checkJoin(RFC2396_BASE, './g', 'http://a/b/c/g')
301 self.checkJoin(RFC2396_BASE, 'g/', 'http://a/b/c/g/')
302 self.checkJoin(RFC2396_BASE, '/g', 'http://a/g')
303 self.checkJoin(RFC2396_BASE, '//g', 'http://g')
304 self.checkJoin(RFC2396_BASE, 'g?y', 'http://a/b/c/g?y')
305 self.checkJoin(RFC2396_BASE, '#s', 'http://a/b/c/d;p?q#s')
306 self.checkJoin(RFC2396_BASE, 'g#s', 'http://a/b/c/g#s')
307 self.checkJoin(RFC2396_BASE, 'g?y#s', 'http://a/b/c/g?y#s')
308 self.checkJoin(RFC2396_BASE, 'g;x', 'http://a/b/c/g;x')
309 self.checkJoin(RFC2396_BASE, 'g;x?y#s', 'http://a/b/c/g;x?y#s')
310 self.checkJoin(RFC2396_BASE, '.', 'http://a/b/c/')
311 self.checkJoin(RFC2396_BASE, './', 'http://a/b/c/')
312 self.checkJoin(RFC2396_BASE, '..', 'http://a/b/')
313 self.checkJoin(RFC2396_BASE, '../', 'http://a/b/')
314 self.checkJoin(RFC2396_BASE, '../g', 'http://a/b/g')
315 self.checkJoin(RFC2396_BASE, '../..', 'http://a/')
316 self.checkJoin(RFC2396_BASE, '../../', 'http://a/')
317 self.checkJoin(RFC2396_BASE, '../../g', 'http://a/g')
318 self.checkJoin(RFC2396_BASE, '', RFC2396_BASE)
Skip Montanaro6ec967d2002-03-23 05:32:10 +0000319 self.checkJoin(RFC2396_BASE, 'g.', 'http://a/b/c/g.')
320 self.checkJoin(RFC2396_BASE, '.g', 'http://a/b/c/.g')
321 self.checkJoin(RFC2396_BASE, 'g..', 'http://a/b/c/g..')
322 self.checkJoin(RFC2396_BASE, '..g', 'http://a/b/c/..g')
323 self.checkJoin(RFC2396_BASE, './../g', 'http://a/b/g')
324 self.checkJoin(RFC2396_BASE, './g/.', 'http://a/b/c/g/')
325 self.checkJoin(RFC2396_BASE, 'g/./h', 'http://a/b/c/g/h')
326 self.checkJoin(RFC2396_BASE, 'g/../h', 'http://a/b/c/h')
327 self.checkJoin(RFC2396_BASE, 'g;x=1/./y', 'http://a/b/c/g;x=1/y')
328 self.checkJoin(RFC2396_BASE, 'g;x=1/../y', 'http://a/b/c/y')
329 self.checkJoin(RFC2396_BASE, 'g?y/./x', 'http://a/b/c/g?y/./x')
330 self.checkJoin(RFC2396_BASE, 'g?y/../x', 'http://a/b/c/g?y/../x')
331 self.checkJoin(RFC2396_BASE, 'g#s/./x', 'http://a/b/c/g#s/./x')
332 self.checkJoin(RFC2396_BASE, 'g#s/../x', 'http://a/b/c/g#s/../x')
333
Antoine Pitrou55ac5b32014-08-21 19:16:17 -0400334 # XXX: The following tests are no longer compatible with RFC3986
335 # self.checkJoin(RFC2396_BASE, '../../../g', 'http://a/../g')
336 # self.checkJoin(RFC2396_BASE, '../../../../g', 'http://a/../../g')
337 # self.checkJoin(RFC2396_BASE, '/./g', 'http://a/./g')
338 # self.checkJoin(RFC2396_BASE, '/../g', 'http://a/../g')
339
Facundo Batista23e38562008-08-14 16:55:14 +0000340 def test_RFC3986(self):
341 self.checkJoin(RFC3986_BASE, '?y','http://a/b/c/d;p?y')
Antoine Pitrou55ac5b32014-08-21 19:16:17 -0400342 self.checkJoin(RFC3986_BASE, ';x', 'http://a/b/c/;x')
Senthil Kumarandd3820f2010-05-07 04:19:23 +0000343 self.checkJoin(RFC3986_BASE, 'g:h','g:h')
344 self.checkJoin(RFC3986_BASE, 'g','http://a/b/c/g')
345 self.checkJoin(RFC3986_BASE, './g','http://a/b/c/g')
346 self.checkJoin(RFC3986_BASE, 'g/','http://a/b/c/g/')
347 self.checkJoin(RFC3986_BASE, '/g','http://a/g')
348 self.checkJoin(RFC3986_BASE, '//g','http://g')
349 self.checkJoin(RFC3986_BASE, '?y','http://a/b/c/d;p?y')
350 self.checkJoin(RFC3986_BASE, 'g?y','http://a/b/c/g?y')
351 self.checkJoin(RFC3986_BASE, '#s','http://a/b/c/d;p?q#s')
352 self.checkJoin(RFC3986_BASE, 'g#s','http://a/b/c/g#s')
353 self.checkJoin(RFC3986_BASE, 'g?y#s','http://a/b/c/g?y#s')
354 self.checkJoin(RFC3986_BASE, ';x','http://a/b/c/;x')
355 self.checkJoin(RFC3986_BASE, 'g;x','http://a/b/c/g;x')
356 self.checkJoin(RFC3986_BASE, 'g;x?y#s','http://a/b/c/g;x?y#s')
357 self.checkJoin(RFC3986_BASE, '','http://a/b/c/d;p?q')
358 self.checkJoin(RFC3986_BASE, '.','http://a/b/c/')
359 self.checkJoin(RFC3986_BASE, './','http://a/b/c/')
360 self.checkJoin(RFC3986_BASE, '..','http://a/b/')
361 self.checkJoin(RFC3986_BASE, '../','http://a/b/')
362 self.checkJoin(RFC3986_BASE, '../g','http://a/b/g')
363 self.checkJoin(RFC3986_BASE, '../..','http://a/')
364 self.checkJoin(RFC3986_BASE, '../../','http://a/')
365 self.checkJoin(RFC3986_BASE, '../../g','http://a/g')
Antoine Pitrou55ac5b32014-08-21 19:16:17 -0400366 self.checkJoin(RFC3986_BASE, '../../../g', 'http://a/g')
Senthil Kumarandd3820f2010-05-07 04:19:23 +0000367
Senthil Kumaran257b9802017-04-04 21:19:43 -0700368 # Abnormal Examples
Senthil Kumarandd3820f2010-05-07 04:19:23 +0000369
370 # The 'abnormal scenarios' are incompatible with RFC2986 parsing
371 # Tests are here for reference.
372
Antoine Pitrou55ac5b32014-08-21 19:16:17 -0400373 self.checkJoin(RFC3986_BASE, '../../../g','http://a/g')
374 self.checkJoin(RFC3986_BASE, '../../../../g','http://a/g')
375 self.checkJoin(RFC3986_BASE, '/./g','http://a/g')
376 self.checkJoin(RFC3986_BASE, '/../g','http://a/g')
Senthil Kumarandd3820f2010-05-07 04:19:23 +0000377 self.checkJoin(RFC3986_BASE, 'g.','http://a/b/c/g.')
378 self.checkJoin(RFC3986_BASE, '.g','http://a/b/c/.g')
379 self.checkJoin(RFC3986_BASE, 'g..','http://a/b/c/g..')
380 self.checkJoin(RFC3986_BASE, '..g','http://a/b/c/..g')
381 self.checkJoin(RFC3986_BASE, './../g','http://a/b/g')
382 self.checkJoin(RFC3986_BASE, './g/.','http://a/b/c/g/')
383 self.checkJoin(RFC3986_BASE, 'g/./h','http://a/b/c/g/h')
384 self.checkJoin(RFC3986_BASE, 'g/../h','http://a/b/c/h')
385 self.checkJoin(RFC3986_BASE, 'g;x=1/./y','http://a/b/c/g;x=1/y')
386 self.checkJoin(RFC3986_BASE, 'g;x=1/../y','http://a/b/c/y')
387 self.checkJoin(RFC3986_BASE, 'g?y/./x','http://a/b/c/g?y/./x')
388 self.checkJoin(RFC3986_BASE, 'g?y/../x','http://a/b/c/g?y/../x')
389 self.checkJoin(RFC3986_BASE, 'g#s/./x','http://a/b/c/g#s/./x')
390 self.checkJoin(RFC3986_BASE, 'g#s/../x','http://a/b/c/g#s/../x')
391 #self.checkJoin(RFC3986_BASE, 'http:g','http:g') # strict parser
392 self.checkJoin(RFC3986_BASE, 'http:g','http://a/b/c/g') #relaxed parser
Facundo Batista23e38562008-08-14 16:55:14 +0000393
Senthil Kumarandca5b862010-12-17 04:48:45 +0000394 # Test for issue9721
395 self.checkJoin('http://a/b/c/de', ';x','http://a/b/c/;x')
396
Senthil Kumaranaa69d4d2010-07-14 10:21:22 +0000397 def test_urljoins(self):
398 self.checkJoin(SIMPLE_BASE, 'g:h','g:h')
399 self.checkJoin(SIMPLE_BASE, 'http:g','http://a/b/c/g')
400 self.checkJoin(SIMPLE_BASE, 'http:','http://a/b/c/d')
401 self.checkJoin(SIMPLE_BASE, 'g','http://a/b/c/g')
402 self.checkJoin(SIMPLE_BASE, './g','http://a/b/c/g')
403 self.checkJoin(SIMPLE_BASE, 'g/','http://a/b/c/g/')
404 self.checkJoin(SIMPLE_BASE, '/g','http://a/g')
405 self.checkJoin(SIMPLE_BASE, '//g','http://g')
406 self.checkJoin(SIMPLE_BASE, '?y','http://a/b/c/d?y')
407 self.checkJoin(SIMPLE_BASE, 'g?y','http://a/b/c/g?y')
408 self.checkJoin(SIMPLE_BASE, 'g?y/./x','http://a/b/c/g?y/./x')
409 self.checkJoin(SIMPLE_BASE, '.','http://a/b/c/')
410 self.checkJoin(SIMPLE_BASE, './','http://a/b/c/')
411 self.checkJoin(SIMPLE_BASE, '..','http://a/b/')
412 self.checkJoin(SIMPLE_BASE, '../','http://a/b/')
413 self.checkJoin(SIMPLE_BASE, '../g','http://a/b/g')
414 self.checkJoin(SIMPLE_BASE, '../..','http://a/')
415 self.checkJoin(SIMPLE_BASE, '../../g','http://a/g')
Senthil Kumaranaa69d4d2010-07-14 10:21:22 +0000416 self.checkJoin(SIMPLE_BASE, './../g','http://a/b/g')
417 self.checkJoin(SIMPLE_BASE, './g/.','http://a/b/c/g/')
Senthil Kumaranaa69d4d2010-07-14 10:21:22 +0000418 self.checkJoin(SIMPLE_BASE, 'g/./h','http://a/b/c/g/h')
419 self.checkJoin(SIMPLE_BASE, 'g/../h','http://a/b/c/h')
420 self.checkJoin(SIMPLE_BASE, 'http:g','http://a/b/c/g')
421 self.checkJoin(SIMPLE_BASE, 'http:','http://a/b/c/d')
422 self.checkJoin(SIMPLE_BASE, 'http:?y','http://a/b/c/d?y')
423 self.checkJoin(SIMPLE_BASE, 'http:g?y','http://a/b/c/g?y')
424 self.checkJoin(SIMPLE_BASE, 'http:g?y/./x','http://a/b/c/g?y/./x')
Senthil Kumarande02a712011-07-23 18:27:45 +0800425 self.checkJoin('http:///', '..','http:///')
426 self.checkJoin('', 'http://a/b/c/g?y/./x','http://a/b/c/g?y/./x')
427 self.checkJoin('', 'http://a/./g', 'http://a/./g')
Senthil Kumaran2a157d22011-08-03 18:37:22 +0800428 self.checkJoin('svn://pathtorepo/dir1', 'dir2', 'svn://pathtorepo/dir2')
Senthil Kumaran7ce71f62011-08-03 22:08:46 +0800429 self.checkJoin('svn+ssh://pathtorepo/dir1', 'dir2', 'svn+ssh://pathtorepo/dir2')
Berker Peksagf6767482016-09-16 14:43:58 +0300430 self.checkJoin('ws://a/b','g','ws://a/g')
431 self.checkJoin('wss://a/b','g','wss://a/g')
Senthil Kumaranaa69d4d2010-07-14 10:21:22 +0000432
Antoine Pitrou55ac5b32014-08-21 19:16:17 -0400433 # XXX: The following tests are no longer compatible with RFC3986
434 # self.checkJoin(SIMPLE_BASE, '../../../g','http://a/../g')
435 # self.checkJoin(SIMPLE_BASE, '/./g','http://a/./g')
436
Senthil Kumarana66e3882014-09-22 15:49:16 +0800437 # test for issue22118 duplicate slashes
438 self.checkJoin(SIMPLE_BASE + '/', 'foo', SIMPLE_BASE + '/foo')
439
440 # Non-RFC-defined tests, covering variations of base and trailing
441 # slashes
442 self.checkJoin('http://a/b/c/d/e/', '../../f/g/', 'http://a/b/c/f/g/')
443 self.checkJoin('http://a/b/c/d/e', '../../f/g/', 'http://a/b/f/g/')
444 self.checkJoin('http://a/b/c/d/e/', '/../../f/g/', 'http://a/f/g/')
445 self.checkJoin('http://a/b/c/d/e', '/../../f/g/', 'http://a/f/g/')
446 self.checkJoin('http://a/b/c/d/e/', '../../f/g', 'http://a/b/c/f/g')
447 self.checkJoin('http://a/b/', '../../f/g/', 'http://a/f/g/')
448
Berker Peksag20416f72015-04-16 02:31:14 +0300449 # issue 23703: don't duplicate filename
450 self.checkJoin('a', 'b', 'b')
451
Senthil Kumaranad02d232010-04-16 03:02:13 +0000452 def test_RFC2732(self):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000453 str_cases = [
Senthil Kumaranad02d232010-04-16 03:02:13 +0000454 ('http://Test.python.org:5432/foo/', 'test.python.org', 5432),
455 ('http://12.34.56.78:5432/foo/', '12.34.56.78', 5432),
456 ('http://[::1]:5432/foo/', '::1', 5432),
457 ('http://[dead:beef::1]:5432/foo/', 'dead:beef::1', 5432),
458 ('http://[dead:beef::]:5432/foo/', 'dead:beef::', 5432),
459 ('http://[dead:beef:cafe:5417:affe:8FA3:deaf:feed]:5432/foo/',
460 'dead:beef:cafe:5417:affe:8fa3:deaf:feed', 5432),
461 ('http://[::12.34.56.78]:5432/foo/', '::12.34.56.78', 5432),
462 ('http://[::ffff:12.34.56.78]:5432/foo/',
463 '::ffff:12.34.56.78', 5432),
464 ('http://Test.python.org/foo/', 'test.python.org', None),
465 ('http://12.34.56.78/foo/', '12.34.56.78', None),
466 ('http://[::1]/foo/', '::1', None),
467 ('http://[dead:beef::1]/foo/', 'dead:beef::1', None),
468 ('http://[dead:beef::]/foo/', 'dead:beef::', None),
469 ('http://[dead:beef:cafe:5417:affe:8FA3:deaf:feed]/foo/',
470 'dead:beef:cafe:5417:affe:8fa3:deaf:feed', None),
471 ('http://[::12.34.56.78]/foo/', '::12.34.56.78', None),
472 ('http://[::ffff:12.34.56.78]/foo/',
473 '::ffff:12.34.56.78', None),
Serhiy Storchakaff97b082014-01-18 18:30:33 +0200474 ('http://Test.python.org:/foo/', 'test.python.org', None),
475 ('http://12.34.56.78:/foo/', '12.34.56.78', None),
476 ('http://[::1]:/foo/', '::1', None),
477 ('http://[dead:beef::1]:/foo/', 'dead:beef::1', None),
478 ('http://[dead:beef::]:/foo/', 'dead:beef::', None),
479 ('http://[dead:beef:cafe:5417:affe:8FA3:deaf:feed]:/foo/',
480 'dead:beef:cafe:5417:affe:8fa3:deaf:feed', None),
481 ('http://[::12.34.56.78]:/foo/', '::12.34.56.78', None),
482 ('http://[::ffff:12.34.56.78]:/foo/',
483 '::ffff:12.34.56.78', None),
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000484 ]
485 def _encode(t):
486 return t[0].encode('ascii'), t[1].encode('ascii'), t[2]
487 bytes_cases = [_encode(x) for x in str_cases]
488 for url, hostname, port in str_cases + bytes_cases:
Senthil Kumaranad02d232010-04-16 03:02:13 +0000489 urlparsed = urllib.parse.urlparse(url)
490 self.assertEqual((urlparsed.hostname, urlparsed.port) , (hostname, port))
491
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000492 str_cases = [
Senthil Kumaranad02d232010-04-16 03:02:13 +0000493 'http://::12.34.56.78]/',
494 'http://[::1/foo/',
Senthil Kumaran7a1e09f2010-04-22 12:19:46 +0000495 'ftp://[::1/foo/bad]/bad',
Senthil Kumaran2eaef052010-04-20 20:42:50 +0000496 'http://[::1/foo/bad]/bad',
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000497 'http://[::ffff:12.34.56.78']
498 bytes_cases = [x.encode('ascii') for x in str_cases]
499 for invalid_url in str_cases + bytes_cases:
Senthil Kumaran7a1e09f2010-04-22 12:19:46 +0000500 self.assertRaises(ValueError, urllib.parse.urlparse, invalid_url)
Senthil Kumaranad02d232010-04-16 03:02:13 +0000501
Fred Drake70705652002-10-16 21:02:36 +0000502 def test_urldefrag(self):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000503 str_cases = [
Fred Drake70705652002-10-16 21:02:36 +0000504 ('http://python.org#frag', 'http://python.org', 'frag'),
505 ('http://python.org', 'http://python.org', ''),
506 ('http://python.org/#frag', 'http://python.org/', 'frag'),
507 ('http://python.org/', 'http://python.org/', ''),
508 ('http://python.org/?q#frag', 'http://python.org/?q', 'frag'),
509 ('http://python.org/?q', 'http://python.org/?q', ''),
510 ('http://python.org/p#frag', 'http://python.org/p', 'frag'),
511 ('http://python.org/p?q', 'http://python.org/p?q', ''),
512 (RFC1808_BASE, 'http://a/b/c/d;p?q', 'f'),
513 (RFC2396_BASE, 'http://a/b/c/d;p?q', ''),
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000514 ]
515 def _encode(t):
516 return type(t)(x.encode('ascii') for x in t)
517 bytes_cases = [_encode(x) for x in str_cases]
518 for url, defrag, frag in str_cases + bytes_cases:
519 result = urllib.parse.urldefrag(url)
520 self.assertEqual(result.geturl(), url)
521 self.assertEqual(result, (defrag, frag))
522 self.assertEqual(result.url, defrag)
523 self.assertEqual(result.fragment, frag)
Fred Drake70705652002-10-16 21:02:36 +0000524
Коренберг Маркfbd60512017-12-21 17:16:17 +0500525 def test_urlsplit_scoped_IPv6(self):
526 p = urllib.parse.urlsplit('http://[FE80::822a:a8ff:fe49:470c%tESt]:1234')
527 self.assertEqual(p.hostname, "fe80::822a:a8ff:fe49:470c%tESt")
528 self.assertEqual(p.netloc, '[FE80::822a:a8ff:fe49:470c%tESt]:1234')
529
530 p = urllib.parse.urlsplit(b'http://[FE80::822a:a8ff:fe49:470c%tESt]:1234')
531 self.assertEqual(p.hostname, b"fe80::822a:a8ff:fe49:470c%tESt")
532 self.assertEqual(p.netloc, b'[FE80::822a:a8ff:fe49:470c%tESt]:1234')
533
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000534 def test_urlsplit_attributes(self):
535 url = "HTTP://WWW.PYTHON.ORG/doc/#frag"
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000536 p = urllib.parse.urlsplit(url)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000537 self.assertEqual(p.scheme, "http")
538 self.assertEqual(p.netloc, "WWW.PYTHON.ORG")
539 self.assertEqual(p.path, "/doc/")
540 self.assertEqual(p.query, "")
541 self.assertEqual(p.fragment, "frag")
542 self.assertEqual(p.username, None)
543 self.assertEqual(p.password, None)
544 self.assertEqual(p.hostname, "www.python.org")
545 self.assertEqual(p.port, None)
546 # geturl() won't return exactly the original URL in this case
547 # since the scheme is always case-normalized
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000548 # We handle this by ignoring the first 4 characters of the URL
549 self.assertEqual(p.geturl()[4:], url[4:])
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000550
551 url = "http://User:Pass@www.python.org:080/doc/?query=yes#frag"
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000552 p = urllib.parse.urlsplit(url)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000553 self.assertEqual(p.scheme, "http")
554 self.assertEqual(p.netloc, "User:Pass@www.python.org:080")
555 self.assertEqual(p.path, "/doc/")
556 self.assertEqual(p.query, "query=yes")
557 self.assertEqual(p.fragment, "frag")
558 self.assertEqual(p.username, "User")
559 self.assertEqual(p.password, "Pass")
560 self.assertEqual(p.hostname, "www.python.org")
561 self.assertEqual(p.port, 80)
562 self.assertEqual(p.geturl(), url)
563
Christian Heimesfaf2f632008-01-06 16:59:19 +0000564 # Addressing issue1698, which suggests Username can contain
565 # "@" characters. Though not RFC compliant, many ftp sites allow
566 # and request email addresses as usernames.
567
568 url = "http://User@example.com:Pass@www.python.org:080/doc/?query=yes#frag"
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000569 p = urllib.parse.urlsplit(url)
Christian Heimesfaf2f632008-01-06 16:59:19 +0000570 self.assertEqual(p.scheme, "http")
571 self.assertEqual(p.netloc, "User@example.com:Pass@www.python.org:080")
572 self.assertEqual(p.path, "/doc/")
573 self.assertEqual(p.query, "query=yes")
574 self.assertEqual(p.fragment, "frag")
575 self.assertEqual(p.username, "User@example.com")
576 self.assertEqual(p.password, "Pass")
577 self.assertEqual(p.hostname, "www.python.org")
578 self.assertEqual(p.port, 80)
579 self.assertEqual(p.geturl(), url)
580
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000581 # And check them all again, only with bytes this time
582 url = b"HTTP://WWW.PYTHON.ORG/doc/#frag"
583 p = urllib.parse.urlsplit(url)
584 self.assertEqual(p.scheme, b"http")
585 self.assertEqual(p.netloc, b"WWW.PYTHON.ORG")
586 self.assertEqual(p.path, b"/doc/")
587 self.assertEqual(p.query, b"")
588 self.assertEqual(p.fragment, b"frag")
589 self.assertEqual(p.username, None)
590 self.assertEqual(p.password, None)
591 self.assertEqual(p.hostname, b"www.python.org")
592 self.assertEqual(p.port, None)
593 self.assertEqual(p.geturl()[4:], url[4:])
594
595 url = b"http://User:Pass@www.python.org:080/doc/?query=yes#frag"
596 p = urllib.parse.urlsplit(url)
597 self.assertEqual(p.scheme, b"http")
598 self.assertEqual(p.netloc, b"User:Pass@www.python.org:080")
599 self.assertEqual(p.path, b"/doc/")
600 self.assertEqual(p.query, b"query=yes")
601 self.assertEqual(p.fragment, b"frag")
602 self.assertEqual(p.username, b"User")
603 self.assertEqual(p.password, b"Pass")
604 self.assertEqual(p.hostname, b"www.python.org")
605 self.assertEqual(p.port, 80)
606 self.assertEqual(p.geturl(), url)
607
608 url = b"http://User@example.com:Pass@www.python.org:080/doc/?query=yes#frag"
609 p = urllib.parse.urlsplit(url)
610 self.assertEqual(p.scheme, b"http")
611 self.assertEqual(p.netloc, b"User@example.com:Pass@www.python.org:080")
612 self.assertEqual(p.path, b"/doc/")
613 self.assertEqual(p.query, b"query=yes")
614 self.assertEqual(p.fragment, b"frag")
615 self.assertEqual(p.username, b"User@example.com")
616 self.assertEqual(p.password, b"Pass")
617 self.assertEqual(p.hostname, b"www.python.org")
618 self.assertEqual(p.port, 80)
619 self.assertEqual(p.geturl(), url)
Christian Heimesfaf2f632008-01-06 16:59:19 +0000620
Robert Collinsdfa95c92015-08-10 09:53:30 +1200621 # Verify an illegal port raises ValueError
Senthil Kumaran2fc5a502012-05-24 21:56:17 +0800622 url = b"HTTP://WWW.PYTHON.ORG:65536/doc/#frag"
623 p = urllib.parse.urlsplit(url)
Robert Collinsdfa95c92015-08-10 09:53:30 +1200624 with self.assertRaisesRegex(ValueError, "out of range"):
625 p.port
Senthil Kumaran2fc5a502012-05-24 21:56:17 +0800626
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000627 def test_attributes_bad_port(self):
Robert Collinsdfa95c92015-08-10 09:53:30 +1200628 """Check handling of invalid ports."""
629 for bytes in (False, True):
630 for parse in (urllib.parse.urlsplit, urllib.parse.urlparse):
631 for port in ("foo", "1.5", "-1", "0x10"):
632 with self.subTest(bytes=bytes, parse=parse, port=port):
633 netloc = "www.example.net:" + port
634 url = "http://" + netloc
635 if bytes:
636 netloc = netloc.encode("ascii")
637 url = url.encode("ascii")
638 p = parse(url)
639 self.assertEqual(p.netloc, netloc)
640 with self.assertRaises(ValueError):
641 p.port
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000642
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000643 def test_attributes_without_netloc(self):
644 # This example is straight from RFC 3261. It looks like it
645 # should allow the username, hostname, and port to be filled
646 # in, but doesn't. Since it's a URI and doesn't use the
647 # scheme://netloc syntax, the netloc and related attributes
648 # should be left empty.
649 uri = "sip:alice@atlanta.com;maddr=239.255.255.1;ttl=15"
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000650 p = urllib.parse.urlsplit(uri)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000651 self.assertEqual(p.netloc, "")
652 self.assertEqual(p.username, None)
653 self.assertEqual(p.password, None)
654 self.assertEqual(p.hostname, None)
655 self.assertEqual(p.port, None)
656 self.assertEqual(p.geturl(), uri)
657
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000658 p = urllib.parse.urlparse(uri)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000659 self.assertEqual(p.netloc, "")
660 self.assertEqual(p.username, None)
661 self.assertEqual(p.password, None)
662 self.assertEqual(p.hostname, None)
663 self.assertEqual(p.port, None)
664 self.assertEqual(p.geturl(), uri)
665
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000666 # You guessed it, repeating the test with bytes input
667 uri = b"sip:alice@atlanta.com;maddr=239.255.255.1;ttl=15"
668 p = urllib.parse.urlsplit(uri)
669 self.assertEqual(p.netloc, b"")
670 self.assertEqual(p.username, None)
671 self.assertEqual(p.password, None)
672 self.assertEqual(p.hostname, None)
673 self.assertEqual(p.port, None)
674 self.assertEqual(p.geturl(), uri)
675
676 p = urllib.parse.urlparse(uri)
677 self.assertEqual(p.netloc, b"")
678 self.assertEqual(p.username, None)
679 self.assertEqual(p.password, None)
680 self.assertEqual(p.hostname, None)
681 self.assertEqual(p.port, None)
682 self.assertEqual(p.geturl(), uri)
683
Christian Heimesfaf2f632008-01-06 16:59:19 +0000684 def test_noslash(self):
685 # Issue 1637: http://foo.com?query is legal
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000686 self.assertEqual(urllib.parse.urlparse("http://example.com?blahblah=/foo"),
Christian Heimesfaf2f632008-01-06 16:59:19 +0000687 ('http', 'example.com', '', '', 'blahblah=/foo', ''))
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000688 self.assertEqual(urllib.parse.urlparse(b"http://example.com?blahblah=/foo"),
689 (b'http', b'example.com', b'', b'', b'blahblah=/foo', b''))
Christian Heimesfaf2f632008-01-06 16:59:19 +0000690
Senthil Kumaran84c7d9f2010-08-04 04:50:44 +0000691 def test_withoutscheme(self):
692 # Test urlparse without scheme
693 # Issue 754016: urlparse goes wrong with IP:port without scheme
694 # RFC 1808 specifies that netloc should start with //, urlparse expects
695 # the same, otherwise it classifies the portion of url as path.
696 self.assertEqual(urllib.parse.urlparse("path"),
697 ('','','path','','',''))
698 self.assertEqual(urllib.parse.urlparse("//www.python.org:80"),
699 ('','www.python.org:80','','','',''))
700 self.assertEqual(urllib.parse.urlparse("http://www.python.org:80"),
701 ('http','www.python.org:80','','','',''))
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000702 # Repeat for bytes input
703 self.assertEqual(urllib.parse.urlparse(b"path"),
704 (b'',b'',b'path',b'',b'',b''))
705 self.assertEqual(urllib.parse.urlparse(b"//www.python.org:80"),
706 (b'',b'www.python.org:80',b'',b'',b'',b''))
707 self.assertEqual(urllib.parse.urlparse(b"http://www.python.org:80"),
708 (b'http',b'www.python.org:80',b'',b'',b'',b''))
Senthil Kumaran84c7d9f2010-08-04 04:50:44 +0000709
710 def test_portseparator(self):
711 # Issue 754016 makes changes for port separator ':' from scheme separator
Tim Graham5a88d502019-10-18 09:07:20 -0400712 self.assertEqual(urllib.parse.urlparse("http:80"), ('http','','80','','',''))
713 self.assertEqual(urllib.parse.urlparse("https:80"), ('https','','80','','',''))
714 self.assertEqual(urllib.parse.urlparse("path:80"), ('path','','80','','',''))
Senthil Kumaran84c7d9f2010-08-04 04:50:44 +0000715 self.assertEqual(urllib.parse.urlparse("http:"),('http','','','','',''))
716 self.assertEqual(urllib.parse.urlparse("https:"),('https','','','','',''))
717 self.assertEqual(urllib.parse.urlparse("http://www.python.org:80"),
718 ('http','www.python.org:80','','','',''))
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000719 # As usual, need to check bytes input as well
Tim Graham5a88d502019-10-18 09:07:20 -0400720 self.assertEqual(urllib.parse.urlparse(b"http:80"), (b'http',b'',b'80',b'',b'',b''))
721 self.assertEqual(urllib.parse.urlparse(b"https:80"), (b'https',b'',b'80',b'',b'',b''))
722 self.assertEqual(urllib.parse.urlparse(b"path:80"), (b'path',b'',b'80',b'',b'',b''))
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000723 self.assertEqual(urllib.parse.urlparse(b"http:"),(b'http',b'',b'',b'',b'',b''))
724 self.assertEqual(urllib.parse.urlparse(b"https:"),(b'https',b'',b'',b'',b'',b''))
725 self.assertEqual(urllib.parse.urlparse(b"http://www.python.org:80"),
726 (b'http',b'www.python.org:80',b'',b'',b'',b''))
Senthil Kumaran84c7d9f2010-08-04 04:50:44 +0000727
Facundo Batista2ac5de22008-07-07 18:24:11 +0000728 def test_usingsys(self):
729 # Issue 3314: sys module is used in the error
730 self.assertRaises(TypeError, urllib.parse.urlencode, "foo")
731
Senthil Kumaran6be85c52010-02-19 07:42:50 +0000732 def test_anyscheme(self):
733 # Issue 7904: s3://foo.com/stuff has netloc "foo.com".
Ezio Melotti5e15efa2010-02-19 14:49:02 +0000734 self.assertEqual(urllib.parse.urlparse("s3://foo.com/stuff"),
735 ('s3', 'foo.com', '/stuff', '', '', ''))
736 self.assertEqual(urllib.parse.urlparse("x-newscheme://foo.com/stuff"),
737 ('x-newscheme', 'foo.com', '/stuff', '', '', ''))
Senthil Kumaran1be320e2012-05-19 08:12:00 +0800738 self.assertEqual(urllib.parse.urlparse("x-newscheme://foo.com/stuff?query#fragment"),
739 ('x-newscheme', 'foo.com', '/stuff', '', 'query', 'fragment'))
740 self.assertEqual(urllib.parse.urlparse("x-newscheme://foo.com/stuff?query"),
741 ('x-newscheme', 'foo.com', '/stuff', '', 'query', ''))
742
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000743 # And for bytes...
744 self.assertEqual(urllib.parse.urlparse(b"s3://foo.com/stuff"),
745 (b's3', b'foo.com', b'/stuff', b'', b'', b''))
746 self.assertEqual(urllib.parse.urlparse(b"x-newscheme://foo.com/stuff"),
747 (b'x-newscheme', b'foo.com', b'/stuff', b'', b'', b''))
Senthil Kumaran1be320e2012-05-19 08:12:00 +0800748 self.assertEqual(urllib.parse.urlparse(b"x-newscheme://foo.com/stuff?query#fragment"),
749 (b'x-newscheme', b'foo.com', b'/stuff', b'', b'query', b'fragment'))
750 self.assertEqual(urllib.parse.urlparse(b"x-newscheme://foo.com/stuff?query"),
751 (b'x-newscheme', b'foo.com', b'/stuff', b'', b'query', b''))
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000752
Berker Peksag89584c92015-06-25 23:38:48 +0300753 def test_default_scheme(self):
754 # Exercise the scheme parameter of urlparse() and urlsplit()
755 for func in (urllib.parse.urlparse, urllib.parse.urlsplit):
756 with self.subTest(function=func):
757 result = func("http://example.net/", "ftp")
758 self.assertEqual(result.scheme, "http")
759 result = func(b"http://example.net/", b"ftp")
760 self.assertEqual(result.scheme, b"http")
761 self.assertEqual(func("path", "ftp").scheme, "ftp")
762 self.assertEqual(func("path", scheme="ftp").scheme, "ftp")
763 self.assertEqual(func(b"path", scheme=b"ftp").scheme, b"ftp")
764 self.assertEqual(func("path").scheme, "")
765 self.assertEqual(func(b"path").scheme, b"")
766 self.assertEqual(func(b"path", "").scheme, b"")
767
768 def test_parse_fragments(self):
769 # Exercise the allow_fragments parameter of urlparse() and urlsplit()
770 tests = (
postmasters90e01e52017-06-20 06:02:44 -0700771 ("http:#frag", "path", "frag"),
772 ("//example.net#frag", "path", "frag"),
773 ("index.html#frag", "path", "frag"),
774 (";a=b#frag", "params", "frag"),
775 ("?a=b#frag", "query", "frag"),
776 ("#frag", "path", "frag"),
777 ("abc#@frag", "path", "@frag"),
778 ("//abc#@frag", "path", "@frag"),
779 ("//abc:80#@frag", "path", "@frag"),
780 ("//abc#@frag:80", "path", "@frag:80"),
Berker Peksag89584c92015-06-25 23:38:48 +0300781 )
postmasters90e01e52017-06-20 06:02:44 -0700782 for url, attr, expected_frag in tests:
Berker Peksag89584c92015-06-25 23:38:48 +0300783 for func in (urllib.parse.urlparse, urllib.parse.urlsplit):
784 if attr == "params" and func is urllib.parse.urlsplit:
785 attr = "path"
786 with self.subTest(url=url, function=func):
787 result = func(url, allow_fragments=False)
788 self.assertEqual(result.fragment, "")
postmasters90e01e52017-06-20 06:02:44 -0700789 self.assertTrue(
790 getattr(result, attr).endswith("#" + expected_frag))
Berker Peksag89584c92015-06-25 23:38:48 +0300791 self.assertEqual(func(url, "", False).fragment, "")
792
793 result = func(url, allow_fragments=True)
postmasters90e01e52017-06-20 06:02:44 -0700794 self.assertEqual(result.fragment, expected_frag)
795 self.assertFalse(
796 getattr(result, attr).endswith(expected_frag))
797 self.assertEqual(func(url, "", True).fragment,
798 expected_frag)
799 self.assertEqual(func(url).fragment, expected_frag)
Berker Peksag89584c92015-06-25 23:38:48 +0300800
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000801 def test_mixed_types_rejected(self):
802 # Several functions that process either strings or ASCII encoded bytes
803 # accept multiple arguments. Check they reject mixed type input
Ezio Melottied3a7d22010-12-01 02:32:32 +0000804 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000805 urllib.parse.urlparse("www.python.org", b"http")
Ezio Melottied3a7d22010-12-01 02:32:32 +0000806 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000807 urllib.parse.urlparse(b"www.python.org", "http")
Ezio Melottied3a7d22010-12-01 02:32:32 +0000808 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000809 urllib.parse.urlsplit("www.python.org", b"http")
Ezio Melottied3a7d22010-12-01 02:32:32 +0000810 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000811 urllib.parse.urlsplit(b"www.python.org", "http")
Ezio Melottied3a7d22010-12-01 02:32:32 +0000812 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000813 urllib.parse.urlunparse(( b"http", "www.python.org","","","",""))
Ezio Melottied3a7d22010-12-01 02:32:32 +0000814 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000815 urllib.parse.urlunparse(("http", b"www.python.org","","","",""))
Ezio Melottied3a7d22010-12-01 02:32:32 +0000816 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000817 urllib.parse.urlunsplit((b"http", "www.python.org","","",""))
Ezio Melottied3a7d22010-12-01 02:32:32 +0000818 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000819 urllib.parse.urlunsplit(("http", b"www.python.org","","",""))
Ezio Melottied3a7d22010-12-01 02:32:32 +0000820 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000821 urllib.parse.urljoin("http://python.org", b"http://python.org")
Ezio Melottied3a7d22010-12-01 02:32:32 +0000822 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000823 urllib.parse.urljoin(b"http://python.org", "http://python.org")
824
825 def _check_result_type(self, str_type):
826 num_args = len(str_type._fields)
827 bytes_type = str_type._encoded_counterpart
828 self.assertIs(bytes_type._decoded_counterpart, str_type)
829 str_args = ('',) * num_args
830 bytes_args = (b'',) * num_args
831 str_result = str_type(*str_args)
832 bytes_result = bytes_type(*bytes_args)
833 encoding = 'ascii'
834 errors = 'strict'
835 self.assertEqual(str_result, str_args)
836 self.assertEqual(bytes_result.decode(), str_args)
837 self.assertEqual(bytes_result.decode(), str_result)
838 self.assertEqual(bytes_result.decode(encoding), str_args)
839 self.assertEqual(bytes_result.decode(encoding), str_result)
840 self.assertEqual(bytes_result.decode(encoding, errors), str_args)
841 self.assertEqual(bytes_result.decode(encoding, errors), str_result)
842 self.assertEqual(bytes_result, bytes_args)
843 self.assertEqual(str_result.encode(), bytes_args)
844 self.assertEqual(str_result.encode(), bytes_result)
845 self.assertEqual(str_result.encode(encoding), bytes_args)
846 self.assertEqual(str_result.encode(encoding), bytes_result)
847 self.assertEqual(str_result.encode(encoding, errors), bytes_args)
848 self.assertEqual(str_result.encode(encoding, errors), bytes_result)
849
850 def test_result_pairs(self):
851 # Check encoding and decoding between result pairs
852 result_types = [
853 urllib.parse.DefragResult,
854 urllib.parse.SplitResult,
855 urllib.parse.ParseResult,
856 ]
857 for result_type in result_types:
858 self._check_result_type(result_type)
859
Victor Stinner1d87deb2011-01-14 13:05:19 +0000860 def test_parse_qs_encoding(self):
861 result = urllib.parse.parse_qs("key=\u0141%E9", encoding="latin-1")
862 self.assertEqual(result, {'key': ['\u0141\xE9']})
863 result = urllib.parse.parse_qs("key=\u0141%C3%A9", encoding="utf-8")
864 self.assertEqual(result, {'key': ['\u0141\xE9']})
865 result = urllib.parse.parse_qs("key=\u0141%C3%A9", encoding="ascii")
866 self.assertEqual(result, {'key': ['\u0141\ufffd\ufffd']})
867 result = urllib.parse.parse_qs("key=\u0141%E9-", encoding="ascii")
868 self.assertEqual(result, {'key': ['\u0141\ufffd-']})
869 result = urllib.parse.parse_qs("key=\u0141%E9-", encoding="ascii",
870 errors="ignore")
871 self.assertEqual(result, {'key': ['\u0141-']})
872
873 def test_parse_qsl_encoding(self):
874 result = urllib.parse.parse_qsl("key=\u0141%E9", encoding="latin-1")
875 self.assertEqual(result, [('key', '\u0141\xE9')])
876 result = urllib.parse.parse_qsl("key=\u0141%C3%A9", encoding="utf-8")
877 self.assertEqual(result, [('key', '\u0141\xE9')])
878 result = urllib.parse.parse_qsl("key=\u0141%C3%A9", encoding="ascii")
879 self.assertEqual(result, [('key', '\u0141\ufffd\ufffd')])
880 result = urllib.parse.parse_qsl("key=\u0141%E9-", encoding="ascii")
881 self.assertEqual(result, [('key', '\u0141\ufffd-')])
882 result = urllib.parse.parse_qsl("key=\u0141%E9-", encoding="ascii",
883 errors="ignore")
884 self.assertEqual(result, [('key', '\u0141-')])
885
matthewbelisle-wf20914482018-10-19 05:52:59 -0500886 def test_parse_qsl_max_num_fields(self):
887 with self.assertRaises(ValueError):
888 urllib.parse.parse_qs('&'.join(['a=a']*11), max_num_fields=10)
889 with self.assertRaises(ValueError):
890 urllib.parse.parse_qs(';'.join(['a=a']*11), max_num_fields=10)
891 urllib.parse.parse_qs('&'.join(['a=a']*10), max_num_fields=10)
892
Senthil Kumarande02a712011-07-23 18:27:45 +0800893 def test_urlencode_sequences(self):
894 # Other tests incidentally urlencode things; test non-covered cases:
895 # Sequence and object values.
896 result = urllib.parse.urlencode({'a': [1, 2], 'b': (3, 4, 5)}, True)
Georg Brandl09a7c722012-02-20 21:31:46 +0100897 # we cannot rely on ordering here
898 assert set(result.split('&')) == {'a=1', 'a=2', 'b=3', 'b=4', 'b=5'}
Senthil Kumarande02a712011-07-23 18:27:45 +0800899
900 class Trivial:
901 def __str__(self):
902 return 'trivial'
903
904 result = urllib.parse.urlencode({'a': Trivial()}, True)
905 self.assertEqual(result, 'a=trivial')
906
R David Murrayc17686f2015-05-17 20:44:50 -0400907 def test_urlencode_quote_via(self):
908 result = urllib.parse.urlencode({'a': 'some value'})
909 self.assertEqual(result, "a=some+value")
910 result = urllib.parse.urlencode({'a': 'some value/another'},
911 quote_via=urllib.parse.quote)
912 self.assertEqual(result, "a=some%20value%2Fanother")
913 result = urllib.parse.urlencode({'a': 'some value/another'},
914 safe='/', quote_via=urllib.parse.quote)
915 self.assertEqual(result, "a=some%20value/another")
916
Senthil Kumarande02a712011-07-23 18:27:45 +0800917 def test_quote_from_bytes(self):
918 self.assertRaises(TypeError, urllib.parse.quote_from_bytes, 'foo')
919 result = urllib.parse.quote_from_bytes(b'archaeological arcana')
920 self.assertEqual(result, 'archaeological%20arcana')
921 result = urllib.parse.quote_from_bytes(b'')
922 self.assertEqual(result, '')
923
924 def test_unquote_to_bytes(self):
925 result = urllib.parse.unquote_to_bytes('abc%20def')
926 self.assertEqual(result, b'abc def')
927 result = urllib.parse.unquote_to_bytes('')
928 self.assertEqual(result, b'')
929
930 def test_quote_errors(self):
931 self.assertRaises(TypeError, urllib.parse.quote, b'foo',
932 encoding='utf-8')
933 self.assertRaises(TypeError, urllib.parse.quote, b'foo', errors='strict')
Victor Stinner1d87deb2011-01-14 13:05:19 +0000934
Ezio Melotti6709b7d2012-05-19 17:15:19 +0300935 def test_issue14072(self):
936 p1 = urllib.parse.urlsplit('tel:+31-641044153')
937 self.assertEqual(p1.scheme, 'tel')
938 self.assertEqual(p1.path, '+31-641044153')
939 p2 = urllib.parse.urlsplit('tel:+31641044153')
940 self.assertEqual(p2.scheme, 'tel')
941 self.assertEqual(p2.path, '+31641044153')
Senthil Kumaraned301992012-12-24 14:00:20 -0800942 # assert the behavior for urlparse
943 p1 = urllib.parse.urlparse('tel:+31-641044153')
944 self.assertEqual(p1.scheme, 'tel')
945 self.assertEqual(p1.path, '+31-641044153')
946 p2 = urllib.parse.urlparse('tel:+31641044153')
947 self.assertEqual(p2.scheme, 'tel')
948 self.assertEqual(p2.path, '+31641044153')
949
Matt Eaton2cb46612018-03-20 01:41:37 -0500950 def test_port_casting_failure_message(self):
951 message = "Port could not be cast to integer value as 'oracle'"
952 p1 = urllib.parse.urlparse('http://Server=sde; Service=sde:oracle')
953 with self.assertRaisesRegex(ValueError, message):
954 p1.port
955
956 p2 = urllib.parse.urlsplit('http://Server=sde; Service=sde:oracle')
957 with self.assertRaisesRegex(ValueError, message):
958 p2.port
959
Senthil Kumaraned301992012-12-24 14:00:20 -0800960 def test_telurl_params(self):
961 p1 = urllib.parse.urlparse('tel:123-4;phone-context=+1-650-516')
962 self.assertEqual(p1.scheme, 'tel')
963 self.assertEqual(p1.path, '123-4')
964 self.assertEqual(p1.params, 'phone-context=+1-650-516')
965
966 p1 = urllib.parse.urlparse('tel:+1-201-555-0123')
967 self.assertEqual(p1.scheme, 'tel')
968 self.assertEqual(p1.path, '+1-201-555-0123')
969 self.assertEqual(p1.params, '')
970
971 p1 = urllib.parse.urlparse('tel:7042;phone-context=example.com')
972 self.assertEqual(p1.scheme, 'tel')
973 self.assertEqual(p1.path, '7042')
974 self.assertEqual(p1.params, 'phone-context=example.com')
975
976 p1 = urllib.parse.urlparse('tel:863-1234;phone-context=+1-914-555')
977 self.assertEqual(p1.scheme, 'tel')
978 self.assertEqual(p1.path, '863-1234')
979 self.assertEqual(p1.params, 'phone-context=+1-914-555')
980
R David Murrayf5163882013-03-21 20:56:51 -0400981 def test_Quoter_repr(self):
982 quoter = urllib.parse.Quoter(urllib.parse._ALWAYS_SAFE)
983 self.assertIn('Quoter', repr(quoter))
984
Serhiy Storchaka15154502015-04-07 19:09:01 +0300985 def test_all(self):
986 expected = []
987 undocumented = {
988 'splitattr', 'splithost', 'splitnport', 'splitpasswd',
989 'splitport', 'splitquery', 'splittag', 'splittype', 'splituser',
990 'splitvalue',
991 'Quoter', 'ResultBase', 'clear_cache', 'to_bytes', 'unwrap',
992 }
993 for name in dir(urllib.parse):
994 if name.startswith('_') or name in undocumented:
995 continue
996 object = getattr(urllib.parse, name)
997 if getattr(object, '__module__', None) == 'urllib.parse':
998 expected.append(name)
999 self.assertCountEqual(urllib.parse.__all__, expected)
1000
Steve Dower16e6f7d2019-03-07 08:02:26 -08001001 def test_urlsplit_normalization(self):
1002 # Certain characters should never occur in the netloc,
1003 # including under normalization.
1004 # Ensure that ALL of them are detected and cause an error
1005 illegal_chars = '/:#?@'
1006 hex_chars = {'{:04X}'.format(ord(c)) for c in illegal_chars}
1007 denorm_chars = [
1008 c for c in map(chr, range(128, sys.maxunicode))
1009 if (hex_chars & set(unicodedata.decomposition(c).split()))
1010 and c not in illegal_chars
1011 ]
1012 # Sanity check that we found at least one such character
1013 self.assertIn('\u2100', denorm_chars)
1014 self.assertIn('\uFF03', denorm_chars)
1015
Steve Dowerd537ab02019-04-30 12:03:02 +00001016 # bpo-36742: Verify port separators are ignored when they
1017 # existed prior to decomposition
1018 urllib.parse.urlsplit('http://\u30d5\u309a:80')
1019 with self.assertRaises(ValueError):
1020 urllib.parse.urlsplit('http://\u30d5\u309a\ufe1380')
1021
Steve Dower16e6f7d2019-03-07 08:02:26 -08001022 for scheme in ["http", "https", "ftp"]:
Steve Dower8d0ef0b2019-06-04 08:55:30 -07001023 for netloc in ["netloc{}false.netloc", "n{}user@netloc"]:
1024 for c in denorm_chars:
1025 url = "{}://{}/path".format(scheme, netloc.format(c))
1026 with self.subTest(url=url, char='{:04X}'.format(ord(c))):
1027 with self.assertRaises(ValueError):
1028 urllib.parse.urlsplit(url)
Senthil Kumaran6be85c52010-02-19 07:42:50 +00001029
class Utility_Tests(unittest.TestCase):
    """Tests for the utility functions of urllib.parse."""
    # In Python 2 this test class was in test_urllib.

    def _check_each(self, func, cases):
        # Assert func(argument) == expected for every
        # (argument, expected) pair, in the order given.
        for argument, expected in cases:
            self.assertEqual(func(argument), expected)

    def test_splittype(self):
        self._check_each(urllib.parse._splittype, (
            ('type:opaquestring', ('type', 'opaquestring')),
            ('opaquestring', (None, 'opaquestring')),
            (':opaquestring', (None, ':opaquestring')),
            ('type:', ('type', '')),
            ('type:opaque:string', ('type', 'opaque:string')),
        ))

    def test_splithost(self):
        self._check_each(urllib.parse._splithost, (
            ('//www.example.org:80/foo/bar/baz.html',
             ('www.example.org:80', '/foo/bar/baz.html')),
            ('//www.example.org:80',
             ('www.example.org:80', '')),
            ('/foo/bar/baz.html',
             (None, '/foo/bar/baz.html')),

            # bpo-30500: # starts a fragment.
            ('//127.0.0.1#@host.com',
             ('127.0.0.1', '/#@host.com')),
            ('//127.0.0.1#@host.com:80',
             ('127.0.0.1', '/#@host.com:80')),
            ('//127.0.0.1:80#@host.com',
             ('127.0.0.1:80', '/#@host.com')),

            # Empty host is returned as empty string.
            ("///file",
             ('', '/file')),

            # Trailing semicolon, question mark and hash symbol are kept.
            ("//example.net/file;",
             ('example.net', '/file;')),
            ("//example.net/file?",
             ('example.net', '/file?')),
            ("//example.net/file#",
             ('example.net', '/file#')),
        ))

    def test_splituser(self):
        self._check_each(urllib.parse._splituser, (
            ('User:Pass@www.python.org:080',
             ('User:Pass', 'www.python.org:080')),
            ('@www.python.org:080',
             ('', 'www.python.org:080')),
            ('www.python.org:080',
             (None, 'www.python.org:080')),
            ('User:Pass@',
             ('User:Pass', '')),
            ('User@example.com:Pass@www.python.org:080',
             ('User@example.com:Pass', 'www.python.org:080')),
        ))

    def test_splitpasswd(self):
        # Some of the password examples are not sensible, but they are
        # included to conform to RFC2617 and to address issue4675.
        self._check_each(urllib.parse._splitpasswd, (
            ('user:ab', ('user', 'ab')),
            ('user:a\nb', ('user', 'a\nb')),
            ('user:a\tb', ('user', 'a\tb')),
            ('user:a\rb', ('user', 'a\rb')),
            ('user:a\fb', ('user', 'a\fb')),
            ('user:a\vb', ('user', 'a\vb')),
            ('user:a:b', ('user', 'a:b')),
            ('user:a b', ('user', 'a b')),
            ('user 2:ab', ('user 2', 'ab')),
            ('user+1:a+b', ('user+1', 'a+b')),
            ('user:', ('user', '')),
            ('user', ('user', None)),
            (':ab', ('', 'ab')),
        ))

    def test_splitport(self):
        self._check_each(urllib.parse._splitport, (
            ('parrot:88', ('parrot', '88')),
            ('parrot', ('parrot', None)),
            ('parrot:', ('parrot', None)),
            ('127.0.0.1', ('127.0.0.1', None)),
            ('parrot:cheese', ('parrot:cheese', None)),
            ('[::1]:88', ('[::1]', '88')),
            ('[::1]', ('[::1]', None)),
            (':88', ('', '88')),
        ))

    def test_splitnport(self):
        splitnport = urllib.parse._splitnport
        self.assertEqual(splitnport('parrot:88'), ('parrot', 88))
        # A missing or empty port gives the default: -1 unless the
        # caller supplies one.
        for netloc, host in (('parrot', 'parrot'),
                             ('parrot:', 'parrot'),
                             ('127.0.0.1', '127.0.0.1')):
            self.assertEqual(splitnport(netloc), (host, -1))
            self.assertEqual(splitnport(netloc, 55), (host, 55))
        # A non-numeric port gives None, even when a default is supplied.
        self.assertEqual(splitnport('parrot:cheese'), ('parrot', None))
        self.assertEqual(splitnport('parrot:cheese', 55), ('parrot', None))

    def test_splitquery(self):
        # Normal cases are exercised by other tests; this covers a second
        # '?' at the end, a URL without any query, and a bare query with
        # nothing in front of it. (testcase ensuring coverage)
        self._check_each(urllib.parse._splitquery, (
            ('http://python.org/fake?foo=bar',
             ('http://python.org/fake', 'foo=bar')),
            ('http://python.org/fake?foo=bar?',
             ('http://python.org/fake?foo=bar', '')),
            ('http://python.org/fake',
             ('http://python.org/fake', None)),
            ('?foo=bar', ('', 'foo=bar')),
        ))

    def test_splittag(self):
        self._check_each(urllib.parse._splittag, (
            ('http://example.com?foo=bar#baz',
             ('http://example.com?foo=bar', 'baz')),
            ('http://example.com?foo=bar#',
             ('http://example.com?foo=bar', '')),
            ('#baz', ('', 'baz')),
            ('http://example.com?foo=bar',
             ('http://example.com?foo=bar', None)),
            ('http://example.com?foo=bar#baz#boo',
             ('http://example.com?foo=bar#baz', 'boo')),
        ))

    def test_splitattr(self):
        self._check_each(urllib.parse._splitattr, (
            ('/path;attr1=value1;attr2=value2',
             ('/path', ['attr1=value1', 'attr2=value2'])),
            ('/path;', ('/path', [''])),
            (';attr1=value1;attr2=value2',
             ('', ['attr1=value1', 'attr2=value2'])),
            ('/path', ('/path', [])),
        ))

    def test_splitvalue(self):
        # Normal cases are exercised by other tests; test pathological cases
        # with no key/value pairs. (testcase ensuring coverage)
        self._check_each(urllib.parse._splitvalue, (
            ('foo=bar', ('foo', 'bar')),
            ('foo=', ('foo', '')),
            ('=bar', ('', 'bar')),
            ('foobar', ('foobar', None)),
            ('foo=bar=baz', ('foo', 'bar=baz')),
        ))

    def test_to_bytes(self):
        to_bytes = urllib.parse._to_bytes
        # An ASCII-only URL comes back equal to the str that was passed.
        self.assertEqual(to_bytes('http://www.python.org'),
                         'http://www.python.org')
        # A non-ASCII character must be reported as a UnicodeError.
        with self.assertRaises(UnicodeError):
            to_bytes('http://www.python.org/medi\u00e6val')

    def test_unwrap(self):
        # Every wrapped spelling must reduce to the same bare URL.
        wrapped_forms = ('<URL:scheme://host/path>', '<scheme://host/path>',
                         'URL:scheme://host/path', 'scheme://host/path')
        for wrapped_url in wrapped_forms:
            self.assertEqual(urllib.parse.unwrap(wrapped_url),
                             'scheme://host/path')
Serhiy Storchaka9270be72015-03-02 16:32:29 +02001179
Skip Montanaro6ec967d2002-03-23 05:32:10 +00001180
class DeprecationTest(unittest.TestCase):
    # Each public split*() helper and to_bytes() in urllib.parse must emit a
    # DeprecationWarning whose text matches the message asserted below.

    def _warning_text(self, func):
        # Call func('') and return the message of the DeprecationWarning it
        # emits; assertWarns fails the test if no such warning is raised.
        with self.assertWarns(DeprecationWarning) as caught:
            func('')
        return str(caught.warning)

    def test_splittype_deprecation(self):
        self.assertEqual(
            self._warning_text(urllib.parse.splittype),
            'urllib.parse.splittype() is deprecated as of 3.8, '
            'use urllib.parse.urlparse() instead')

    def test_splithost_deprecation(self):
        self.assertEqual(
            self._warning_text(urllib.parse.splithost),
            'urllib.parse.splithost() is deprecated as of 3.8, '
            'use urllib.parse.urlparse() instead')

    def test_splituser_deprecation(self):
        self.assertEqual(
            self._warning_text(urllib.parse.splituser),
            'urllib.parse.splituser() is deprecated as of 3.8, '
            'use urllib.parse.urlparse() instead')

    def test_splitpasswd_deprecation(self):
        self.assertEqual(
            self._warning_text(urllib.parse.splitpasswd),
            'urllib.parse.splitpasswd() is deprecated as of 3.8, '
            'use urllib.parse.urlparse() instead')

    def test_splitport_deprecation(self):
        self.assertEqual(
            self._warning_text(urllib.parse.splitport),
            'urllib.parse.splitport() is deprecated as of 3.8, '
            'use urllib.parse.urlparse() instead')

    def test_splitnport_deprecation(self):
        self.assertEqual(
            self._warning_text(urllib.parse.splitnport),
            'urllib.parse.splitnport() is deprecated as of 3.8, '
            'use urllib.parse.urlparse() instead')

    def test_splitquery_deprecation(self):
        self.assertEqual(
            self._warning_text(urllib.parse.splitquery),
            'urllib.parse.splitquery() is deprecated as of 3.8, '
            'use urllib.parse.urlparse() instead')

    def test_splittag_deprecation(self):
        self.assertEqual(
            self._warning_text(urllib.parse.splittag),
            'urllib.parse.splittag() is deprecated as of 3.8, '
            'use urllib.parse.urlparse() instead')

    def test_splitattr_deprecation(self):
        self.assertEqual(
            self._warning_text(urllib.parse.splitattr),
            'urllib.parse.splitattr() is deprecated as of 3.8, '
            'use urllib.parse.urlparse() instead')

    def test_splitvalue_deprecation(self):
        # Unlike the other split*() helpers, the suggested replacement here
        # is parse_qsl() rather than urlparse().
        self.assertEqual(
            self._warning_text(urllib.parse.splitvalue),
            'urllib.parse.splitvalue() is deprecated as of 3.8, '
            'use urllib.parse.parse_qsl() instead')

    def test_to_bytes_deprecation(self):
        # to_bytes() is deprecated without naming a replacement.
        self.assertEqual(
            self._warning_text(urllib.parse.to_bytes),
            'urllib.parse.to_bytes() is deprecated as of 3.8')
1258
Cheryl Sabella0250de42018-04-25 16:51:54 -07001259
# Run this module's tests when the file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()