blob: 67341fecef94cd8e7bd4558c7df456c7c7f19a3e [file] [log] [blame]
Steve Dower16e6f7d2019-03-07 08:02:26 -08001import sys
2import unicodedata
Skip Montanaro6ec967d2002-03-23 05:32:10 +00003import unittest
Jeremy Hylton1afc1692008-06-18 20:49:58 +00004import urllib.parse
Fred Drakea4d18a02001-01-05 05:57:04 +00005
# Base URLs that the urljoin() tests resolve relative references against.
RFC1808_BASE = 'http://a/b/c/d;p?q#f'
RFC2396_BASE = 'http://a/b/c/d;p?q'
RFC3986_BASE = 'http://a/b/c/d;p?q'
SIMPLE_BASE = 'http://a/b/c/d'
Fred Drakea4d18a02001-01-05 05:57:04 +000010
# Table of parse_qsl inputs.  Every entry is a (query, expected) pair:
# the query is a str or bytes object and expected is the list of
# (name, value) tuples the query should decode to.
parse_qsl_test_cases = [
    # str queries
    ('', []),
    ('&', []),
    ('&&', []),
    ('=', [('', '')]),
    ('=a', [('', 'a')]),
    ('a', [('a', '')]),
    ('a=', [('a', '')]),
    ('&a=b', [('a', 'b')]),
    ('a=a+b&b=b+c', [('a', 'a b'), ('b', 'b c')]),
    ('a=1&a=2', [('a', '1'), ('a', '2')]),
    # the same queries as bytes
    (b'', []),
    (b'&', []),
    (b'&&', []),
    (b'=', [(b'', b'')]),
    (b'=a', [(b'', b'a')]),
    (b'a', [(b'a', b'')]),
    (b'a=', [(b'a', b'')]),
    (b'&a=b', [(b'a', b'b')]),
    (b'a=a+b&b=b+c', [(b'a', b'a b'), (b'b', b'b c')]),
    (b'a=1&a=2', [(b'a', b'1'), (b'a', b'2')]),
    # ';' is expected to stay inside the name/value, not split fields
    (';a=b', [(';a', 'b')]),
    ('a=a+b;b=b+c', [('a', 'a b;b=b c')]),
    (b';a=b', [(b';a', b'b')]),
    (b'a=a+b;b=b+c', [(b'a', b'a b;b=b c')]),
]
40
# Table of parse_qs inputs.  Every entry is a (query, expected) pair:
# the query is a str or bytes object and expected is the dictionary
# mapping each name to the list of its values.
parse_qs_test_cases = [
    # str queries
    ('', {}),
    ('&', {}),
    ('&&', {}),
    ('=', {'': ['']}),
    ('=a', {'': ['a']}),
    ('a', {'a': ['']}),
    ('a=', {'a': ['']}),
    ('&a=b', {'a': ['b']}),
    ('a=a+b&b=b+c', {'a': ['a b'], 'b': ['b c']}),
    ('a=1&a=2', {'a': ['1', '2']}),
    # the same queries as bytes
    (b'', {}),
    (b'&', {}),
    (b'&&', {}),
    (b'=', {b'': [b'']}),
    (b'=a', {b'': [b'a']}),
    (b'a', {b'a': [b'']}),
    (b'a=', {b'a': [b'']}),
    (b'&a=b', {b'a': [b'b']}),
    (b'a=a+b&b=b+c', {b'a': [b'a b'], b'b': [b'b c']}),
    (b'a=1&a=2', {b'a': [b'1', b'2']}),
    # ';' is expected to stay inside the name/value, not split fields
    (';a=b', {';a': ['b']}),
    ('a=a+b;b=b+c', {'a': ['a b;b=b c']}),
    (b';a=b', {b';a': [b'b']}),
    (b'a=a+b;b=b+c', {b'a': [b'a b;b=b c']}),
]
70
Skip Montanaro6ec967d2002-03-23 05:32:10 +000071class UrlParseTestCase(unittest.TestCase):
Johannes Gijsbers41e4faa2005-01-09 15:29:10 +000072
73 def checkRoundtrips(self, url, parsed, split):
Jeremy Hylton1afc1692008-06-18 20:49:58 +000074 result = urllib.parse.urlparse(url)
Johannes Gijsbers41e4faa2005-01-09 15:29:10 +000075 self.assertEqual(result, parsed)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +000076 t = (result.scheme, result.netloc, result.path,
77 result.params, result.query, result.fragment)
78 self.assertEqual(t, parsed)
Johannes Gijsbers41e4faa2005-01-09 15:29:10 +000079 # put it back together and it should be the same
Jeremy Hylton1afc1692008-06-18 20:49:58 +000080 result2 = urllib.parse.urlunparse(result)
Johannes Gijsbers41e4faa2005-01-09 15:29:10 +000081 self.assertEqual(result2, url)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +000082 self.assertEqual(result2, result.geturl())
83
84 # the result of geturl() is a fixpoint; we can always parse it
85 # again to get the same result:
Jeremy Hylton1afc1692008-06-18 20:49:58 +000086 result3 = urllib.parse.urlparse(result.geturl())
Thomas Wouters49fd7fa2006-04-21 10:40:58 +000087 self.assertEqual(result3.geturl(), result.geturl())
88 self.assertEqual(result3, result)
89 self.assertEqual(result3.scheme, result.scheme)
90 self.assertEqual(result3.netloc, result.netloc)
91 self.assertEqual(result3.path, result.path)
92 self.assertEqual(result3.params, result.params)
93 self.assertEqual(result3.query, result.query)
94 self.assertEqual(result3.fragment, result.fragment)
95 self.assertEqual(result3.username, result.username)
96 self.assertEqual(result3.password, result.password)
97 self.assertEqual(result3.hostname, result.hostname)
98 self.assertEqual(result3.port, result.port)
Johannes Gijsbers41e4faa2005-01-09 15:29:10 +000099
100 # check the roundtrip using urlsplit() as well
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000101 result = urllib.parse.urlsplit(url)
Johannes Gijsbers41e4faa2005-01-09 15:29:10 +0000102 self.assertEqual(result, split)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000103 t = (result.scheme, result.netloc, result.path,
104 result.query, result.fragment)
105 self.assertEqual(t, split)
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000106 result2 = urllib.parse.urlunsplit(result)
Johannes Gijsbers41e4faa2005-01-09 15:29:10 +0000107 self.assertEqual(result2, url)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000108 self.assertEqual(result2, result.geturl())
109
110 # check the fixpoint property of re-parsing the result of geturl()
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000111 result3 = urllib.parse.urlsplit(result.geturl())
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000112 self.assertEqual(result3.geturl(), result.geturl())
113 self.assertEqual(result3, result)
114 self.assertEqual(result3.scheme, result.scheme)
115 self.assertEqual(result3.netloc, result.netloc)
116 self.assertEqual(result3.path, result.path)
117 self.assertEqual(result3.query, result.query)
118 self.assertEqual(result3.fragment, result.fragment)
119 self.assertEqual(result3.username, result.username)
120 self.assertEqual(result3.password, result.password)
121 self.assertEqual(result3.hostname, result.hostname)
122 self.assertEqual(result3.port, result.port)
Johannes Gijsbers41e4faa2005-01-09 15:29:10 +0000123
Facundo Batistac469d4c2008-09-03 22:49:01 +0000124 def test_qsl(self):
125 for orig, expect in parse_qsl_test_cases:
126 result = urllib.parse.parse_qsl(orig, keep_blank_values=True)
Senthil Kumarande02a712011-07-23 18:27:45 +0800127 self.assertEqual(result, expect, "Error parsing %r" % orig)
128 expect_without_blanks = [v for v in expect if len(v[1])]
129 result = urllib.parse.parse_qsl(orig, keep_blank_values=False)
130 self.assertEqual(result, expect_without_blanks,
131 "Error parsing %r" % orig)
Facundo Batistac469d4c2008-09-03 22:49:01 +0000132
Senthil Kumarane38415e2016-04-16 07:33:15 -0700133 def test_qs(self):
134 for orig, expect in parse_qs_test_cases:
135 result = urllib.parse.parse_qs(orig, keep_blank_values=True)
136 self.assertEqual(result, expect, "Error parsing %r" % orig)
137 expect_without_blanks = {v: expect[v]
138 for v in expect if len(expect[v][0])}
139 result = urllib.parse.parse_qs(orig, keep_blank_values=False)
140 self.assertEqual(result, expect_without_blanks,
141 "Error parsing %r" % orig)
142
Johannes Gijsbers41e4faa2005-01-09 15:29:10 +0000143 def test_roundtrips(self):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000144 str_cases = [
Fred Drake70705652002-10-16 21:02:36 +0000145 ('file:///tmp/junk.txt',
146 ('file', '', '/tmp/junk.txt', '', '', ''),
147 ('file', '', '/tmp/junk.txt', '', '')),
Neal Norwitz68b539e2003-01-06 06:58:31 +0000148 ('imap://mail.python.org/mbox1',
149 ('imap', 'mail.python.org', '/mbox1', '', '', ''),
150 ('imap', 'mail.python.org', '/mbox1', '', '')),
Skip Montanarof09b88e2003-01-06 20:27:03 +0000151 ('mms://wms.sys.hinet.net/cts/Drama/09006251100.asf',
Johannes Gijsbers41e4faa2005-01-09 15:29:10 +0000152 ('mms', 'wms.sys.hinet.net', '/cts/Drama/09006251100.asf',
153 '', '', ''),
154 ('mms', 'wms.sys.hinet.net', '/cts/Drama/09006251100.asf',
155 '', '')),
Senthil Kumaraneaaec272009-03-30 21:54:41 +0000156 ('nfs://server/path/to/file.txt',
157 ('nfs', 'server', '/path/to/file.txt', '', '', ''),
158 ('nfs', 'server', '/path/to/file.txt', '', '')),
Fred Drake50747fc2005-07-29 15:56:32 +0000159 ('svn+ssh://svn.zope.org/repos/main/ZConfig/trunk/',
160 ('svn+ssh', 'svn.zope.org', '/repos/main/ZConfig/trunk/',
161 '', '', ''),
162 ('svn+ssh', 'svn.zope.org', '/repos/main/ZConfig/trunk/',
Senthil Kumaranead169d2010-05-13 03:37:23 +0000163 '', '')),
164 ('git+ssh://git@github.com/user/project.git',
165 ('git+ssh', 'git@github.com','/user/project.git',
166 '','',''),
167 ('git+ssh', 'git@github.com','/user/project.git',
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000168 '', '')),
Johannes Gijsbers41e4faa2005-01-09 15:29:10 +0000169 ]
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000170 def _encode(t):
171 return (t[0].encode('ascii'),
172 tuple(x.encode('ascii') for x in t[1]),
173 tuple(x.encode('ascii') for x in t[2]))
174 bytes_cases = [_encode(x) for x in str_cases]
175 for url, parsed, split in str_cases + bytes_cases:
Johannes Gijsbers41e4faa2005-01-09 15:29:10 +0000176 self.checkRoundtrips(url, parsed, split)
Michael W. Hudsonbd3e7712002-03-18 13:06:00 +0000177
Johannes Gijsbers41e4faa2005-01-09 15:29:10 +0000178 def test_http_roundtrips(self):
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000179 # urllib.parse.urlsplit treats 'http:' as an optimized special case,
Johannes Gijsbers41e4faa2005-01-09 15:29:10 +0000180 # so we test both 'http:' and 'https:' in all the following.
181 # Three cheers for white box knowledge!
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000182 str_cases = [
Johannes Gijsbers41e4faa2005-01-09 15:29:10 +0000183 ('://www.python.org',
184 ('www.python.org', '', '', '', ''),
185 ('www.python.org', '', '', '')),
186 ('://www.python.org#abc',
187 ('www.python.org', '', '', '', 'abc'),
188 ('www.python.org', '', '', 'abc')),
189 ('://www.python.org?q=abc',
190 ('www.python.org', '', '', 'q=abc', ''),
191 ('www.python.org', '', 'q=abc', '')),
192 ('://www.python.org/#abc',
193 ('www.python.org', '/', '', '', 'abc'),
194 ('www.python.org', '/', '', 'abc')),
195 ('://a/b/c/d;p?q#f',
196 ('a', '/b/c/d', 'p', 'q', 'f'),
197 ('a', '/b/c/d;p', 'q', 'f')),
198 ]
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000199 def _encode(t):
200 return (t[0].encode('ascii'),
201 tuple(x.encode('ascii') for x in t[1]),
202 tuple(x.encode('ascii') for x in t[2]))
203 bytes_cases = [_encode(x) for x in str_cases]
204 str_schemes = ('http', 'https')
205 bytes_schemes = (b'http', b'https')
206 str_tests = str_schemes, str_cases
207 bytes_tests = bytes_schemes, bytes_cases
208 for schemes, test_cases in (str_tests, bytes_tests):
209 for scheme in schemes:
210 for url, parsed, split in test_cases:
211 url = scheme + url
212 parsed = (scheme,) + parsed
213 split = (scheme,) + split
214 self.checkRoundtrips(url, parsed, split)
Fred Drake70705652002-10-16 21:02:36 +0000215
Skip Montanaro6ec967d2002-03-23 05:32:10 +0000216 def checkJoin(self, base, relurl, expected):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000217 str_components = (base, relurl, expected)
218 self.assertEqual(urllib.parse.urljoin(base, relurl), expected)
219 bytes_components = baseb, relurlb, expectedb = [
220 x.encode('ascii') for x in str_components]
221 self.assertEqual(urllib.parse.urljoin(baseb, relurlb), expectedb)
Guido van Rossumbbc05682002-10-14 19:59:54 +0000222
223 def test_unparse_parse(self):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000224 str_cases = ['Python', './Python','x-newscheme://foo.com/stuff','x://y','x:/y','x:/','/',]
225 bytes_cases = [x.encode('ascii') for x in str_cases]
226 for u in str_cases + bytes_cases:
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000227 self.assertEqual(urllib.parse.urlunsplit(urllib.parse.urlsplit(u)), u)
228 self.assertEqual(urllib.parse.urlunparse(urllib.parse.urlparse(u)), u)
Fred Drakea4d18a02001-01-05 05:57:04 +0000229
Skip Montanaro6ec967d2002-03-23 05:32:10 +0000230 def test_RFC1808(self):
231 # "normal" cases from RFC 1808:
232 self.checkJoin(RFC1808_BASE, 'g:h', 'g:h')
233 self.checkJoin(RFC1808_BASE, 'g', 'http://a/b/c/g')
234 self.checkJoin(RFC1808_BASE, './g', 'http://a/b/c/g')
235 self.checkJoin(RFC1808_BASE, 'g/', 'http://a/b/c/g/')
236 self.checkJoin(RFC1808_BASE, '/g', 'http://a/g')
237 self.checkJoin(RFC1808_BASE, '//g', 'http://g')
Skip Montanaro6ec967d2002-03-23 05:32:10 +0000238 self.checkJoin(RFC1808_BASE, 'g?y', 'http://a/b/c/g?y')
239 self.checkJoin(RFC1808_BASE, 'g?y/./x', 'http://a/b/c/g?y/./x')
240 self.checkJoin(RFC1808_BASE, '#s', 'http://a/b/c/d;p?q#s')
241 self.checkJoin(RFC1808_BASE, 'g#s', 'http://a/b/c/g#s')
242 self.checkJoin(RFC1808_BASE, 'g#s/./x', 'http://a/b/c/g#s/./x')
243 self.checkJoin(RFC1808_BASE, 'g?y#s', 'http://a/b/c/g?y#s')
Skip Montanaro6ec967d2002-03-23 05:32:10 +0000244 self.checkJoin(RFC1808_BASE, 'g;x', 'http://a/b/c/g;x')
245 self.checkJoin(RFC1808_BASE, 'g;x?y#s', 'http://a/b/c/g;x?y#s')
246 self.checkJoin(RFC1808_BASE, '.', 'http://a/b/c/')
247 self.checkJoin(RFC1808_BASE, './', 'http://a/b/c/')
248 self.checkJoin(RFC1808_BASE, '..', 'http://a/b/')
249 self.checkJoin(RFC1808_BASE, '../', 'http://a/b/')
250 self.checkJoin(RFC1808_BASE, '../g', 'http://a/b/g')
251 self.checkJoin(RFC1808_BASE, '../..', 'http://a/')
252 self.checkJoin(RFC1808_BASE, '../../', 'http://a/')
253 self.checkJoin(RFC1808_BASE, '../../g', 'http://a/g')
Fred Drakea4d18a02001-01-05 05:57:04 +0000254
Skip Montanaro6ec967d2002-03-23 05:32:10 +0000255 # "abnormal" cases from RFC 1808:
256 self.checkJoin(RFC1808_BASE, '', 'http://a/b/c/d;p?q#f')
Skip Montanaro6ec967d2002-03-23 05:32:10 +0000257 self.checkJoin(RFC1808_BASE, 'g.', 'http://a/b/c/g.')
258 self.checkJoin(RFC1808_BASE, '.g', 'http://a/b/c/.g')
259 self.checkJoin(RFC1808_BASE, 'g..', 'http://a/b/c/g..')
260 self.checkJoin(RFC1808_BASE, '..g', 'http://a/b/c/..g')
261 self.checkJoin(RFC1808_BASE, './../g', 'http://a/b/g')
262 self.checkJoin(RFC1808_BASE, './g/.', 'http://a/b/c/g/')
263 self.checkJoin(RFC1808_BASE, 'g/./h', 'http://a/b/c/g/h')
264 self.checkJoin(RFC1808_BASE, 'g/../h', 'http://a/b/c/h')
Fred Drakea4d18a02001-01-05 05:57:04 +0000265
Skip Montanaro6ec967d2002-03-23 05:32:10 +0000266 # RFC 1808 and RFC 1630 disagree on these (according to RFC 1808),
267 # so we'll not actually run these tests (which expect 1808 behavior).
268 #self.checkJoin(RFC1808_BASE, 'http:g', 'http:g')
269 #self.checkJoin(RFC1808_BASE, 'http:', 'http:')
Fred Drakea4d18a02001-01-05 05:57:04 +0000270
Antoine Pitrou55ac5b32014-08-21 19:16:17 -0400271 # XXX: The following tests are no longer compatible with RFC3986
272 # self.checkJoin(RFC1808_BASE, '../../../g', 'http://a/../g')
273 # self.checkJoin(RFC1808_BASE, '../../../../g', 'http://a/../../g')
274 # self.checkJoin(RFC1808_BASE, '/./g', 'http://a/./g')
275 # self.checkJoin(RFC1808_BASE, '/../g', 'http://a/../g')
276
277
Senthil Kumaran397eb442011-04-15 18:20:24 +0800278 def test_RFC2368(self):
279 # Issue 11467: path that starts with a number is not parsed correctly
280 self.assertEqual(urllib.parse.urlparse('mailto:1337@example.org'),
281 ('mailto', '', '1337@example.org', '', '', ''))
282
Skip Montanaro6ec967d2002-03-23 05:32:10 +0000283 def test_RFC2396(self):
284 # cases from RFC 2396
Fred Drakea4d18a02001-01-05 05:57:04 +0000285
Skip Montanaro6ec967d2002-03-23 05:32:10 +0000286 self.checkJoin(RFC2396_BASE, 'g:h', 'g:h')
287 self.checkJoin(RFC2396_BASE, 'g', 'http://a/b/c/g')
288 self.checkJoin(RFC2396_BASE, './g', 'http://a/b/c/g')
289 self.checkJoin(RFC2396_BASE, 'g/', 'http://a/b/c/g/')
290 self.checkJoin(RFC2396_BASE, '/g', 'http://a/g')
291 self.checkJoin(RFC2396_BASE, '//g', 'http://g')
292 self.checkJoin(RFC2396_BASE, 'g?y', 'http://a/b/c/g?y')
293 self.checkJoin(RFC2396_BASE, '#s', 'http://a/b/c/d;p?q#s')
294 self.checkJoin(RFC2396_BASE, 'g#s', 'http://a/b/c/g#s')
295 self.checkJoin(RFC2396_BASE, 'g?y#s', 'http://a/b/c/g?y#s')
296 self.checkJoin(RFC2396_BASE, 'g;x', 'http://a/b/c/g;x')
297 self.checkJoin(RFC2396_BASE, 'g;x?y#s', 'http://a/b/c/g;x?y#s')
298 self.checkJoin(RFC2396_BASE, '.', 'http://a/b/c/')
299 self.checkJoin(RFC2396_BASE, './', 'http://a/b/c/')
300 self.checkJoin(RFC2396_BASE, '..', 'http://a/b/')
301 self.checkJoin(RFC2396_BASE, '../', 'http://a/b/')
302 self.checkJoin(RFC2396_BASE, '../g', 'http://a/b/g')
303 self.checkJoin(RFC2396_BASE, '../..', 'http://a/')
304 self.checkJoin(RFC2396_BASE, '../../', 'http://a/')
305 self.checkJoin(RFC2396_BASE, '../../g', 'http://a/g')
306 self.checkJoin(RFC2396_BASE, '', RFC2396_BASE)
Skip Montanaro6ec967d2002-03-23 05:32:10 +0000307 self.checkJoin(RFC2396_BASE, 'g.', 'http://a/b/c/g.')
308 self.checkJoin(RFC2396_BASE, '.g', 'http://a/b/c/.g')
309 self.checkJoin(RFC2396_BASE, 'g..', 'http://a/b/c/g..')
310 self.checkJoin(RFC2396_BASE, '..g', 'http://a/b/c/..g')
311 self.checkJoin(RFC2396_BASE, './../g', 'http://a/b/g')
312 self.checkJoin(RFC2396_BASE, './g/.', 'http://a/b/c/g/')
313 self.checkJoin(RFC2396_BASE, 'g/./h', 'http://a/b/c/g/h')
314 self.checkJoin(RFC2396_BASE, 'g/../h', 'http://a/b/c/h')
315 self.checkJoin(RFC2396_BASE, 'g;x=1/./y', 'http://a/b/c/g;x=1/y')
316 self.checkJoin(RFC2396_BASE, 'g;x=1/../y', 'http://a/b/c/y')
317 self.checkJoin(RFC2396_BASE, 'g?y/./x', 'http://a/b/c/g?y/./x')
318 self.checkJoin(RFC2396_BASE, 'g?y/../x', 'http://a/b/c/g?y/../x')
319 self.checkJoin(RFC2396_BASE, 'g#s/./x', 'http://a/b/c/g#s/./x')
320 self.checkJoin(RFC2396_BASE, 'g#s/../x', 'http://a/b/c/g#s/../x')
321
Antoine Pitrou55ac5b32014-08-21 19:16:17 -0400322 # XXX: The following tests are no longer compatible with RFC3986
323 # self.checkJoin(RFC2396_BASE, '../../../g', 'http://a/../g')
324 # self.checkJoin(RFC2396_BASE, '../../../../g', 'http://a/../../g')
325 # self.checkJoin(RFC2396_BASE, '/./g', 'http://a/./g')
326 # self.checkJoin(RFC2396_BASE, '/../g', 'http://a/../g')
327
Facundo Batista23e38562008-08-14 16:55:14 +0000328 def test_RFC3986(self):
329 self.checkJoin(RFC3986_BASE, '?y','http://a/b/c/d;p?y')
Antoine Pitrou55ac5b32014-08-21 19:16:17 -0400330 self.checkJoin(RFC3986_BASE, ';x', 'http://a/b/c/;x')
Senthil Kumarandd3820f2010-05-07 04:19:23 +0000331 self.checkJoin(RFC3986_BASE, 'g:h','g:h')
332 self.checkJoin(RFC3986_BASE, 'g','http://a/b/c/g')
333 self.checkJoin(RFC3986_BASE, './g','http://a/b/c/g')
334 self.checkJoin(RFC3986_BASE, 'g/','http://a/b/c/g/')
335 self.checkJoin(RFC3986_BASE, '/g','http://a/g')
336 self.checkJoin(RFC3986_BASE, '//g','http://g')
337 self.checkJoin(RFC3986_BASE, '?y','http://a/b/c/d;p?y')
338 self.checkJoin(RFC3986_BASE, 'g?y','http://a/b/c/g?y')
339 self.checkJoin(RFC3986_BASE, '#s','http://a/b/c/d;p?q#s')
340 self.checkJoin(RFC3986_BASE, 'g#s','http://a/b/c/g#s')
341 self.checkJoin(RFC3986_BASE, 'g?y#s','http://a/b/c/g?y#s')
342 self.checkJoin(RFC3986_BASE, ';x','http://a/b/c/;x')
343 self.checkJoin(RFC3986_BASE, 'g;x','http://a/b/c/g;x')
344 self.checkJoin(RFC3986_BASE, 'g;x?y#s','http://a/b/c/g;x?y#s')
345 self.checkJoin(RFC3986_BASE, '','http://a/b/c/d;p?q')
346 self.checkJoin(RFC3986_BASE, '.','http://a/b/c/')
347 self.checkJoin(RFC3986_BASE, './','http://a/b/c/')
348 self.checkJoin(RFC3986_BASE, '..','http://a/b/')
349 self.checkJoin(RFC3986_BASE, '../','http://a/b/')
350 self.checkJoin(RFC3986_BASE, '../g','http://a/b/g')
351 self.checkJoin(RFC3986_BASE, '../..','http://a/')
352 self.checkJoin(RFC3986_BASE, '../../','http://a/')
353 self.checkJoin(RFC3986_BASE, '../../g','http://a/g')
Antoine Pitrou55ac5b32014-08-21 19:16:17 -0400354 self.checkJoin(RFC3986_BASE, '../../../g', 'http://a/g')
Senthil Kumarandd3820f2010-05-07 04:19:23 +0000355
Senthil Kumaran257b9802017-04-04 21:19:43 -0700356 # Abnormal Examples
Senthil Kumarandd3820f2010-05-07 04:19:23 +0000357
358 # The 'abnormal scenarios' are incompatible with RFC2986 parsing
359 # Tests are here for reference.
360
Antoine Pitrou55ac5b32014-08-21 19:16:17 -0400361 self.checkJoin(RFC3986_BASE, '../../../g','http://a/g')
362 self.checkJoin(RFC3986_BASE, '../../../../g','http://a/g')
363 self.checkJoin(RFC3986_BASE, '/./g','http://a/g')
364 self.checkJoin(RFC3986_BASE, '/../g','http://a/g')
Senthil Kumarandd3820f2010-05-07 04:19:23 +0000365 self.checkJoin(RFC3986_BASE, 'g.','http://a/b/c/g.')
366 self.checkJoin(RFC3986_BASE, '.g','http://a/b/c/.g')
367 self.checkJoin(RFC3986_BASE, 'g..','http://a/b/c/g..')
368 self.checkJoin(RFC3986_BASE, '..g','http://a/b/c/..g')
369 self.checkJoin(RFC3986_BASE, './../g','http://a/b/g')
370 self.checkJoin(RFC3986_BASE, './g/.','http://a/b/c/g/')
371 self.checkJoin(RFC3986_BASE, 'g/./h','http://a/b/c/g/h')
372 self.checkJoin(RFC3986_BASE, 'g/../h','http://a/b/c/h')
373 self.checkJoin(RFC3986_BASE, 'g;x=1/./y','http://a/b/c/g;x=1/y')
374 self.checkJoin(RFC3986_BASE, 'g;x=1/../y','http://a/b/c/y')
375 self.checkJoin(RFC3986_BASE, 'g?y/./x','http://a/b/c/g?y/./x')
376 self.checkJoin(RFC3986_BASE, 'g?y/../x','http://a/b/c/g?y/../x')
377 self.checkJoin(RFC3986_BASE, 'g#s/./x','http://a/b/c/g#s/./x')
378 self.checkJoin(RFC3986_BASE, 'g#s/../x','http://a/b/c/g#s/../x')
379 #self.checkJoin(RFC3986_BASE, 'http:g','http:g') # strict parser
380 self.checkJoin(RFC3986_BASE, 'http:g','http://a/b/c/g') #relaxed parser
Facundo Batista23e38562008-08-14 16:55:14 +0000381
Senthil Kumarandca5b862010-12-17 04:48:45 +0000382 # Test for issue9721
383 self.checkJoin('http://a/b/c/de', ';x','http://a/b/c/;x')
384
Senthil Kumaranaa69d4d2010-07-14 10:21:22 +0000385 def test_urljoins(self):
386 self.checkJoin(SIMPLE_BASE, 'g:h','g:h')
387 self.checkJoin(SIMPLE_BASE, 'http:g','http://a/b/c/g')
388 self.checkJoin(SIMPLE_BASE, 'http:','http://a/b/c/d')
389 self.checkJoin(SIMPLE_BASE, 'g','http://a/b/c/g')
390 self.checkJoin(SIMPLE_BASE, './g','http://a/b/c/g')
391 self.checkJoin(SIMPLE_BASE, 'g/','http://a/b/c/g/')
392 self.checkJoin(SIMPLE_BASE, '/g','http://a/g')
393 self.checkJoin(SIMPLE_BASE, '//g','http://g')
394 self.checkJoin(SIMPLE_BASE, '?y','http://a/b/c/d?y')
395 self.checkJoin(SIMPLE_BASE, 'g?y','http://a/b/c/g?y')
396 self.checkJoin(SIMPLE_BASE, 'g?y/./x','http://a/b/c/g?y/./x')
397 self.checkJoin(SIMPLE_BASE, '.','http://a/b/c/')
398 self.checkJoin(SIMPLE_BASE, './','http://a/b/c/')
399 self.checkJoin(SIMPLE_BASE, '..','http://a/b/')
400 self.checkJoin(SIMPLE_BASE, '../','http://a/b/')
401 self.checkJoin(SIMPLE_BASE, '../g','http://a/b/g')
402 self.checkJoin(SIMPLE_BASE, '../..','http://a/')
403 self.checkJoin(SIMPLE_BASE, '../../g','http://a/g')
Senthil Kumaranaa69d4d2010-07-14 10:21:22 +0000404 self.checkJoin(SIMPLE_BASE, './../g','http://a/b/g')
405 self.checkJoin(SIMPLE_BASE, './g/.','http://a/b/c/g/')
Senthil Kumaranaa69d4d2010-07-14 10:21:22 +0000406 self.checkJoin(SIMPLE_BASE, 'g/./h','http://a/b/c/g/h')
407 self.checkJoin(SIMPLE_BASE, 'g/../h','http://a/b/c/h')
408 self.checkJoin(SIMPLE_BASE, 'http:g','http://a/b/c/g')
409 self.checkJoin(SIMPLE_BASE, 'http:','http://a/b/c/d')
410 self.checkJoin(SIMPLE_BASE, 'http:?y','http://a/b/c/d?y')
411 self.checkJoin(SIMPLE_BASE, 'http:g?y','http://a/b/c/g?y')
412 self.checkJoin(SIMPLE_BASE, 'http:g?y/./x','http://a/b/c/g?y/./x')
Senthil Kumarande02a712011-07-23 18:27:45 +0800413 self.checkJoin('http:///', '..','http:///')
414 self.checkJoin('', 'http://a/b/c/g?y/./x','http://a/b/c/g?y/./x')
415 self.checkJoin('', 'http://a/./g', 'http://a/./g')
Senthil Kumaran2a157d22011-08-03 18:37:22 +0800416 self.checkJoin('svn://pathtorepo/dir1', 'dir2', 'svn://pathtorepo/dir2')
Senthil Kumaran7ce71f62011-08-03 22:08:46 +0800417 self.checkJoin('svn+ssh://pathtorepo/dir1', 'dir2', 'svn+ssh://pathtorepo/dir2')
Berker Peksagf6767482016-09-16 14:43:58 +0300418 self.checkJoin('ws://a/b','g','ws://a/g')
419 self.checkJoin('wss://a/b','g','wss://a/g')
Senthil Kumaranaa69d4d2010-07-14 10:21:22 +0000420
Antoine Pitrou55ac5b32014-08-21 19:16:17 -0400421 # XXX: The following tests are no longer compatible with RFC3986
422 # self.checkJoin(SIMPLE_BASE, '../../../g','http://a/../g')
423 # self.checkJoin(SIMPLE_BASE, '/./g','http://a/./g')
424
Senthil Kumarana66e3882014-09-22 15:49:16 +0800425 # test for issue22118 duplicate slashes
426 self.checkJoin(SIMPLE_BASE + '/', 'foo', SIMPLE_BASE + '/foo')
427
428 # Non-RFC-defined tests, covering variations of base and trailing
429 # slashes
430 self.checkJoin('http://a/b/c/d/e/', '../../f/g/', 'http://a/b/c/f/g/')
431 self.checkJoin('http://a/b/c/d/e', '../../f/g/', 'http://a/b/f/g/')
432 self.checkJoin('http://a/b/c/d/e/', '/../../f/g/', 'http://a/f/g/')
433 self.checkJoin('http://a/b/c/d/e', '/../../f/g/', 'http://a/f/g/')
434 self.checkJoin('http://a/b/c/d/e/', '../../f/g', 'http://a/b/c/f/g')
435 self.checkJoin('http://a/b/', '../../f/g/', 'http://a/f/g/')
436
Berker Peksag20416f72015-04-16 02:31:14 +0300437 # issue 23703: don't duplicate filename
438 self.checkJoin('a', 'b', 'b')
439
Senthil Kumaranad02d232010-04-16 03:02:13 +0000440 def test_RFC2732(self):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000441 str_cases = [
Senthil Kumaranad02d232010-04-16 03:02:13 +0000442 ('http://Test.python.org:5432/foo/', 'test.python.org', 5432),
443 ('http://12.34.56.78:5432/foo/', '12.34.56.78', 5432),
444 ('http://[::1]:5432/foo/', '::1', 5432),
445 ('http://[dead:beef::1]:5432/foo/', 'dead:beef::1', 5432),
446 ('http://[dead:beef::]:5432/foo/', 'dead:beef::', 5432),
447 ('http://[dead:beef:cafe:5417:affe:8FA3:deaf:feed]:5432/foo/',
448 'dead:beef:cafe:5417:affe:8fa3:deaf:feed', 5432),
449 ('http://[::12.34.56.78]:5432/foo/', '::12.34.56.78', 5432),
450 ('http://[::ffff:12.34.56.78]:5432/foo/',
451 '::ffff:12.34.56.78', 5432),
452 ('http://Test.python.org/foo/', 'test.python.org', None),
453 ('http://12.34.56.78/foo/', '12.34.56.78', None),
454 ('http://[::1]/foo/', '::1', None),
455 ('http://[dead:beef::1]/foo/', 'dead:beef::1', None),
456 ('http://[dead:beef::]/foo/', 'dead:beef::', None),
457 ('http://[dead:beef:cafe:5417:affe:8FA3:deaf:feed]/foo/',
458 'dead:beef:cafe:5417:affe:8fa3:deaf:feed', None),
459 ('http://[::12.34.56.78]/foo/', '::12.34.56.78', None),
460 ('http://[::ffff:12.34.56.78]/foo/',
461 '::ffff:12.34.56.78', None),
Serhiy Storchakaff97b082014-01-18 18:30:33 +0200462 ('http://Test.python.org:/foo/', 'test.python.org', None),
463 ('http://12.34.56.78:/foo/', '12.34.56.78', None),
464 ('http://[::1]:/foo/', '::1', None),
465 ('http://[dead:beef::1]:/foo/', 'dead:beef::1', None),
466 ('http://[dead:beef::]:/foo/', 'dead:beef::', None),
467 ('http://[dead:beef:cafe:5417:affe:8FA3:deaf:feed]:/foo/',
468 'dead:beef:cafe:5417:affe:8fa3:deaf:feed', None),
469 ('http://[::12.34.56.78]:/foo/', '::12.34.56.78', None),
470 ('http://[::ffff:12.34.56.78]:/foo/',
471 '::ffff:12.34.56.78', None),
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000472 ]
473 def _encode(t):
474 return t[0].encode('ascii'), t[1].encode('ascii'), t[2]
475 bytes_cases = [_encode(x) for x in str_cases]
476 for url, hostname, port in str_cases + bytes_cases:
Senthil Kumaranad02d232010-04-16 03:02:13 +0000477 urlparsed = urllib.parse.urlparse(url)
478 self.assertEqual((urlparsed.hostname, urlparsed.port) , (hostname, port))
479
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000480 str_cases = [
Senthil Kumaranad02d232010-04-16 03:02:13 +0000481 'http://::12.34.56.78]/',
482 'http://[::1/foo/',
Senthil Kumaran7a1e09f2010-04-22 12:19:46 +0000483 'ftp://[::1/foo/bad]/bad',
Senthil Kumaran2eaef052010-04-20 20:42:50 +0000484 'http://[::1/foo/bad]/bad',
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000485 'http://[::ffff:12.34.56.78']
486 bytes_cases = [x.encode('ascii') for x in str_cases]
487 for invalid_url in str_cases + bytes_cases:
Senthil Kumaran7a1e09f2010-04-22 12:19:46 +0000488 self.assertRaises(ValueError, urllib.parse.urlparse, invalid_url)
Senthil Kumaranad02d232010-04-16 03:02:13 +0000489
Fred Drake70705652002-10-16 21:02:36 +0000490 def test_urldefrag(self):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000491 str_cases = [
Fred Drake70705652002-10-16 21:02:36 +0000492 ('http://python.org#frag', 'http://python.org', 'frag'),
493 ('http://python.org', 'http://python.org', ''),
494 ('http://python.org/#frag', 'http://python.org/', 'frag'),
495 ('http://python.org/', 'http://python.org/', ''),
496 ('http://python.org/?q#frag', 'http://python.org/?q', 'frag'),
497 ('http://python.org/?q', 'http://python.org/?q', ''),
498 ('http://python.org/p#frag', 'http://python.org/p', 'frag'),
499 ('http://python.org/p?q', 'http://python.org/p?q', ''),
500 (RFC1808_BASE, 'http://a/b/c/d;p?q', 'f'),
501 (RFC2396_BASE, 'http://a/b/c/d;p?q', ''),
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000502 ]
503 def _encode(t):
504 return type(t)(x.encode('ascii') for x in t)
505 bytes_cases = [_encode(x) for x in str_cases]
506 for url, defrag, frag in str_cases + bytes_cases:
507 result = urllib.parse.urldefrag(url)
508 self.assertEqual(result.geturl(), url)
509 self.assertEqual(result, (defrag, frag))
510 self.assertEqual(result.url, defrag)
511 self.assertEqual(result.fragment, frag)
Fred Drake70705652002-10-16 21:02:36 +0000512
Коренберг Маркfbd60512017-12-21 17:16:17 +0500513 def test_urlsplit_scoped_IPv6(self):
514 p = urllib.parse.urlsplit('http://[FE80::822a:a8ff:fe49:470c%tESt]:1234')
515 self.assertEqual(p.hostname, "fe80::822a:a8ff:fe49:470c%tESt")
516 self.assertEqual(p.netloc, '[FE80::822a:a8ff:fe49:470c%tESt]:1234')
517
518 p = urllib.parse.urlsplit(b'http://[FE80::822a:a8ff:fe49:470c%tESt]:1234')
519 self.assertEqual(p.hostname, b"fe80::822a:a8ff:fe49:470c%tESt")
520 self.assertEqual(p.netloc, b'[FE80::822a:a8ff:fe49:470c%tESt]:1234')
521
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000522 def test_urlsplit_attributes(self):
523 url = "HTTP://WWW.PYTHON.ORG/doc/#frag"
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000524 p = urllib.parse.urlsplit(url)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000525 self.assertEqual(p.scheme, "http")
526 self.assertEqual(p.netloc, "WWW.PYTHON.ORG")
527 self.assertEqual(p.path, "/doc/")
528 self.assertEqual(p.query, "")
529 self.assertEqual(p.fragment, "frag")
530 self.assertEqual(p.username, None)
531 self.assertEqual(p.password, None)
532 self.assertEqual(p.hostname, "www.python.org")
533 self.assertEqual(p.port, None)
534 # geturl() won't return exactly the original URL in this case
535 # since the scheme is always case-normalized
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000536 # We handle this by ignoring the first 4 characters of the URL
537 self.assertEqual(p.geturl()[4:], url[4:])
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000538
539 url = "http://User:Pass@www.python.org:080/doc/?query=yes#frag"
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000540 p = urllib.parse.urlsplit(url)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000541 self.assertEqual(p.scheme, "http")
542 self.assertEqual(p.netloc, "User:Pass@www.python.org:080")
543 self.assertEqual(p.path, "/doc/")
544 self.assertEqual(p.query, "query=yes")
545 self.assertEqual(p.fragment, "frag")
546 self.assertEqual(p.username, "User")
547 self.assertEqual(p.password, "Pass")
548 self.assertEqual(p.hostname, "www.python.org")
549 self.assertEqual(p.port, 80)
550 self.assertEqual(p.geturl(), url)
551
Christian Heimesfaf2f632008-01-06 16:59:19 +0000552 # Addressing issue1698, which suggests Username can contain
553 # "@" characters. Though not RFC compliant, many ftp sites allow
554 # and request email addresses as usernames.
555
556 url = "http://User@example.com:Pass@www.python.org:080/doc/?query=yes#frag"
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000557 p = urllib.parse.urlsplit(url)
Christian Heimesfaf2f632008-01-06 16:59:19 +0000558 self.assertEqual(p.scheme, "http")
559 self.assertEqual(p.netloc, "User@example.com:Pass@www.python.org:080")
560 self.assertEqual(p.path, "/doc/")
561 self.assertEqual(p.query, "query=yes")
562 self.assertEqual(p.fragment, "frag")
563 self.assertEqual(p.username, "User@example.com")
564 self.assertEqual(p.password, "Pass")
565 self.assertEqual(p.hostname, "www.python.org")
566 self.assertEqual(p.port, 80)
567 self.assertEqual(p.geturl(), url)
568
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000569 # And check them all again, only with bytes this time
570 url = b"HTTP://WWW.PYTHON.ORG/doc/#frag"
571 p = urllib.parse.urlsplit(url)
572 self.assertEqual(p.scheme, b"http")
573 self.assertEqual(p.netloc, b"WWW.PYTHON.ORG")
574 self.assertEqual(p.path, b"/doc/")
575 self.assertEqual(p.query, b"")
576 self.assertEqual(p.fragment, b"frag")
577 self.assertEqual(p.username, None)
578 self.assertEqual(p.password, None)
579 self.assertEqual(p.hostname, b"www.python.org")
580 self.assertEqual(p.port, None)
581 self.assertEqual(p.geturl()[4:], url[4:])
582
583 url = b"http://User:Pass@www.python.org:080/doc/?query=yes#frag"
584 p = urllib.parse.urlsplit(url)
585 self.assertEqual(p.scheme, b"http")
586 self.assertEqual(p.netloc, b"User:Pass@www.python.org:080")
587 self.assertEqual(p.path, b"/doc/")
588 self.assertEqual(p.query, b"query=yes")
589 self.assertEqual(p.fragment, b"frag")
590 self.assertEqual(p.username, b"User")
591 self.assertEqual(p.password, b"Pass")
592 self.assertEqual(p.hostname, b"www.python.org")
593 self.assertEqual(p.port, 80)
594 self.assertEqual(p.geturl(), url)
595
596 url = b"http://User@example.com:Pass@www.python.org:080/doc/?query=yes#frag"
597 p = urllib.parse.urlsplit(url)
598 self.assertEqual(p.scheme, b"http")
599 self.assertEqual(p.netloc, b"User@example.com:Pass@www.python.org:080")
600 self.assertEqual(p.path, b"/doc/")
601 self.assertEqual(p.query, b"query=yes")
602 self.assertEqual(p.fragment, b"frag")
603 self.assertEqual(p.username, b"User@example.com")
604 self.assertEqual(p.password, b"Pass")
605 self.assertEqual(p.hostname, b"www.python.org")
606 self.assertEqual(p.port, 80)
607 self.assertEqual(p.geturl(), url)
Christian Heimesfaf2f632008-01-06 16:59:19 +0000608
Robert Collinsdfa95c92015-08-10 09:53:30 +1200609 # Verify an illegal port raises ValueError
Senthil Kumaran2fc5a502012-05-24 21:56:17 +0800610 url = b"HTTP://WWW.PYTHON.ORG:65536/doc/#frag"
611 p = urllib.parse.urlsplit(url)
Robert Collinsdfa95c92015-08-10 09:53:30 +1200612 with self.assertRaisesRegex(ValueError, "out of range"):
613 p.port
Senthil Kumaran2fc5a502012-05-24 21:56:17 +0800614
Senthil Kumaran76cd81d2021-04-29 10:16:50 -0700615 def test_urlsplit_remove_unsafe_bytes(self):
616 # Remove ASCII tabs and newlines from input
617 url = "http://www.python.org/java\nscript:\talert('msg\r\n')/#frag"
618 p = urllib.parse.urlsplit(url)
619 self.assertEqual(p.scheme, "http")
620 self.assertEqual(p.netloc, "www.python.org")
621 self.assertEqual(p.path, "/javascript:alert('msg')/")
622 self.assertEqual(p.query, "")
623 self.assertEqual(p.fragment, "frag")
624 self.assertEqual(p.username, None)
625 self.assertEqual(p.password, None)
626 self.assertEqual(p.hostname, "www.python.org")
627 self.assertEqual(p.port, None)
628 self.assertEqual(p.geturl(), "http://www.python.org/javascript:alert('msg')/#frag")
629
630 # Remove ASCII tabs and newlines from input as bytes.
631 url = b"http://www.python.org/java\nscript:\talert('msg\r\n')/#frag"
632 p = urllib.parse.urlsplit(url)
633 self.assertEqual(p.scheme, b"http")
634 self.assertEqual(p.netloc, b"www.python.org")
635 self.assertEqual(p.path, b"/javascript:alert('msg')/")
636 self.assertEqual(p.query, b"")
637 self.assertEqual(p.fragment, b"frag")
638 self.assertEqual(p.username, None)
639 self.assertEqual(p.password, None)
640 self.assertEqual(p.hostname, b"www.python.org")
641 self.assertEqual(p.port, None)
642 self.assertEqual(p.geturl(), b"http://www.python.org/javascript:alert('msg')/#frag")
643
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000644 def test_attributes_bad_port(self):
Robert Collinsdfa95c92015-08-10 09:53:30 +1200645 """Check handling of invalid ports."""
646 for bytes in (False, True):
647 for parse in (urllib.parse.urlsplit, urllib.parse.urlparse):
648 for port in ("foo", "1.5", "-1", "0x10"):
649 with self.subTest(bytes=bytes, parse=parse, port=port):
650 netloc = "www.example.net:" + port
651 url = "http://" + netloc
652 if bytes:
653 netloc = netloc.encode("ascii")
654 url = url.encode("ascii")
655 p = parse(url)
656 self.assertEqual(p.netloc, netloc)
657 with self.assertRaises(ValueError):
658 p.port
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000659
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000660 def test_attributes_without_netloc(self):
661 # This example is straight from RFC 3261. It looks like it
662 # should allow the username, hostname, and port to be filled
663 # in, but doesn't. Since it's a URI and doesn't use the
664 # scheme://netloc syntax, the netloc and related attributes
665 # should be left empty.
666 uri = "sip:alice@atlanta.com;maddr=239.255.255.1;ttl=15"
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000667 p = urllib.parse.urlsplit(uri)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000668 self.assertEqual(p.netloc, "")
669 self.assertEqual(p.username, None)
670 self.assertEqual(p.password, None)
671 self.assertEqual(p.hostname, None)
672 self.assertEqual(p.port, None)
673 self.assertEqual(p.geturl(), uri)
674
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000675 p = urllib.parse.urlparse(uri)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000676 self.assertEqual(p.netloc, "")
677 self.assertEqual(p.username, None)
678 self.assertEqual(p.password, None)
679 self.assertEqual(p.hostname, None)
680 self.assertEqual(p.port, None)
681 self.assertEqual(p.geturl(), uri)
682
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000683 # You guessed it, repeating the test with bytes input
684 uri = b"sip:alice@atlanta.com;maddr=239.255.255.1;ttl=15"
685 p = urllib.parse.urlsplit(uri)
686 self.assertEqual(p.netloc, b"")
687 self.assertEqual(p.username, None)
688 self.assertEqual(p.password, None)
689 self.assertEqual(p.hostname, None)
690 self.assertEqual(p.port, None)
691 self.assertEqual(p.geturl(), uri)
692
693 p = urllib.parse.urlparse(uri)
694 self.assertEqual(p.netloc, b"")
695 self.assertEqual(p.username, None)
696 self.assertEqual(p.password, None)
697 self.assertEqual(p.hostname, None)
698 self.assertEqual(p.port, None)
699 self.assertEqual(p.geturl(), uri)
700
Christian Heimesfaf2f632008-01-06 16:59:19 +0000701 def test_noslash(self):
702 # Issue 1637: http://foo.com?query is legal
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000703 self.assertEqual(urllib.parse.urlparse("http://example.com?blahblah=/foo"),
Christian Heimesfaf2f632008-01-06 16:59:19 +0000704 ('http', 'example.com', '', '', 'blahblah=/foo', ''))
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000705 self.assertEqual(urllib.parse.urlparse(b"http://example.com?blahblah=/foo"),
706 (b'http', b'example.com', b'', b'', b'blahblah=/foo', b''))
Christian Heimesfaf2f632008-01-06 16:59:19 +0000707
Senthil Kumaran84c7d9f2010-08-04 04:50:44 +0000708 def test_withoutscheme(self):
709 # Test urlparse without scheme
710 # Issue 754016: urlparse goes wrong with IP:port without scheme
711 # RFC 1808 specifies that netloc should start with //, urlparse expects
712 # the same, otherwise it classifies the portion of url as path.
713 self.assertEqual(urllib.parse.urlparse("path"),
714 ('','','path','','',''))
715 self.assertEqual(urllib.parse.urlparse("//www.python.org:80"),
716 ('','www.python.org:80','','','',''))
717 self.assertEqual(urllib.parse.urlparse("http://www.python.org:80"),
718 ('http','www.python.org:80','','','',''))
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000719 # Repeat for bytes input
720 self.assertEqual(urllib.parse.urlparse(b"path"),
721 (b'',b'',b'path',b'',b'',b''))
722 self.assertEqual(urllib.parse.urlparse(b"//www.python.org:80"),
723 (b'',b'www.python.org:80',b'',b'',b'',b''))
724 self.assertEqual(urllib.parse.urlparse(b"http://www.python.org:80"),
725 (b'http',b'www.python.org:80',b'',b'',b'',b''))
Senthil Kumaran84c7d9f2010-08-04 04:50:44 +0000726
727 def test_portseparator(self):
728 # Issue 754016 makes changes for port separator ':' from scheme separator
Tim Graham5a88d502019-10-18 09:07:20 -0400729 self.assertEqual(urllib.parse.urlparse("http:80"), ('http','','80','','',''))
730 self.assertEqual(urllib.parse.urlparse("https:80"), ('https','','80','','',''))
731 self.assertEqual(urllib.parse.urlparse("path:80"), ('path','','80','','',''))
Senthil Kumaran84c7d9f2010-08-04 04:50:44 +0000732 self.assertEqual(urllib.parse.urlparse("http:"),('http','','','','',''))
733 self.assertEqual(urllib.parse.urlparse("https:"),('https','','','','',''))
734 self.assertEqual(urllib.parse.urlparse("http://www.python.org:80"),
735 ('http','www.python.org:80','','','',''))
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000736 # As usual, need to check bytes input as well
Tim Graham5a88d502019-10-18 09:07:20 -0400737 self.assertEqual(urllib.parse.urlparse(b"http:80"), (b'http',b'',b'80',b'',b'',b''))
738 self.assertEqual(urllib.parse.urlparse(b"https:80"), (b'https',b'',b'80',b'',b'',b''))
739 self.assertEqual(urllib.parse.urlparse(b"path:80"), (b'path',b'',b'80',b'',b'',b''))
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000740 self.assertEqual(urllib.parse.urlparse(b"http:"),(b'http',b'',b'',b'',b'',b''))
741 self.assertEqual(urllib.parse.urlparse(b"https:"),(b'https',b'',b'',b'',b'',b''))
742 self.assertEqual(urllib.parse.urlparse(b"http://www.python.org:80"),
743 (b'http',b'www.python.org:80',b'',b'',b'',b''))
Senthil Kumaran84c7d9f2010-08-04 04:50:44 +0000744
Facundo Batista2ac5de22008-07-07 18:24:11 +0000745 def test_usingsys(self):
746 # Issue 3314: sys module is used in the error
747 self.assertRaises(TypeError, urllib.parse.urlencode, "foo")
748
Senthil Kumaran6be85c52010-02-19 07:42:50 +0000749 def test_anyscheme(self):
750 # Issue 7904: s3://foo.com/stuff has netloc "foo.com".
Ezio Melotti5e15efa2010-02-19 14:49:02 +0000751 self.assertEqual(urllib.parse.urlparse("s3://foo.com/stuff"),
752 ('s3', 'foo.com', '/stuff', '', '', ''))
753 self.assertEqual(urllib.parse.urlparse("x-newscheme://foo.com/stuff"),
754 ('x-newscheme', 'foo.com', '/stuff', '', '', ''))
Senthil Kumaran1be320e2012-05-19 08:12:00 +0800755 self.assertEqual(urllib.parse.urlparse("x-newscheme://foo.com/stuff?query#fragment"),
756 ('x-newscheme', 'foo.com', '/stuff', '', 'query', 'fragment'))
757 self.assertEqual(urllib.parse.urlparse("x-newscheme://foo.com/stuff?query"),
758 ('x-newscheme', 'foo.com', '/stuff', '', 'query', ''))
759
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000760 # And for bytes...
761 self.assertEqual(urllib.parse.urlparse(b"s3://foo.com/stuff"),
762 (b's3', b'foo.com', b'/stuff', b'', b'', b''))
763 self.assertEqual(urllib.parse.urlparse(b"x-newscheme://foo.com/stuff"),
764 (b'x-newscheme', b'foo.com', b'/stuff', b'', b'', b''))
Senthil Kumaran1be320e2012-05-19 08:12:00 +0800765 self.assertEqual(urllib.parse.urlparse(b"x-newscheme://foo.com/stuff?query#fragment"),
766 (b'x-newscheme', b'foo.com', b'/stuff', b'', b'query', b'fragment'))
767 self.assertEqual(urllib.parse.urlparse(b"x-newscheme://foo.com/stuff?query"),
768 (b'x-newscheme', b'foo.com', b'/stuff', b'', b'query', b''))
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000769
Berker Peksag89584c92015-06-25 23:38:48 +0300770 def test_default_scheme(self):
771 # Exercise the scheme parameter of urlparse() and urlsplit()
772 for func in (urllib.parse.urlparse, urllib.parse.urlsplit):
773 with self.subTest(function=func):
774 result = func("http://example.net/", "ftp")
775 self.assertEqual(result.scheme, "http")
776 result = func(b"http://example.net/", b"ftp")
777 self.assertEqual(result.scheme, b"http")
778 self.assertEqual(func("path", "ftp").scheme, "ftp")
779 self.assertEqual(func("path", scheme="ftp").scheme, "ftp")
780 self.assertEqual(func(b"path", scheme=b"ftp").scheme, b"ftp")
781 self.assertEqual(func("path").scheme, "")
782 self.assertEqual(func(b"path").scheme, b"")
783 self.assertEqual(func(b"path", "").scheme, b"")
784
785 def test_parse_fragments(self):
786 # Exercise the allow_fragments parameter of urlparse() and urlsplit()
787 tests = (
postmasters90e01e52017-06-20 06:02:44 -0700788 ("http:#frag", "path", "frag"),
789 ("//example.net#frag", "path", "frag"),
790 ("index.html#frag", "path", "frag"),
791 (";a=b#frag", "params", "frag"),
792 ("?a=b#frag", "query", "frag"),
793 ("#frag", "path", "frag"),
794 ("abc#@frag", "path", "@frag"),
795 ("//abc#@frag", "path", "@frag"),
796 ("//abc:80#@frag", "path", "@frag"),
797 ("//abc#@frag:80", "path", "@frag:80"),
Berker Peksag89584c92015-06-25 23:38:48 +0300798 )
postmasters90e01e52017-06-20 06:02:44 -0700799 for url, attr, expected_frag in tests:
Berker Peksag89584c92015-06-25 23:38:48 +0300800 for func in (urllib.parse.urlparse, urllib.parse.urlsplit):
801 if attr == "params" and func is urllib.parse.urlsplit:
802 attr = "path"
803 with self.subTest(url=url, function=func):
804 result = func(url, allow_fragments=False)
805 self.assertEqual(result.fragment, "")
postmasters90e01e52017-06-20 06:02:44 -0700806 self.assertTrue(
807 getattr(result, attr).endswith("#" + expected_frag))
Berker Peksag89584c92015-06-25 23:38:48 +0300808 self.assertEqual(func(url, "", False).fragment, "")
809
810 result = func(url, allow_fragments=True)
postmasters90e01e52017-06-20 06:02:44 -0700811 self.assertEqual(result.fragment, expected_frag)
812 self.assertFalse(
813 getattr(result, attr).endswith(expected_frag))
814 self.assertEqual(func(url, "", True).fragment,
815 expected_frag)
816 self.assertEqual(func(url).fragment, expected_frag)
Berker Peksag89584c92015-06-25 23:38:48 +0300817
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000818 def test_mixed_types_rejected(self):
819 # Several functions that process either strings or ASCII encoded bytes
820 # accept multiple arguments. Check they reject mixed type input
Ezio Melottied3a7d22010-12-01 02:32:32 +0000821 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000822 urllib.parse.urlparse("www.python.org", b"http")
Ezio Melottied3a7d22010-12-01 02:32:32 +0000823 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000824 urllib.parse.urlparse(b"www.python.org", "http")
Ezio Melottied3a7d22010-12-01 02:32:32 +0000825 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000826 urllib.parse.urlsplit("www.python.org", b"http")
Ezio Melottied3a7d22010-12-01 02:32:32 +0000827 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000828 urllib.parse.urlsplit(b"www.python.org", "http")
Ezio Melottied3a7d22010-12-01 02:32:32 +0000829 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000830 urllib.parse.urlunparse(( b"http", "www.python.org","","","",""))
Ezio Melottied3a7d22010-12-01 02:32:32 +0000831 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000832 urllib.parse.urlunparse(("http", b"www.python.org","","","",""))
Ezio Melottied3a7d22010-12-01 02:32:32 +0000833 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000834 urllib.parse.urlunsplit((b"http", "www.python.org","","",""))
Ezio Melottied3a7d22010-12-01 02:32:32 +0000835 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000836 urllib.parse.urlunsplit(("http", b"www.python.org","","",""))
Ezio Melottied3a7d22010-12-01 02:32:32 +0000837 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000838 urllib.parse.urljoin("http://python.org", b"http://python.org")
Ezio Melottied3a7d22010-12-01 02:32:32 +0000839 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000840 urllib.parse.urljoin(b"http://python.org", "http://python.org")
841
842 def _check_result_type(self, str_type):
843 num_args = len(str_type._fields)
844 bytes_type = str_type._encoded_counterpart
845 self.assertIs(bytes_type._decoded_counterpart, str_type)
846 str_args = ('',) * num_args
847 bytes_args = (b'',) * num_args
848 str_result = str_type(*str_args)
849 bytes_result = bytes_type(*bytes_args)
850 encoding = 'ascii'
851 errors = 'strict'
852 self.assertEqual(str_result, str_args)
853 self.assertEqual(bytes_result.decode(), str_args)
854 self.assertEqual(bytes_result.decode(), str_result)
855 self.assertEqual(bytes_result.decode(encoding), str_args)
856 self.assertEqual(bytes_result.decode(encoding), str_result)
857 self.assertEqual(bytes_result.decode(encoding, errors), str_args)
858 self.assertEqual(bytes_result.decode(encoding, errors), str_result)
859 self.assertEqual(bytes_result, bytes_args)
860 self.assertEqual(str_result.encode(), bytes_args)
861 self.assertEqual(str_result.encode(), bytes_result)
862 self.assertEqual(str_result.encode(encoding), bytes_args)
863 self.assertEqual(str_result.encode(encoding), bytes_result)
864 self.assertEqual(str_result.encode(encoding, errors), bytes_args)
865 self.assertEqual(str_result.encode(encoding, errors), bytes_result)
866
def test_result_pairs(self):
    # Check encoding and decoding between result pairs: run the shared
    # helper once for each str result type.
    for result_type in (
        urllib.parse.DefragResult,
        urllib.parse.SplitResult,
        urllib.parse.ParseResult,
    ):
        self._check_result_type(result_type)
876
Victor Stinner1d87deb2011-01-14 13:05:19 +0000877 def test_parse_qs_encoding(self):
878 result = urllib.parse.parse_qs("key=\u0141%E9", encoding="latin-1")
879 self.assertEqual(result, {'key': ['\u0141\xE9']})
880 result = urllib.parse.parse_qs("key=\u0141%C3%A9", encoding="utf-8")
881 self.assertEqual(result, {'key': ['\u0141\xE9']})
882 result = urllib.parse.parse_qs("key=\u0141%C3%A9", encoding="ascii")
883 self.assertEqual(result, {'key': ['\u0141\ufffd\ufffd']})
884 result = urllib.parse.parse_qs("key=\u0141%E9-", encoding="ascii")
885 self.assertEqual(result, {'key': ['\u0141\ufffd-']})
886 result = urllib.parse.parse_qs("key=\u0141%E9-", encoding="ascii",
887 errors="ignore")
888 self.assertEqual(result, {'key': ['\u0141-']})
889
890 def test_parse_qsl_encoding(self):
891 result = urllib.parse.parse_qsl("key=\u0141%E9", encoding="latin-1")
892 self.assertEqual(result, [('key', '\u0141\xE9')])
893 result = urllib.parse.parse_qsl("key=\u0141%C3%A9", encoding="utf-8")
894 self.assertEqual(result, [('key', '\u0141\xE9')])
895 result = urllib.parse.parse_qsl("key=\u0141%C3%A9", encoding="ascii")
896 self.assertEqual(result, [('key', '\u0141\ufffd\ufffd')])
897 result = urllib.parse.parse_qsl("key=\u0141%E9-", encoding="ascii")
898 self.assertEqual(result, [('key', '\u0141\ufffd-')])
899 result = urllib.parse.parse_qsl("key=\u0141%E9-", encoding="ascii",
900 errors="ignore")
901 self.assertEqual(result, [('key', '\u0141-')])
902
matthewbelisle-wf20914482018-10-19 05:52:59 -0500903 def test_parse_qsl_max_num_fields(self):
904 with self.assertRaises(ValueError):
905 urllib.parse.parse_qs('&'.join(['a=a']*11), max_num_fields=10)
matthewbelisle-wf20914482018-10-19 05:52:59 -0500906 urllib.parse.parse_qs('&'.join(['a=a']*10), max_num_fields=10)
907
Adam Goldschmidtfcbe0cb2021-02-15 00:41:57 +0200908 def test_parse_qs_separator(self):
909 parse_qs_semicolon_cases = [
910 (";", {}),
911 (";;", {}),
912 (";a=b", {'a': ['b']}),
913 ("a=a+b;b=b+c", {'a': ['a b'], 'b': ['b c']}),
914 ("a=1;a=2", {'a': ['1', '2']}),
915 (b";", {}),
916 (b";;", {}),
917 (b";a=b", {b'a': [b'b']}),
918 (b"a=a+b;b=b+c", {b'a': [b'a b'], b'b': [b'b c']}),
919 (b"a=1;a=2", {b'a': [b'1', b'2']}),
920 ]
921 for orig, expect in parse_qs_semicolon_cases:
922 with self.subTest(f"Original: {orig!r}, Expected: {expect!r}"):
923 result = urllib.parse.parse_qs(orig, separator=';')
924 self.assertEqual(result, expect, "Error parsing %r" % orig)
Ken Jinb38601d2021-04-11 21:26:09 +0800925 result_bytes = urllib.parse.parse_qs(orig, separator=b';')
926 self.assertEqual(result_bytes, expect, "Error parsing %r" % orig)
Adam Goldschmidtfcbe0cb2021-02-15 00:41:57 +0200927
928
929 def test_parse_qsl_separator(self):
930 parse_qsl_semicolon_cases = [
931 (";", []),
932 (";;", []),
933 (";a=b", [('a', 'b')]),
934 ("a=a+b;b=b+c", [('a', 'a b'), ('b', 'b c')]),
935 ("a=1;a=2", [('a', '1'), ('a', '2')]),
936 (b";", []),
937 (b";;", []),
938 (b";a=b", [(b'a', b'b')]),
939 (b"a=a+b;b=b+c", [(b'a', b'a b'), (b'b', b'b c')]),
940 (b"a=1;a=2", [(b'a', b'1'), (b'a', b'2')]),
941 ]
942 for orig, expect in parse_qsl_semicolon_cases:
943 with self.subTest(f"Original: {orig!r}, Expected: {expect!r}"):
944 result = urllib.parse.parse_qsl(orig, separator=';')
945 self.assertEqual(result, expect, "Error parsing %r" % orig)
Ken Jinb38601d2021-04-11 21:26:09 +0800946 result_bytes = urllib.parse.parse_qsl(orig, separator=b';')
947 self.assertEqual(result_bytes, expect, "Error parsing %r" % orig)
Adam Goldschmidtfcbe0cb2021-02-15 00:41:57 +0200948
949
Senthil Kumarande02a712011-07-23 18:27:45 +0800950 def test_urlencode_sequences(self):
951 # Other tests incidentally urlencode things; test non-covered cases:
952 # Sequence and object values.
953 result = urllib.parse.urlencode({'a': [1, 2], 'b': (3, 4, 5)}, True)
Georg Brandl09a7c722012-02-20 21:31:46 +0100954 # we cannot rely on ordering here
955 assert set(result.split('&')) == {'a=1', 'a=2', 'b=3', 'b=4', 'b=5'}
Senthil Kumarande02a712011-07-23 18:27:45 +0800956
957 class Trivial:
958 def __str__(self):
959 return 'trivial'
960
961 result = urllib.parse.urlencode({'a': Trivial()}, True)
962 self.assertEqual(result, 'a=trivial')
963
R David Murrayc17686f2015-05-17 20:44:50 -0400964 def test_urlencode_quote_via(self):
965 result = urllib.parse.urlencode({'a': 'some value'})
966 self.assertEqual(result, "a=some+value")
967 result = urllib.parse.urlencode({'a': 'some value/another'},
968 quote_via=urllib.parse.quote)
969 self.assertEqual(result, "a=some%20value%2Fanother")
970 result = urllib.parse.urlencode({'a': 'some value/another'},
971 safe='/', quote_via=urllib.parse.quote)
972 self.assertEqual(result, "a=some%20value/another")
973
Senthil Kumarande02a712011-07-23 18:27:45 +0800974 def test_quote_from_bytes(self):
975 self.assertRaises(TypeError, urllib.parse.quote_from_bytes, 'foo')
976 result = urllib.parse.quote_from_bytes(b'archaeological arcana')
977 self.assertEqual(result, 'archaeological%20arcana')
978 result = urllib.parse.quote_from_bytes(b'')
979 self.assertEqual(result, '')
980
981 def test_unquote_to_bytes(self):
982 result = urllib.parse.unquote_to_bytes('abc%20def')
983 self.assertEqual(result, b'abc def')
984 result = urllib.parse.unquote_to_bytes('')
985 self.assertEqual(result, b'')
986
987 def test_quote_errors(self):
988 self.assertRaises(TypeError, urllib.parse.quote, b'foo',
989 encoding='utf-8')
990 self.assertRaises(TypeError, urllib.parse.quote, b'foo', errors='strict')
Victor Stinner1d87deb2011-01-14 13:05:19 +0000991
Ezio Melotti6709b7d2012-05-19 17:15:19 +0300992 def test_issue14072(self):
993 p1 = urllib.parse.urlsplit('tel:+31-641044153')
994 self.assertEqual(p1.scheme, 'tel')
995 self.assertEqual(p1.path, '+31-641044153')
996 p2 = urllib.parse.urlsplit('tel:+31641044153')
997 self.assertEqual(p2.scheme, 'tel')
998 self.assertEqual(p2.path, '+31641044153')
Senthil Kumaraned301992012-12-24 14:00:20 -0800999 # assert the behavior for urlparse
1000 p1 = urllib.parse.urlparse('tel:+31-641044153')
1001 self.assertEqual(p1.scheme, 'tel')
1002 self.assertEqual(p1.path, '+31-641044153')
1003 p2 = urllib.parse.urlparse('tel:+31641044153')
1004 self.assertEqual(p2.scheme, 'tel')
1005 self.assertEqual(p2.path, '+31641044153')
1006
Matt Eaton2cb46612018-03-20 01:41:37 -05001007 def test_port_casting_failure_message(self):
1008 message = "Port could not be cast to integer value as 'oracle'"
1009 p1 = urllib.parse.urlparse('http://Server=sde; Service=sde:oracle')
1010 with self.assertRaisesRegex(ValueError, message):
1011 p1.port
1012
1013 p2 = urllib.parse.urlsplit('http://Server=sde; Service=sde:oracle')
1014 with self.assertRaisesRegex(ValueError, message):
1015 p2.port
1016
Senthil Kumaraned301992012-12-24 14:00:20 -08001017 def test_telurl_params(self):
1018 p1 = urllib.parse.urlparse('tel:123-4;phone-context=+1-650-516')
1019 self.assertEqual(p1.scheme, 'tel')
1020 self.assertEqual(p1.path, '123-4')
1021 self.assertEqual(p1.params, 'phone-context=+1-650-516')
1022
1023 p1 = urllib.parse.urlparse('tel:+1-201-555-0123')
1024 self.assertEqual(p1.scheme, 'tel')
1025 self.assertEqual(p1.path, '+1-201-555-0123')
1026 self.assertEqual(p1.params, '')
1027
1028 p1 = urllib.parse.urlparse('tel:7042;phone-context=example.com')
1029 self.assertEqual(p1.scheme, 'tel')
1030 self.assertEqual(p1.path, '7042')
1031 self.assertEqual(p1.params, 'phone-context=example.com')
1032
1033 p1 = urllib.parse.urlparse('tel:863-1234;phone-context=+1-914-555')
1034 self.assertEqual(p1.scheme, 'tel')
1035 self.assertEqual(p1.path, '863-1234')
1036 self.assertEqual(p1.params, 'phone-context=+1-914-555')
1037
R David Murrayf5163882013-03-21 20:56:51 -04001038 def test_Quoter_repr(self):
1039 quoter = urllib.parse.Quoter(urllib.parse._ALWAYS_SAFE)
1040 self.assertIn('Quoter', repr(quoter))
1041
Serhiy Storchaka15154502015-04-07 19:09:01 +03001042 def test_all(self):
1043 expected = []
1044 undocumented = {
1045 'splitattr', 'splithost', 'splitnport', 'splitpasswd',
1046 'splitport', 'splitquery', 'splittag', 'splittype', 'splituser',
1047 'splitvalue',
1048 'Quoter', 'ResultBase', 'clear_cache', 'to_bytes', 'unwrap',
1049 }
1050 for name in dir(urllib.parse):
1051 if name.startswith('_') or name in undocumented:
1052 continue
1053 object = getattr(urllib.parse, name)
1054 if getattr(object, '__module__', None) == 'urllib.parse':
1055 expected.append(name)
1056 self.assertCountEqual(urllib.parse.__all__, expected)
1057
Steve Dower16e6f7d2019-03-07 08:02:26 -08001058 def test_urlsplit_normalization(self):
1059 # Certain characters should never occur in the netloc,
1060 # including under normalization.
1061 # Ensure that ALL of them are detected and cause an error
1062 illegal_chars = '/:#?@'
1063 hex_chars = {'{:04X}'.format(ord(c)) for c in illegal_chars}
1064 denorm_chars = [
1065 c for c in map(chr, range(128, sys.maxunicode))
1066 if (hex_chars & set(unicodedata.decomposition(c).split()))
1067 and c not in illegal_chars
1068 ]
1069 # Sanity check that we found at least one such character
1070 self.assertIn('\u2100', denorm_chars)
1071 self.assertIn('\uFF03', denorm_chars)
1072
Steve Dowerd537ab02019-04-30 12:03:02 +00001073 # bpo-36742: Verify port separators are ignored when they
1074 # existed prior to decomposition
1075 urllib.parse.urlsplit('http://\u30d5\u309a:80')
1076 with self.assertRaises(ValueError):
1077 urllib.parse.urlsplit('http://\u30d5\u309a\ufe1380')
1078
Steve Dower16e6f7d2019-03-07 08:02:26 -08001079 for scheme in ["http", "https", "ftp"]:
Steve Dower8d0ef0b2019-06-04 08:55:30 -07001080 for netloc in ["netloc{}false.netloc", "n{}user@netloc"]:
1081 for c in denorm_chars:
1082 url = "{}://{}/path".format(scheme, netloc.format(c))
1083 with self.subTest(url=url, char='{:04X}'.format(ord(c))):
1084 with self.assertRaises(ValueError):
1085 urllib.parse.urlsplit(url)
Senthil Kumaran6be85c52010-02-19 07:42:50 +00001086
Serhiy Storchaka9270be72015-03-02 16:32:29 +02001087class Utility_Tests(unittest.TestCase):
1088 """Testcase to test the various utility functions in the urllib."""
1089 # In Python 2 this test class was in test_urllib.
1090
1091 def test_splittype(self):
Cheryl Sabella867b8252018-06-03 10:31:32 -04001092 splittype = urllib.parse._splittype
Serhiy Storchaka9270be72015-03-02 16:32:29 +02001093 self.assertEqual(splittype('type:opaquestring'), ('type', 'opaquestring'))
1094 self.assertEqual(splittype('opaquestring'), (None, 'opaquestring'))
1095 self.assertEqual(splittype(':opaquestring'), (None, ':opaquestring'))
1096 self.assertEqual(splittype('type:'), ('type', ''))
1097 self.assertEqual(splittype('type:opaque:string'), ('type', 'opaque:string'))
1098
1099 def test_splithost(self):
Cheryl Sabella867b8252018-06-03 10:31:32 -04001100 splithost = urllib.parse._splithost
Serhiy Storchaka9270be72015-03-02 16:32:29 +02001101 self.assertEqual(splithost('//www.example.org:80/foo/bar/baz.html'),
1102 ('www.example.org:80', '/foo/bar/baz.html'))
1103 self.assertEqual(splithost('//www.example.org:80'),
1104 ('www.example.org:80', ''))
1105 self.assertEqual(splithost('/foo/bar/baz.html'),
1106 (None, '/foo/bar/baz.html'))
1107
postmasters90e01e52017-06-20 06:02:44 -07001108 # bpo-30500: # starts a fragment.
1109 self.assertEqual(splithost('//127.0.0.1#@host.com'),
1110 ('127.0.0.1', '/#@host.com'))
1111 self.assertEqual(splithost('//127.0.0.1#@host.com:80'),
1112 ('127.0.0.1', '/#@host.com:80'))
1113 self.assertEqual(splithost('//127.0.0.1:80#@host.com'),
1114 ('127.0.0.1:80', '/#@host.com'))
1115
1116 # Empty host is returned as empty string.
1117 self.assertEqual(splithost("///file"),
1118 ('', '/file'))
1119
1120 # Trailing semicolon, question mark and hash symbol are kept.
1121 self.assertEqual(splithost("//example.net/file;"),
1122 ('example.net', '/file;'))
1123 self.assertEqual(splithost("//example.net/file?"),
1124 ('example.net', '/file?'))
1125 self.assertEqual(splithost("//example.net/file#"),
1126 ('example.net', '/file#'))
1127
Serhiy Storchaka9270be72015-03-02 16:32:29 +02001128 def test_splituser(self):
Cheryl Sabella867b8252018-06-03 10:31:32 -04001129 splituser = urllib.parse._splituser
Serhiy Storchaka9270be72015-03-02 16:32:29 +02001130 self.assertEqual(splituser('User:Pass@www.python.org:080'),
1131 ('User:Pass', 'www.python.org:080'))
1132 self.assertEqual(splituser('@www.python.org:080'),
1133 ('', 'www.python.org:080'))
1134 self.assertEqual(splituser('www.python.org:080'),
1135 (None, 'www.python.org:080'))
1136 self.assertEqual(splituser('User:Pass@'),
1137 ('User:Pass', ''))
1138 self.assertEqual(splituser('User@example.com:Pass@www.python.org:080'),
1139 ('User@example.com:Pass', 'www.python.org:080'))
1140
1141 def test_splitpasswd(self):
1142 # Some of the password examples are not sensible, but it is added to
1143 # confirming to RFC2617 and addressing issue4675.
Cheryl Sabella867b8252018-06-03 10:31:32 -04001144 splitpasswd = urllib.parse._splitpasswd
Serhiy Storchaka9270be72015-03-02 16:32:29 +02001145 self.assertEqual(splitpasswd('user:ab'), ('user', 'ab'))
1146 self.assertEqual(splitpasswd('user:a\nb'), ('user', 'a\nb'))
1147 self.assertEqual(splitpasswd('user:a\tb'), ('user', 'a\tb'))
1148 self.assertEqual(splitpasswd('user:a\rb'), ('user', 'a\rb'))
1149 self.assertEqual(splitpasswd('user:a\fb'), ('user', 'a\fb'))
1150 self.assertEqual(splitpasswd('user:a\vb'), ('user', 'a\vb'))
1151 self.assertEqual(splitpasswd('user:a:b'), ('user', 'a:b'))
1152 self.assertEqual(splitpasswd('user:a b'), ('user', 'a b'))
1153 self.assertEqual(splitpasswd('user 2:ab'), ('user 2', 'ab'))
1154 self.assertEqual(splitpasswd('user+1:a+b'), ('user+1', 'a+b'))
1155 self.assertEqual(splitpasswd('user:'), ('user', ''))
1156 self.assertEqual(splitpasswd('user'), ('user', None))
1157 self.assertEqual(splitpasswd(':ab'), ('', 'ab'))
1158
1159 def test_splitport(self):
Cheryl Sabella867b8252018-06-03 10:31:32 -04001160 splitport = urllib.parse._splitport
Serhiy Storchaka9270be72015-03-02 16:32:29 +02001161 self.assertEqual(splitport('parrot:88'), ('parrot', '88'))
1162 self.assertEqual(splitport('parrot'), ('parrot', None))
1163 self.assertEqual(splitport('parrot:'), ('parrot', None))
1164 self.assertEqual(splitport('127.0.0.1'), ('127.0.0.1', None))
1165 self.assertEqual(splitport('parrot:cheese'), ('parrot:cheese', None))
1166 self.assertEqual(splitport('[::1]:88'), ('[::1]', '88'))
1167 self.assertEqual(splitport('[::1]'), ('[::1]', None))
1168 self.assertEqual(splitport(':88'), ('', '88'))
1169
1170 def test_splitnport(self):
Cheryl Sabella867b8252018-06-03 10:31:32 -04001171 splitnport = urllib.parse._splitnport
Serhiy Storchaka9270be72015-03-02 16:32:29 +02001172 self.assertEqual(splitnport('parrot:88'), ('parrot', 88))
1173 self.assertEqual(splitnport('parrot'), ('parrot', -1))
1174 self.assertEqual(splitnport('parrot', 55), ('parrot', 55))
1175 self.assertEqual(splitnport('parrot:'), ('parrot', -1))
1176 self.assertEqual(splitnport('parrot:', 55), ('parrot', 55))
1177 self.assertEqual(splitnport('127.0.0.1'), ('127.0.0.1', -1))
1178 self.assertEqual(splitnport('127.0.0.1', 55), ('127.0.0.1', 55))
1179 self.assertEqual(splitnport('parrot:cheese'), ('parrot', None))
1180 self.assertEqual(splitnport('parrot:cheese', 55), ('parrot', None))
1181
1182 def test_splitquery(self):
1183 # Normal cases are exercised by other tests; ensure that we also
1184 # catch cases with no port specified (testcase ensuring coverage)
Cheryl Sabella867b8252018-06-03 10:31:32 -04001185 splitquery = urllib.parse._splitquery
Serhiy Storchaka9270be72015-03-02 16:32:29 +02001186 self.assertEqual(splitquery('http://python.org/fake?foo=bar'),
1187 ('http://python.org/fake', 'foo=bar'))
1188 self.assertEqual(splitquery('http://python.org/fake?foo=bar?'),
1189 ('http://python.org/fake?foo=bar', ''))
1190 self.assertEqual(splitquery('http://python.org/fake'),
1191 ('http://python.org/fake', None))
1192 self.assertEqual(splitquery('?foo=bar'), ('', 'foo=bar'))
1193
1194 def test_splittag(self):
Cheryl Sabella867b8252018-06-03 10:31:32 -04001195 splittag = urllib.parse._splittag
Serhiy Storchaka9270be72015-03-02 16:32:29 +02001196 self.assertEqual(splittag('http://example.com?foo=bar#baz'),
1197 ('http://example.com?foo=bar', 'baz'))
1198 self.assertEqual(splittag('http://example.com?foo=bar#'),
1199 ('http://example.com?foo=bar', ''))
1200 self.assertEqual(splittag('#baz'), ('', 'baz'))
1201 self.assertEqual(splittag('http://example.com?foo=bar'),
1202 ('http://example.com?foo=bar', None))
1203 self.assertEqual(splittag('http://example.com?foo=bar#baz#boo'),
1204 ('http://example.com?foo=bar#baz', 'boo'))
1205
1206 def test_splitattr(self):
Cheryl Sabella867b8252018-06-03 10:31:32 -04001207 splitattr = urllib.parse._splitattr
Serhiy Storchaka9270be72015-03-02 16:32:29 +02001208 self.assertEqual(splitattr('/path;attr1=value1;attr2=value2'),
1209 ('/path', ['attr1=value1', 'attr2=value2']))
1210 self.assertEqual(splitattr('/path;'), ('/path', ['']))
1211 self.assertEqual(splitattr(';attr1=value1;attr2=value2'),
1212 ('', ['attr1=value1', 'attr2=value2']))
1213 self.assertEqual(splitattr('/path'), ('/path', []))
1214
1215 def test_splitvalue(self):
1216 # Normal cases are exercised by other tests; test pathological cases
1217 # with no key/value pairs. (testcase ensuring coverage)
Cheryl Sabella867b8252018-06-03 10:31:32 -04001218 splitvalue = urllib.parse._splitvalue
Serhiy Storchaka9270be72015-03-02 16:32:29 +02001219 self.assertEqual(splitvalue('foo=bar'), ('foo', 'bar'))
1220 self.assertEqual(splitvalue('foo='), ('foo', ''))
1221 self.assertEqual(splitvalue('=bar'), ('', 'bar'))
1222 self.assertEqual(splitvalue('foobar'), ('foobar', None))
1223 self.assertEqual(splitvalue('foo=bar=baz'), ('foo', 'bar=baz'))
1224
1225 def test_to_bytes(self):
Cheryl Sabella867b8252018-06-03 10:31:32 -04001226 result = urllib.parse._to_bytes('http://www.python.org')
Serhiy Storchaka9270be72015-03-02 16:32:29 +02001227 self.assertEqual(result, 'http://www.python.org')
Cheryl Sabella0250de42018-04-25 16:51:54 -07001228 self.assertRaises(UnicodeError, urllib.parse._to_bytes,
Serhiy Storchaka9270be72015-03-02 16:32:29 +02001229 'http://www.python.org/medi\u00e6val')
1230
1231 def test_unwrap(self):
Rémi Lapeyre674ee122019-05-27 15:43:45 +02001232 for wrapped_url in ('<URL:scheme://host/path>', '<scheme://host/path>',
1233 'URL:scheme://host/path', 'scheme://host/path'):
1234 url = urllib.parse.unwrap(wrapped_url)
1235 self.assertEqual(url, 'scheme://host/path')
Serhiy Storchaka9270be72015-03-02 16:32:29 +02001236
Skip Montanaro6ec967d2002-03-23 05:32:10 +00001237
class DeprecationTest(unittest.TestCase):
    """Verify that the public split*() and to_bytes() aliases warn.

    Each test calls one urllib.parse function with an empty string and
    checks that a DeprecationWarning with the exact expected message is
    emitted.
    """

    def _check_deprecation(self, func, expected_message):
        """Assert func('') warns DeprecationWarning with the given text."""
        with self.assertWarns(DeprecationWarning) as cm:
            func('')
        # cm.warning is only populated once the with-block has exited.
        self.assertEqual(str(cm.warning), expected_message)

    def test_splittype_deprecation(self):
        self._check_deprecation(
            urllib.parse.splittype,
            'urllib.parse.splittype() is deprecated as of 3.8, '
            'use urllib.parse.urlparse() instead')

    def test_splithost_deprecation(self):
        self._check_deprecation(
            urllib.parse.splithost,
            'urllib.parse.splithost() is deprecated as of 3.8, '
            'use urllib.parse.urlparse() instead')

    def test_splituser_deprecation(self):
        self._check_deprecation(
            urllib.parse.splituser,
            'urllib.parse.splituser() is deprecated as of 3.8, '
            'use urllib.parse.urlparse() instead')

    def test_splitpasswd_deprecation(self):
        self._check_deprecation(
            urllib.parse.splitpasswd,
            'urllib.parse.splitpasswd() is deprecated as of 3.8, '
            'use urllib.parse.urlparse() instead')

    def test_splitport_deprecation(self):
        self._check_deprecation(
            urllib.parse.splitport,
            'urllib.parse.splitport() is deprecated as of 3.8, '
            'use urllib.parse.urlparse() instead')

    def test_splitnport_deprecation(self):
        self._check_deprecation(
            urllib.parse.splitnport,
            'urllib.parse.splitnport() is deprecated as of 3.8, '
            'use urllib.parse.urlparse() instead')

    def test_splitquery_deprecation(self):
        self._check_deprecation(
            urllib.parse.splitquery,
            'urllib.parse.splitquery() is deprecated as of 3.8, '
            'use urllib.parse.urlparse() instead')

    def test_splittag_deprecation(self):
        self._check_deprecation(
            urllib.parse.splittag,
            'urllib.parse.splittag() is deprecated as of 3.8, '
            'use urllib.parse.urlparse() instead')

    def test_splitattr_deprecation(self):
        self._check_deprecation(
            urllib.parse.splitattr,
            'urllib.parse.splitattr() is deprecated as of 3.8, '
            'use urllib.parse.urlparse() instead')

    def test_splitvalue_deprecation(self):
        # Unlike the others, this one points users at parse_qsl().
        self._check_deprecation(
            urllib.parse.splitvalue,
            'urllib.parse.splitvalue() is deprecated as of 3.8, '
            'use urllib.parse.parse_qsl() instead')

    def test_to_bytes_deprecation(self):
        # The to_bytes() message names no replacement.
        self._check_deprecation(
            urllib.parse.to_bytes,
            'urllib.parse.to_bytes() is deprecated as of 3.8')
1315
Cheryl Sabella0250de42018-04-25 16:51:54 -07001316
# Run the whole test module when it is executed directly as a script.
if __name__ == "__main__":
    unittest.main()