blob: cd3eabb5606b2278e0e3769b2b2887c2d5cb94a0 [file] [log] [blame]
Skip Montanaro6ec967d2002-03-23 05:32:10 +00001import unittest
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002import urllib.parse
Cheryl Sabella0250de42018-04-25 16:51:54 -07003import warnings
Fred Drakea4d18a02001-01-05 05:57:04 +00004
# Base URLs that the urljoin() tests below resolve relative references
# against.  Each name records which test group uses the base; only the
# RFC 1808 base carries a fragment, and SIMPLE_BASE has neither params
# nor a query.
RFC1808_BASE = 'http://a/b/c/d;p?q#f'
RFC2396_BASE = 'http://a/b/c/d;p?q'
RFC3986_BASE = 'http://a/b/c/d;p?q'
SIMPLE_BASE = 'http://a/b/c/d'
Fred Drakea4d18a02001-01-05 05:57:04 +00009
# Each parse_qsl testcase is a two-tuple that contains
# a string with the query and a list with the expected result.
#
# Only the str cases are spelled out; every bytes case is the ASCII-encoded
# twin of a str case, so it is derived instead of being repeated by hand.

def _qsl_case_as_bytes(case):
    """Return the bytes twin of a str ``(query, expected_pairs)`` case."""
    query, pairs = case
    return (query.encode('ascii'),
            [(name.encode('ascii'), value.encode('ascii'))
             for name, value in pairs])


# Fields separated by '&'.
_qsl_ampersand_cases = [
    ("", []),
    ("&", []),
    ("&&", []),
    ("=", [('', '')]),
    ("=a", [('', 'a')]),
    ("a", [('a', '')]),
    ("a=", [('a', '')]),
    ("&a=b", [('a', 'b')]),
    ("a=a+b&b=b+c", [('a', 'a b'), ('b', 'b c')]),
    ("a=1&a=2", [('a', '1'), ('a', '2')]),
]

# Fields separated by ';'.
_qsl_semicolon_cases = [
    (";", []),
    (";;", []),
    (";a=b", [('a', 'b')]),
    ("a=a+b;b=b+c", [('a', 'a b'), ('b', 'b c')]),
    ("a=1;a=2", [('a', '1'), ('a', '2')]),
]

parse_qsl_test_cases = (
    _qsl_ampersand_cases
    + [_qsl_case_as_bytes(case) for case in _qsl_ampersand_cases]
    + _qsl_semicolon_cases
    + [_qsl_case_as_bytes(case) for case in _qsl_semicolon_cases]
)
45
# Each parse_qs testcase is a two-tuple that contains
# a string with the query and a dictionary with the expected result.
#
# Only the str cases are spelled out; every bytes case is the ASCII-encoded
# twin of a str case, so it is derived instead of being repeated by hand.

def _qs_case_as_bytes(case):
    """Return the bytes twin of a str ``(query, expected_dict)`` case."""
    query, fields = case
    return (query.encode('ascii'),
            {name.encode('ascii'): [value.encode('ascii') for value in values]
             for name, values in fields.items()})


# Fields separated by '&'.
_qs_ampersand_cases = [
    ("", {}),
    ("&", {}),
    ("&&", {}),
    ("=", {'': ['']}),
    ("=a", {'': ['a']}),
    ("a", {'a': ['']}),
    ("a=", {'a': ['']}),
    ("&a=b", {'a': ['b']}),
    ("a=a+b&b=b+c", {'a': ['a b'], 'b': ['b c']}),
    ("a=1&a=2", {'a': ['1', '2']}),
]

# Fields separated by ';'.
_qs_semicolon_cases = [
    (";", {}),
    (";;", {}),
    (";a=b", {'a': ['b']}),
    ("a=a+b;b=b+c", {'a': ['a b'], 'b': ['b c']}),
    ("a=1;a=2", {'a': ['1', '2']}),
]

parse_qs_test_cases = (
    _qs_ampersand_cases
    + [_qs_case_as_bytes(case) for case in _qs_ampersand_cases]
    + _qs_semicolon_cases
    + [_qs_case_as_bytes(case) for case in _qs_semicolon_cases]
)
81
Skip Montanaro6ec967d2002-03-23 05:32:10 +000082class UrlParseTestCase(unittest.TestCase):
Johannes Gijsbers41e4faa2005-01-09 15:29:10 +000083
def checkRoundtrips(self, url, parsed, split):
    """Assert that *url* survives a parse/unparse and a split/unsplit cycle.

    *parsed* is the 6-tuple expected from urlparse(url) and *split* the
    5-tuple expected from urlsplit(url).  For each of the two function
    pairs the helper checks that:

    * the parse result equals the expected tuple, both as a whole and
      attribute by attribute;
    * putting the result back together yields *url* again, and agrees
      with the result's geturl();
    * geturl() is a fixpoint: parsing it again gives an equal result,
      including the derived username/password/hostname/port attributes.
    """
    derived_attrs = ('username', 'password', 'hostname', 'port')
    round_trips = (
        (urllib.parse.urlparse, urllib.parse.urlunparse, parsed,
         ('scheme', 'netloc', 'path', 'params', 'query', 'fragment')),
        (urllib.parse.urlsplit, urllib.parse.urlunsplit, split,
         ('scheme', 'netloc', 'path', 'query', 'fragment')),
    )
    for take_apart, reassemble, expected, fields in round_trips:
        first = take_apart(url)
        self.assertEqual(first, expected)
        by_attribute = tuple(getattr(first, field) for field in fields)
        self.assertEqual(by_attribute, expected)

        # put it back together and it should be the same
        rebuilt = reassemble(first)
        self.assertEqual(rebuilt, url)
        self.assertEqual(rebuilt, first.geturl())

        # the result of geturl() is a fixpoint; we can always parse it
        # again to get the same result
        second = take_apart(first.geturl())
        self.assertEqual(second.geturl(), first.geturl())
        self.assertEqual(second, first)
        for attr in fields + derived_attrs:
            self.assertEqual(getattr(second, attr), getattr(first, attr))
Johannes Gijsbers41e4faa2005-01-09 15:29:10 +0000134
def test_qsl(self):
    # Run parse_qsl() over every entry of parse_qsl_test_cases twice:
    # once keeping blank values, once dropping them.
    for query, pairs in parse_qsl_test_cases:
        failure_note = "Error parsing %r" % query

        self.assertEqual(
            urllib.parse.parse_qsl(query, keep_blank_values=True),
            pairs, failure_note)

        # Without blank values, every pair whose value is empty goes away.
        pairs_with_values = [pair for pair in pairs if pair[1]]
        self.assertEqual(
            urllib.parse.parse_qsl(query, keep_blank_values=False),
            pairs_with_values, failure_note)
Facundo Batistac469d4c2008-09-03 22:49:01 +0000143
def test_qs(self):
    # Run parse_qs() over every entry of parse_qs_test_cases twice:
    # once keeping blank values, once dropping them.
    for query, expected in parse_qs_test_cases:
        failure_note = "Error parsing %r" % query

        result = urllib.parse.parse_qs(query, keep_blank_values=True)
        self.assertEqual(result, expected, failure_note)

        # Build the expectation without blanks value by value (the same
        # way test_qsl filters pairs): a key keeps its non-blank values
        # and disappears only when none are left.  Looking at just the
        # first value of each key would mishandle a key that mixes
        # blank and non-blank values.
        expected_without_blanks = {}
        for name, values in expected.items():
            kept = [value for value in values if value]
            if kept:
                expected_without_blanks[name] = kept

        result = urllib.parse.parse_qs(query, keep_blank_values=False)
        self.assertEqual(result, expected_without_blanks, failure_note)
153
def test_roundtrips(self):
    # Round-trip URLs of several non-HTTP schemes through
    # checkRoundtrips(), first as str and then as ASCII-encoded bytes.
    # None of these URLs has params, a query or a fragment, so only the
    # URL, scheme, netloc and path need to be listed.
    locations = [
        ('file:///tmp/junk.txt',
         'file', '', '/tmp/junk.txt'),
        ('imap://mail.python.org/mbox1',
         'imap', 'mail.python.org', '/mbox1'),
        ('mms://wms.sys.hinet.net/cts/Drama/09006251100.asf',
         'mms', 'wms.sys.hinet.net', '/cts/Drama/09006251100.asf'),
        ('nfs://server/path/to/file.txt',
         'nfs', 'server', '/path/to/file.txt'),
        ('svn+ssh://svn.zope.org/repos/main/ZConfig/trunk/',
         'svn+ssh', 'svn.zope.org', '/repos/main/ZConfig/trunk/'),
        ('git+ssh://git@github.com/user/project.git',
         'git+ssh', 'git@github.com', '/user/project.git'),
    ]
    text_cases = [
        (url,
         (scheme, netloc, path, '', '', ''),    # expected urlparse() result
         (scheme, netloc, path, '', ''))        # expected urlsplit() result
        for url, scheme, netloc, path in locations
    ]

    def as_bytes(case):
        url, parsed, split = case
        return (url.encode('ascii'),
                tuple(part.encode('ascii') for part in parsed),
                tuple(part.encode('ascii') for part in split))

    for case in text_cases + [as_bytes(case) for case in text_cases]:
        self.checkRoundtrips(*case)
Michael W. Hudsonbd3e7712002-03-18 13:06:00 +0000188
def test_http_roundtrips(self):
    # urllib.parse.urlsplit treats 'http:' as an optimized special case,
    # so we test both 'http:' and 'https:' in all the following.
    # Three cheers for white box knowledge!
    #
    # Each entry lists the URL without its scheme, followed by the
    # expected urlparse() and urlsplit() results without their leading
    # scheme element; the scheme is put in front inside the loop.
    schemeless_cases = [
        ('://www.python.org',
         ('www.python.org', '', '', '', ''),
         ('www.python.org', '', '', '')),
        ('://www.python.org#abc',
         ('www.python.org', '', '', '', 'abc'),
         ('www.python.org', '', '', 'abc')),
        ('://www.python.org?q=abc',
         ('www.python.org', '', '', 'q=abc', ''),
         ('www.python.org', '', 'q=abc', '')),
        ('://www.python.org/#abc',
         ('www.python.org', '/', '', '', 'abc'),
         ('www.python.org', '/', '', 'abc')),
        ('://a/b/c/d;p?q#f',
         ('a', '/b/c/d', 'p', 'q', 'f'),
         ('a', '/b/c/d;p', 'q', 'f')),
    ]

    def keep_text(text):
        return text

    def to_ascii_bytes(text):
        return text.encode('ascii')

    # All str combinations run first, then the same ones as bytes.
    for convert in (keep_text, to_ascii_bytes):
        for scheme_name in ('http', 'https'):
            scheme = convert(scheme_name)
            for tail, parsed_tail, split_tail in schemeless_cases:
                self.checkRoundtrips(
                    scheme + convert(tail),
                    (scheme,) + tuple(convert(part) for part in parsed_tail),
                    (scheme,) + tuple(convert(part) for part in split_tail))
Fred Drake70705652002-10-16 21:02:36 +0000226
def checkJoin(self, base, relurl, expected):
    """Assert that urljoin(base, relurl) gives *expected*, as str and bytes.

    All three arguments are str.  The join is checked once as given and
    once more with every argument encoded to ASCII bytes, so each call
    covers both input types.  A failure message names the base and the
    relative reference, which the compared values alone do not show.
    """
    self.assertEqual(
        urllib.parse.urljoin(base, relurl), expected,
        "urljoin(%r, %r)" % (base, relurl))
    baseb, relurlb, expectedb = [
        x.encode('ascii') for x in (base, relurl, expected)]
    self.assertEqual(
        urllib.parse.urljoin(baseb, relurlb), expectedb,
        "urljoin(%r, %r)" % (baseb, relurlb))
Guido van Rossumbbc05682002-10-14 19:59:54 +0000233
234 def test_unparse_parse(self):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000235 str_cases = ['Python', './Python','x-newscheme://foo.com/stuff','x://y','x:/y','x:/','/',]
236 bytes_cases = [x.encode('ascii') for x in str_cases]
237 for u in str_cases + bytes_cases:
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000238 self.assertEqual(urllib.parse.urlunsplit(urllib.parse.urlsplit(u)), u)
239 self.assertEqual(urllib.parse.urlunparse(urllib.parse.urlparse(u)), u)
Fred Drakea4d18a02001-01-05 05:57:04 +0000240
def test_RFC1808(self):
    # Each pair is (relative reference, expected result of joining it
    # onto RFC1808_BASE).

    # "normal" cases from RFC 1808:
    normal_cases = [
        ('g:h', 'g:h'),
        ('g', 'http://a/b/c/g'),
        ('./g', 'http://a/b/c/g'),
        ('g/', 'http://a/b/c/g/'),
        ('/g', 'http://a/g'),
        ('//g', 'http://g'),
        ('g?y', 'http://a/b/c/g?y'),
        ('g?y/./x', 'http://a/b/c/g?y/./x'),
        ('#s', 'http://a/b/c/d;p?q#s'),
        ('g#s', 'http://a/b/c/g#s'),
        ('g#s/./x', 'http://a/b/c/g#s/./x'),
        ('g?y#s', 'http://a/b/c/g?y#s'),
        ('g;x', 'http://a/b/c/g;x'),
        ('g;x?y#s', 'http://a/b/c/g;x?y#s'),
        ('.', 'http://a/b/c/'),
        ('./', 'http://a/b/c/'),
        ('..', 'http://a/b/'),
        ('../', 'http://a/b/'),
        ('../g', 'http://a/b/g'),
        ('../..', 'http://a/'),
        ('../../', 'http://a/'),
        ('../../g', 'http://a/g'),
    ]

    # "abnormal" cases from RFC 1808:
    abnormal_cases = [
        ('', 'http://a/b/c/d;p?q#f'),
        ('g.', 'http://a/b/c/g.'),
        ('.g', 'http://a/b/c/.g'),
        ('g..', 'http://a/b/c/g..'),
        ('..g', 'http://a/b/c/..g'),
        ('./../g', 'http://a/b/g'),
        ('./g/.', 'http://a/b/c/g/'),
        ('g/./h', 'http://a/b/c/g/h'),
        ('g/../h', 'http://a/b/c/h'),
    ]

    for reference, joined in normal_cases + abnormal_cases:
        self.checkJoin(RFC1808_BASE, reference, joined)

    # RFC 1808 and RFC 1630 disagree on these (according to RFC 1808),
    # so we'll not actually run these tests (which expect 1808 behavior).
    #   ('http:g', 'http:g')
    #   ('http:', 'http:')

    # XXX: The following tests are no longer compatible with RFC3986
    #   ('../../../g', 'http://a/../g')
    #   ('../../../../g', 'http://a/../../g')
    #   ('/./g', 'http://a/./g')
    #   ('/../g', 'http://a/../g')
287
288
Senthil Kumaran397eb442011-04-15 18:20:24 +0800289 def test_RFC2368(self):
290 # Issue 11467: path that starts with a number is not parsed correctly
291 self.assertEqual(urllib.parse.urlparse('mailto:1337@example.org'),
292 ('mailto', '', '1337@example.org', '', '', ''))
293
def test_RFC2396(self):
    # cases from RFC 2396
    #
    # Each pair is (relative reference, expected result of joining it
    # onto RFC2396_BASE).
    cases = [
        ('g:h', 'g:h'),
        ('g', 'http://a/b/c/g'),
        ('./g', 'http://a/b/c/g'),
        ('g/', 'http://a/b/c/g/'),
        ('/g', 'http://a/g'),
        ('//g', 'http://g'),
        ('g?y', 'http://a/b/c/g?y'),
        ('#s', 'http://a/b/c/d;p?q#s'),
        ('g#s', 'http://a/b/c/g#s'),
        ('g?y#s', 'http://a/b/c/g?y#s'),
        ('g;x', 'http://a/b/c/g;x'),
        ('g;x?y#s', 'http://a/b/c/g;x?y#s'),
        ('.', 'http://a/b/c/'),
        ('./', 'http://a/b/c/'),
        ('..', 'http://a/b/'),
        ('../', 'http://a/b/'),
        ('../g', 'http://a/b/g'),
        ('../..', 'http://a/'),
        ('../../', 'http://a/'),
        ('../../g', 'http://a/g'),
        ('', RFC2396_BASE),
        ('g.', 'http://a/b/c/g.'),
        ('.g', 'http://a/b/c/.g'),
        ('g..', 'http://a/b/c/g..'),
        ('..g', 'http://a/b/c/..g'),
        ('./../g', 'http://a/b/g'),
        ('./g/.', 'http://a/b/c/g/'),
        ('g/./h', 'http://a/b/c/g/h'),
        ('g/../h', 'http://a/b/c/h'),
        ('g;x=1/./y', 'http://a/b/c/g;x=1/y'),
        ('g;x=1/../y', 'http://a/b/c/y'),
        ('g?y/./x', 'http://a/b/c/g?y/./x'),
        ('g?y/../x', 'http://a/b/c/g?y/../x'),
        ('g#s/./x', 'http://a/b/c/g#s/./x'),
        ('g#s/../x', 'http://a/b/c/g#s/../x'),
    ]
    for reference, joined in cases:
        self.checkJoin(RFC2396_BASE, reference, joined)

    # XXX: The following tests are no longer compatible with RFC3986
    #   ('../../../g', 'http://a/../g')
    #   ('../../../../g', 'http://a/../../g')
    #   ('/./g', 'http://a/./g')
    #   ('/../g', 'http://a/../g')
338
def test_RFC3986(self):
    # Each pair is (relative reference, expected result of joining it
    # onto RFC3986_BASE).  Every reference is listed exactly once.
    normal_cases = [
        ('?y', 'http://a/b/c/d;p?y'),
        (';x', 'http://a/b/c/;x'),
        ('g:h', 'g:h'),
        ('g', 'http://a/b/c/g'),
        ('./g', 'http://a/b/c/g'),
        ('g/', 'http://a/b/c/g/'),
        ('/g', 'http://a/g'),
        ('//g', 'http://g'),
        ('g?y', 'http://a/b/c/g?y'),
        ('#s', 'http://a/b/c/d;p?q#s'),
        ('g#s', 'http://a/b/c/g#s'),
        ('g?y#s', 'http://a/b/c/g?y#s'),
        ('g;x', 'http://a/b/c/g;x'),
        ('g;x?y#s', 'http://a/b/c/g;x?y#s'),
        ('', 'http://a/b/c/d;p?q'),
        ('.', 'http://a/b/c/'),
        ('./', 'http://a/b/c/'),
        ('..', 'http://a/b/'),
        ('../', 'http://a/b/'),
        ('../g', 'http://a/b/g'),
        ('../..', 'http://a/'),
        ('../../', 'http://a/'),
        ('../../g', 'http://a/g'),
    ]

    # Abnormal Examples
    #
    # These are live assertions, not reference-only notes: each one is
    # checked against urljoin() exactly like the normal examples above.
    abnormal_cases = [
        ('../../../g', 'http://a/g'),
        ('../../../../g', 'http://a/g'),
        ('/./g', 'http://a/g'),
        ('/../g', 'http://a/g'),
        ('g.', 'http://a/b/c/g.'),
        ('.g', 'http://a/b/c/.g'),
        ('g..', 'http://a/b/c/g..'),
        ('..g', 'http://a/b/c/..g'),
        ('./../g', 'http://a/b/g'),
        ('./g/.', 'http://a/b/c/g/'),
        ('g/./h', 'http://a/b/c/g/h'),
        ('g/../h', 'http://a/b/c/h'),
        ('g;x=1/./y', 'http://a/b/c/g;x=1/y'),
        ('g;x=1/../y', 'http://a/b/c/y'),
        ('g?y/./x', 'http://a/b/c/g?y/./x'),
        ('g?y/../x', 'http://a/b/c/g?y/../x'),
        ('g#s/./x', 'http://a/b/c/g#s/./x'),
        ('g#s/../x', 'http://a/b/c/g#s/../x'),
        # A strict parser would give 'http:g' here; urljoin() is checked
        # for the relaxed result instead.
        ('http:g', 'http://a/b/c/g'),
    ]

    for reference, joined in normal_cases + abnormal_cases:
        self.checkJoin(RFC3986_BASE, reference, joined)

    # Test for issue9721
    self.checkJoin('http://a/b/c/de', ';x', 'http://a/b/c/;x')
395
def test_urljoins(self):
    # Pairs of (relative reference, expected result) joined onto
    # SIMPLE_BASE.
    simple_base_cases = [
        ('g:h', 'g:h'),
        ('http:g', 'http://a/b/c/g'),
        ('http:', 'http://a/b/c/d'),
        ('g', 'http://a/b/c/g'),
        ('./g', 'http://a/b/c/g'),
        ('g/', 'http://a/b/c/g/'),
        ('/g', 'http://a/g'),
        ('//g', 'http://g'),
        ('?y', 'http://a/b/c/d?y'),
        ('g?y', 'http://a/b/c/g?y'),
        ('g?y/./x', 'http://a/b/c/g?y/./x'),
        ('.', 'http://a/b/c/'),
        ('./', 'http://a/b/c/'),
        ('..', 'http://a/b/'),
        ('../', 'http://a/b/'),
        ('../g', 'http://a/b/g'),
        ('../..', 'http://a/'),
        ('../../g', 'http://a/g'),
        ('./../g', 'http://a/b/g'),
        ('./g/.', 'http://a/b/c/g/'),
        ('g/./h', 'http://a/b/c/g/h'),
        ('g/../h', 'http://a/b/c/h'),
        ('http:g', 'http://a/b/c/g'),
        ('http:', 'http://a/b/c/d'),
        ('http:?y', 'http://a/b/c/d?y'),
        ('http:g?y', 'http://a/b/c/g?y'),
        ('http:g?y/./x', 'http://a/b/c/g?y/./x'),
    ]
    for reference, joined in simple_base_cases:
        self.checkJoin(SIMPLE_BASE, reference, joined)

    # Triples of (base, relative reference, expected result) for cases
    # that need a base of their own.
    other_base_cases = [
        ('http:///', '..', 'http:///'),
        ('', 'http://a/b/c/g?y/./x', 'http://a/b/c/g?y/./x'),
        ('', 'http://a/./g', 'http://a/./g'),
        ('svn://pathtorepo/dir1', 'dir2', 'svn://pathtorepo/dir2'),
        ('svn+ssh://pathtorepo/dir1', 'dir2', 'svn+ssh://pathtorepo/dir2'),
        ('ws://a/b', 'g', 'ws://a/g'),
        ('wss://a/b', 'g', 'wss://a/g'),

        # XXX: The following tests are no longer compatible with RFC3986
        #   (SIMPLE_BASE, '../../../g', 'http://a/../g')
        #   (SIMPLE_BASE, '/./g', 'http://a/./g')

        # test for issue22118 duplicate slashes
        (SIMPLE_BASE + '/', 'foo', SIMPLE_BASE + '/foo'),

        # Non-RFC-defined tests, covering variations of base and trailing
        # slashes
        ('http://a/b/c/d/e/', '../../f/g/', 'http://a/b/c/f/g/'),
        ('http://a/b/c/d/e', '../../f/g/', 'http://a/b/f/g/'),
        ('http://a/b/c/d/e/', '/../../f/g/', 'http://a/f/g/'),
        ('http://a/b/c/d/e', '/../../f/g/', 'http://a/f/g/'),
        ('http://a/b/c/d/e/', '../../f/g', 'http://a/b/c/f/g'),
        ('http://a/b/', '../../f/g/', 'http://a/f/g/'),

        # issue 23703: don't duplicate filename
        ('a', 'b', 'b'),
    ]
    for base, reference, joined in other_base_cases:
        self.checkJoin(base, reference, joined)
450
Senthil Kumaranad02d232010-04-16 03:02:13 +0000451 def test_RFC2732(self):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000452 str_cases = [
Senthil Kumaranad02d232010-04-16 03:02:13 +0000453 ('http://Test.python.org:5432/foo/', 'test.python.org', 5432),
454 ('http://12.34.56.78:5432/foo/', '12.34.56.78', 5432),
455 ('http://[::1]:5432/foo/', '::1', 5432),
456 ('http://[dead:beef::1]:5432/foo/', 'dead:beef::1', 5432),
457 ('http://[dead:beef::]:5432/foo/', 'dead:beef::', 5432),
458 ('http://[dead:beef:cafe:5417:affe:8FA3:deaf:feed]:5432/foo/',
459 'dead:beef:cafe:5417:affe:8fa3:deaf:feed', 5432),
460 ('http://[::12.34.56.78]:5432/foo/', '::12.34.56.78', 5432),
461 ('http://[::ffff:12.34.56.78]:5432/foo/',
462 '::ffff:12.34.56.78', 5432),
463 ('http://Test.python.org/foo/', 'test.python.org', None),
464 ('http://12.34.56.78/foo/', '12.34.56.78', None),
465 ('http://[::1]/foo/', '::1', None),
466 ('http://[dead:beef::1]/foo/', 'dead:beef::1', None),
467 ('http://[dead:beef::]/foo/', 'dead:beef::', None),
468 ('http://[dead:beef:cafe:5417:affe:8FA3:deaf:feed]/foo/',
469 'dead:beef:cafe:5417:affe:8fa3:deaf:feed', None),
470 ('http://[::12.34.56.78]/foo/', '::12.34.56.78', None),
471 ('http://[::ffff:12.34.56.78]/foo/',
472 '::ffff:12.34.56.78', None),
Serhiy Storchakaff97b082014-01-18 18:30:33 +0200473 ('http://Test.python.org:/foo/', 'test.python.org', None),
474 ('http://12.34.56.78:/foo/', '12.34.56.78', None),
475 ('http://[::1]:/foo/', '::1', None),
476 ('http://[dead:beef::1]:/foo/', 'dead:beef::1', None),
477 ('http://[dead:beef::]:/foo/', 'dead:beef::', None),
478 ('http://[dead:beef:cafe:5417:affe:8FA3:deaf:feed]:/foo/',
479 'dead:beef:cafe:5417:affe:8fa3:deaf:feed', None),
480 ('http://[::12.34.56.78]:/foo/', '::12.34.56.78', None),
481 ('http://[::ffff:12.34.56.78]:/foo/',
482 '::ffff:12.34.56.78', None),
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000483 ]
484 def _encode(t):
485 return t[0].encode('ascii'), t[1].encode('ascii'), t[2]
486 bytes_cases = [_encode(x) for x in str_cases]
487 for url, hostname, port in str_cases + bytes_cases:
Senthil Kumaranad02d232010-04-16 03:02:13 +0000488 urlparsed = urllib.parse.urlparse(url)
489 self.assertEqual((urlparsed.hostname, urlparsed.port) , (hostname, port))
490
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000491 str_cases = [
Senthil Kumaranad02d232010-04-16 03:02:13 +0000492 'http://::12.34.56.78]/',
493 'http://[::1/foo/',
Senthil Kumaran7a1e09f2010-04-22 12:19:46 +0000494 'ftp://[::1/foo/bad]/bad',
Senthil Kumaran2eaef052010-04-20 20:42:50 +0000495 'http://[::1/foo/bad]/bad',
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000496 'http://[::ffff:12.34.56.78']
497 bytes_cases = [x.encode('ascii') for x in str_cases]
498 for invalid_url in str_cases + bytes_cases:
Senthil Kumaran7a1e09f2010-04-22 12:19:46 +0000499 self.assertRaises(ValueError, urllib.parse.urlparse, invalid_url)
Senthil Kumaranad02d232010-04-16 03:02:13 +0000500
def test_urldefrag(self):
    # Each entry is (url, url without its fragment, fragment).
    text_cases = [
        ('http://python.org#frag', 'http://python.org', 'frag'),
        ('http://python.org', 'http://python.org', ''),
        ('http://python.org/#frag', 'http://python.org/', 'frag'),
        ('http://python.org/', 'http://python.org/', ''),
        ('http://python.org/?q#frag', 'http://python.org/?q', 'frag'),
        ('http://python.org/?q', 'http://python.org/?q', ''),
        ('http://python.org/p#frag', 'http://python.org/p', 'frag'),
        ('http://python.org/p?q', 'http://python.org/p?q', ''),
        (RFC1808_BASE, 'http://a/b/c/d;p?q', 'f'),
        (RFC2396_BASE, 'http://a/b/c/d;p?q', ''),
    ]
    encoded_cases = [
        tuple(part.encode('ascii') for part in case)
        for case in text_cases
    ]
    for url, without_fragment, fragment in text_cases + encoded_cases:
        outcome = urllib.parse.urldefrag(url)
        # The result reassembles to the input, compares equal to a plain
        # two-tuple, and exposes both parts as named attributes.
        self.assertEqual(outcome.geturl(), url)
        self.assertEqual(outcome, (without_fragment, fragment))
        self.assertEqual(outcome.url, without_fragment)
        self.assertEqual(outcome.fragment, fragment)
Fred Drake70705652002-10-16 21:02:36 +0000523
Коренберг Маркfbd60512017-12-21 17:16:17 +0500524 def test_urlsplit_scoped_IPv6(self):
525 p = urllib.parse.urlsplit('http://[FE80::822a:a8ff:fe49:470c%tESt]:1234')
526 self.assertEqual(p.hostname, "fe80::822a:a8ff:fe49:470c%tESt")
527 self.assertEqual(p.netloc, '[FE80::822a:a8ff:fe49:470c%tESt]:1234')
528
529 p = urllib.parse.urlsplit(b'http://[FE80::822a:a8ff:fe49:470c%tESt]:1234')
530 self.assertEqual(p.hostname, b"fe80::822a:a8ff:fe49:470c%tESt")
531 self.assertEqual(p.netloc, b'[FE80::822a:a8ff:fe49:470c%tESt]:1234')
532
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000533 def test_urlsplit_attributes(self):
534 url = "HTTP://WWW.PYTHON.ORG/doc/#frag"
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000535 p = urllib.parse.urlsplit(url)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000536 self.assertEqual(p.scheme, "http")
537 self.assertEqual(p.netloc, "WWW.PYTHON.ORG")
538 self.assertEqual(p.path, "/doc/")
539 self.assertEqual(p.query, "")
540 self.assertEqual(p.fragment, "frag")
541 self.assertEqual(p.username, None)
542 self.assertEqual(p.password, None)
543 self.assertEqual(p.hostname, "www.python.org")
544 self.assertEqual(p.port, None)
545 # geturl() won't return exactly the original URL in this case
546 # since the scheme is always case-normalized
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000547 # We handle this by ignoring the first 4 characters of the URL
548 self.assertEqual(p.geturl()[4:], url[4:])
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000549
550 url = "http://User:Pass@www.python.org:080/doc/?query=yes#frag"
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000551 p = urllib.parse.urlsplit(url)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000552 self.assertEqual(p.scheme, "http")
553 self.assertEqual(p.netloc, "User:Pass@www.python.org:080")
554 self.assertEqual(p.path, "/doc/")
555 self.assertEqual(p.query, "query=yes")
556 self.assertEqual(p.fragment, "frag")
557 self.assertEqual(p.username, "User")
558 self.assertEqual(p.password, "Pass")
559 self.assertEqual(p.hostname, "www.python.org")
560 self.assertEqual(p.port, 80)
561 self.assertEqual(p.geturl(), url)
562
Christian Heimesfaf2f632008-01-06 16:59:19 +0000563 # Addressing issue1698, which suggests Username can contain
564 # "@" characters. Though not RFC compliant, many ftp sites allow
565 # and request email addresses as usernames.
566
567 url = "http://User@example.com:Pass@www.python.org:080/doc/?query=yes#frag"
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000568 p = urllib.parse.urlsplit(url)
Christian Heimesfaf2f632008-01-06 16:59:19 +0000569 self.assertEqual(p.scheme, "http")
570 self.assertEqual(p.netloc, "User@example.com:Pass@www.python.org:080")
571 self.assertEqual(p.path, "/doc/")
572 self.assertEqual(p.query, "query=yes")
573 self.assertEqual(p.fragment, "frag")
574 self.assertEqual(p.username, "User@example.com")
575 self.assertEqual(p.password, "Pass")
576 self.assertEqual(p.hostname, "www.python.org")
577 self.assertEqual(p.port, 80)
578 self.assertEqual(p.geturl(), url)
579
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000580 # And check them all again, only with bytes this time
581 url = b"HTTP://WWW.PYTHON.ORG/doc/#frag"
582 p = urllib.parse.urlsplit(url)
583 self.assertEqual(p.scheme, b"http")
584 self.assertEqual(p.netloc, b"WWW.PYTHON.ORG")
585 self.assertEqual(p.path, b"/doc/")
586 self.assertEqual(p.query, b"")
587 self.assertEqual(p.fragment, b"frag")
588 self.assertEqual(p.username, None)
589 self.assertEqual(p.password, None)
590 self.assertEqual(p.hostname, b"www.python.org")
591 self.assertEqual(p.port, None)
592 self.assertEqual(p.geturl()[4:], url[4:])
593
594 url = b"http://User:Pass@www.python.org:080/doc/?query=yes#frag"
595 p = urllib.parse.urlsplit(url)
596 self.assertEqual(p.scheme, b"http")
597 self.assertEqual(p.netloc, b"User:Pass@www.python.org:080")
598 self.assertEqual(p.path, b"/doc/")
599 self.assertEqual(p.query, b"query=yes")
600 self.assertEqual(p.fragment, b"frag")
601 self.assertEqual(p.username, b"User")
602 self.assertEqual(p.password, b"Pass")
603 self.assertEqual(p.hostname, b"www.python.org")
604 self.assertEqual(p.port, 80)
605 self.assertEqual(p.geturl(), url)
606
607 url = b"http://User@example.com:Pass@www.python.org:080/doc/?query=yes#frag"
608 p = urllib.parse.urlsplit(url)
609 self.assertEqual(p.scheme, b"http")
610 self.assertEqual(p.netloc, b"User@example.com:Pass@www.python.org:080")
611 self.assertEqual(p.path, b"/doc/")
612 self.assertEqual(p.query, b"query=yes")
613 self.assertEqual(p.fragment, b"frag")
614 self.assertEqual(p.username, b"User@example.com")
615 self.assertEqual(p.password, b"Pass")
616 self.assertEqual(p.hostname, b"www.python.org")
617 self.assertEqual(p.port, 80)
618 self.assertEqual(p.geturl(), url)
Christian Heimesfaf2f632008-01-06 16:59:19 +0000619
Robert Collinsdfa95c92015-08-10 09:53:30 +1200620 # Verify an illegal port raises ValueError
Senthil Kumaran2fc5a502012-05-24 21:56:17 +0800621 url = b"HTTP://WWW.PYTHON.ORG:65536/doc/#frag"
622 p = urllib.parse.urlsplit(url)
Robert Collinsdfa95c92015-08-10 09:53:30 +1200623 with self.assertRaisesRegex(ValueError, "out of range"):
624 p.port
Senthil Kumaran2fc5a502012-05-24 21:56:17 +0800625
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000626 def test_attributes_bad_port(self):
Robert Collinsdfa95c92015-08-10 09:53:30 +1200627 """Check handling of invalid ports."""
628 for bytes in (False, True):
629 for parse in (urllib.parse.urlsplit, urllib.parse.urlparse):
630 for port in ("foo", "1.5", "-1", "0x10"):
631 with self.subTest(bytes=bytes, parse=parse, port=port):
632 netloc = "www.example.net:" + port
633 url = "http://" + netloc
634 if bytes:
635 netloc = netloc.encode("ascii")
636 url = url.encode("ascii")
637 p = parse(url)
638 self.assertEqual(p.netloc, netloc)
639 with self.assertRaises(ValueError):
640 p.port
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000641
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000642 def test_attributes_without_netloc(self):
643 # This example is straight from RFC 3261. It looks like it
644 # should allow the username, hostname, and port to be filled
645 # in, but doesn't. Since it's a URI and doesn't use the
646 # scheme://netloc syntax, the netloc and related attributes
647 # should be left empty.
648 uri = "sip:alice@atlanta.com;maddr=239.255.255.1;ttl=15"
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000649 p = urllib.parse.urlsplit(uri)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000650 self.assertEqual(p.netloc, "")
651 self.assertEqual(p.username, None)
652 self.assertEqual(p.password, None)
653 self.assertEqual(p.hostname, None)
654 self.assertEqual(p.port, None)
655 self.assertEqual(p.geturl(), uri)
656
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000657 p = urllib.parse.urlparse(uri)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000658 self.assertEqual(p.netloc, "")
659 self.assertEqual(p.username, None)
660 self.assertEqual(p.password, None)
661 self.assertEqual(p.hostname, None)
662 self.assertEqual(p.port, None)
663 self.assertEqual(p.geturl(), uri)
664
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000665 # You guessed it, repeating the test with bytes input
666 uri = b"sip:alice@atlanta.com;maddr=239.255.255.1;ttl=15"
667 p = urllib.parse.urlsplit(uri)
668 self.assertEqual(p.netloc, b"")
669 self.assertEqual(p.username, None)
670 self.assertEqual(p.password, None)
671 self.assertEqual(p.hostname, None)
672 self.assertEqual(p.port, None)
673 self.assertEqual(p.geturl(), uri)
674
675 p = urllib.parse.urlparse(uri)
676 self.assertEqual(p.netloc, b"")
677 self.assertEqual(p.username, None)
678 self.assertEqual(p.password, None)
679 self.assertEqual(p.hostname, None)
680 self.assertEqual(p.port, None)
681 self.assertEqual(p.geturl(), uri)
682
Christian Heimesfaf2f632008-01-06 16:59:19 +0000683 def test_noslash(self):
684 # Issue 1637: http://foo.com?query is legal
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000685 self.assertEqual(urllib.parse.urlparse("http://example.com?blahblah=/foo"),
Christian Heimesfaf2f632008-01-06 16:59:19 +0000686 ('http', 'example.com', '', '', 'blahblah=/foo', ''))
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000687 self.assertEqual(urllib.parse.urlparse(b"http://example.com?blahblah=/foo"),
688 (b'http', b'example.com', b'', b'', b'blahblah=/foo', b''))
Christian Heimesfaf2f632008-01-06 16:59:19 +0000689
Senthil Kumaran84c7d9f2010-08-04 04:50:44 +0000690 def test_withoutscheme(self):
691 # Test urlparse without scheme
692 # Issue 754016: urlparse goes wrong with IP:port without scheme
693 # RFC 1808 specifies that netloc should start with //, urlparse expects
694 # the same, otherwise it classifies the portion of url as path.
695 self.assertEqual(urllib.parse.urlparse("path"),
696 ('','','path','','',''))
697 self.assertEqual(urllib.parse.urlparse("//www.python.org:80"),
698 ('','www.python.org:80','','','',''))
699 self.assertEqual(urllib.parse.urlparse("http://www.python.org:80"),
700 ('http','www.python.org:80','','','',''))
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000701 # Repeat for bytes input
702 self.assertEqual(urllib.parse.urlparse(b"path"),
703 (b'',b'',b'path',b'',b'',b''))
704 self.assertEqual(urllib.parse.urlparse(b"//www.python.org:80"),
705 (b'',b'www.python.org:80',b'',b'',b'',b''))
706 self.assertEqual(urllib.parse.urlparse(b"http://www.python.org:80"),
707 (b'http',b'www.python.org:80',b'',b'',b'',b''))
Senthil Kumaran84c7d9f2010-08-04 04:50:44 +0000708
709 def test_portseparator(self):
710 # Issue 754016 makes changes for port separator ':' from scheme separator
711 self.assertEqual(urllib.parse.urlparse("path:80"),
712 ('','','path:80','','',''))
713 self.assertEqual(urllib.parse.urlparse("http:"),('http','','','','',''))
714 self.assertEqual(urllib.parse.urlparse("https:"),('https','','','','',''))
715 self.assertEqual(urllib.parse.urlparse("http://www.python.org:80"),
716 ('http','www.python.org:80','','','',''))
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000717 # As usual, need to check bytes input as well
718 self.assertEqual(urllib.parse.urlparse(b"path:80"),
719 (b'',b'',b'path:80',b'',b'',b''))
720 self.assertEqual(urllib.parse.urlparse(b"http:"),(b'http',b'',b'',b'',b'',b''))
721 self.assertEqual(urllib.parse.urlparse(b"https:"),(b'https',b'',b'',b'',b'',b''))
722 self.assertEqual(urllib.parse.urlparse(b"http://www.python.org:80"),
723 (b'http',b'www.python.org:80',b'',b'',b'',b''))
Senthil Kumaran84c7d9f2010-08-04 04:50:44 +0000724
Facundo Batista2ac5de22008-07-07 18:24:11 +0000725 def test_usingsys(self):
726 # Issue 3314: sys module is used in the error
727 self.assertRaises(TypeError, urllib.parse.urlencode, "foo")
728
Senthil Kumaran6be85c52010-02-19 07:42:50 +0000729 def test_anyscheme(self):
730 # Issue 7904: s3://foo.com/stuff has netloc "foo.com".
Ezio Melotti5e15efa2010-02-19 14:49:02 +0000731 self.assertEqual(urllib.parse.urlparse("s3://foo.com/stuff"),
732 ('s3', 'foo.com', '/stuff', '', '', ''))
733 self.assertEqual(urllib.parse.urlparse("x-newscheme://foo.com/stuff"),
734 ('x-newscheme', 'foo.com', '/stuff', '', '', ''))
Senthil Kumaran1be320e2012-05-19 08:12:00 +0800735 self.assertEqual(urllib.parse.urlparse("x-newscheme://foo.com/stuff?query#fragment"),
736 ('x-newscheme', 'foo.com', '/stuff', '', 'query', 'fragment'))
737 self.assertEqual(urllib.parse.urlparse("x-newscheme://foo.com/stuff?query"),
738 ('x-newscheme', 'foo.com', '/stuff', '', 'query', ''))
739
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000740 # And for bytes...
741 self.assertEqual(urllib.parse.urlparse(b"s3://foo.com/stuff"),
742 (b's3', b'foo.com', b'/stuff', b'', b'', b''))
743 self.assertEqual(urllib.parse.urlparse(b"x-newscheme://foo.com/stuff"),
744 (b'x-newscheme', b'foo.com', b'/stuff', b'', b'', b''))
Senthil Kumaran1be320e2012-05-19 08:12:00 +0800745 self.assertEqual(urllib.parse.urlparse(b"x-newscheme://foo.com/stuff?query#fragment"),
746 (b'x-newscheme', b'foo.com', b'/stuff', b'', b'query', b'fragment'))
747 self.assertEqual(urllib.parse.urlparse(b"x-newscheme://foo.com/stuff?query"),
748 (b'x-newscheme', b'foo.com', b'/stuff', b'', b'query', b''))
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000749
Berker Peksag89584c92015-06-25 23:38:48 +0300750 def test_default_scheme(self):
751 # Exercise the scheme parameter of urlparse() and urlsplit()
752 for func in (urllib.parse.urlparse, urllib.parse.urlsplit):
753 with self.subTest(function=func):
754 result = func("http://example.net/", "ftp")
755 self.assertEqual(result.scheme, "http")
756 result = func(b"http://example.net/", b"ftp")
757 self.assertEqual(result.scheme, b"http")
758 self.assertEqual(func("path", "ftp").scheme, "ftp")
759 self.assertEqual(func("path", scheme="ftp").scheme, "ftp")
760 self.assertEqual(func(b"path", scheme=b"ftp").scheme, b"ftp")
761 self.assertEqual(func("path").scheme, "")
762 self.assertEqual(func(b"path").scheme, b"")
763 self.assertEqual(func(b"path", "").scheme, b"")
764
765 def test_parse_fragments(self):
766 # Exercise the allow_fragments parameter of urlparse() and urlsplit()
767 tests = (
postmasters90e01e52017-06-20 06:02:44 -0700768 ("http:#frag", "path", "frag"),
769 ("//example.net#frag", "path", "frag"),
770 ("index.html#frag", "path", "frag"),
771 (";a=b#frag", "params", "frag"),
772 ("?a=b#frag", "query", "frag"),
773 ("#frag", "path", "frag"),
774 ("abc#@frag", "path", "@frag"),
775 ("//abc#@frag", "path", "@frag"),
776 ("//abc:80#@frag", "path", "@frag"),
777 ("//abc#@frag:80", "path", "@frag:80"),
Berker Peksag89584c92015-06-25 23:38:48 +0300778 )
postmasters90e01e52017-06-20 06:02:44 -0700779 for url, attr, expected_frag in tests:
Berker Peksag89584c92015-06-25 23:38:48 +0300780 for func in (urllib.parse.urlparse, urllib.parse.urlsplit):
781 if attr == "params" and func is urllib.parse.urlsplit:
782 attr = "path"
783 with self.subTest(url=url, function=func):
784 result = func(url, allow_fragments=False)
785 self.assertEqual(result.fragment, "")
postmasters90e01e52017-06-20 06:02:44 -0700786 self.assertTrue(
787 getattr(result, attr).endswith("#" + expected_frag))
Berker Peksag89584c92015-06-25 23:38:48 +0300788 self.assertEqual(func(url, "", False).fragment, "")
789
790 result = func(url, allow_fragments=True)
postmasters90e01e52017-06-20 06:02:44 -0700791 self.assertEqual(result.fragment, expected_frag)
792 self.assertFalse(
793 getattr(result, attr).endswith(expected_frag))
794 self.assertEqual(func(url, "", True).fragment,
795 expected_frag)
796 self.assertEqual(func(url).fragment, expected_frag)
Berker Peksag89584c92015-06-25 23:38:48 +0300797
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000798 def test_mixed_types_rejected(self):
799 # Several functions that process either strings or ASCII encoded bytes
800 # accept multiple arguments. Check they reject mixed type input
Ezio Melottied3a7d22010-12-01 02:32:32 +0000801 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000802 urllib.parse.urlparse("www.python.org", b"http")
Ezio Melottied3a7d22010-12-01 02:32:32 +0000803 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000804 urllib.parse.urlparse(b"www.python.org", "http")
Ezio Melottied3a7d22010-12-01 02:32:32 +0000805 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000806 urllib.parse.urlsplit("www.python.org", b"http")
Ezio Melottied3a7d22010-12-01 02:32:32 +0000807 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000808 urllib.parse.urlsplit(b"www.python.org", "http")
Ezio Melottied3a7d22010-12-01 02:32:32 +0000809 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000810 urllib.parse.urlunparse(( b"http", "www.python.org","","","",""))
Ezio Melottied3a7d22010-12-01 02:32:32 +0000811 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000812 urllib.parse.urlunparse(("http", b"www.python.org","","","",""))
Ezio Melottied3a7d22010-12-01 02:32:32 +0000813 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000814 urllib.parse.urlunsplit((b"http", "www.python.org","","",""))
Ezio Melottied3a7d22010-12-01 02:32:32 +0000815 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000816 urllib.parse.urlunsplit(("http", b"www.python.org","","",""))
Ezio Melottied3a7d22010-12-01 02:32:32 +0000817 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000818 urllib.parse.urljoin("http://python.org", b"http://python.org")
Ezio Melottied3a7d22010-12-01 02:32:32 +0000819 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000820 urllib.parse.urljoin(b"http://python.org", "http://python.org")
821
def _check_result_type(self, str_type):
    # Verify that a str result type and its bytes counterpart convert
    # into each other through encode()/decode(), called with no
    # arguments, with an encoding, and with encoding plus error handler.
    field_count = len(str_type._fields)
    bytes_type = str_type._encoded_counterpart
    self.assertIs(bytes_type._decoded_counterpart, str_type)

    str_args = ('',) * field_count
    bytes_args = (b'',) * field_count
    str_result = str_type(*str_args)
    bytes_result = bytes_type(*bytes_args)
    codec_arg_sets = ((), ('ascii',), ('ascii', 'strict'))

    # bytes -> str
    self.assertEqual(str_result, str_args)
    for codec_args in codec_arg_sets:
        self.assertEqual(bytes_result.decode(*codec_args), str_args)
        self.assertEqual(bytes_result.decode(*codec_args), str_result)

    # str -> bytes
    self.assertEqual(bytes_result, bytes_args)
    for codec_args in codec_arg_sets:
        self.assertEqual(str_result.encode(*codec_args), bytes_args)
        self.assertEqual(str_result.encode(*codec_args), bytes_result)
846
847 def test_result_pairs(self):
848 # Check encoding and decoding between result pairs
849 result_types = [
850 urllib.parse.DefragResult,
851 urllib.parse.SplitResult,
852 urllib.parse.ParseResult,
853 ]
854 for result_type in result_types:
855 self._check_result_type(result_type)
856
Victor Stinner1d87deb2011-01-14 13:05:19 +0000857 def test_parse_qs_encoding(self):
858 result = urllib.parse.parse_qs("key=\u0141%E9", encoding="latin-1")
859 self.assertEqual(result, {'key': ['\u0141\xE9']})
860 result = urllib.parse.parse_qs("key=\u0141%C3%A9", encoding="utf-8")
861 self.assertEqual(result, {'key': ['\u0141\xE9']})
862 result = urllib.parse.parse_qs("key=\u0141%C3%A9", encoding="ascii")
863 self.assertEqual(result, {'key': ['\u0141\ufffd\ufffd']})
864 result = urllib.parse.parse_qs("key=\u0141%E9-", encoding="ascii")
865 self.assertEqual(result, {'key': ['\u0141\ufffd-']})
866 result = urllib.parse.parse_qs("key=\u0141%E9-", encoding="ascii",
867 errors="ignore")
868 self.assertEqual(result, {'key': ['\u0141-']})
869
870 def test_parse_qsl_encoding(self):
871 result = urllib.parse.parse_qsl("key=\u0141%E9", encoding="latin-1")
872 self.assertEqual(result, [('key', '\u0141\xE9')])
873 result = urllib.parse.parse_qsl("key=\u0141%C3%A9", encoding="utf-8")
874 self.assertEqual(result, [('key', '\u0141\xE9')])
875 result = urllib.parse.parse_qsl("key=\u0141%C3%A9", encoding="ascii")
876 self.assertEqual(result, [('key', '\u0141\ufffd\ufffd')])
877 result = urllib.parse.parse_qsl("key=\u0141%E9-", encoding="ascii")
878 self.assertEqual(result, [('key', '\u0141\ufffd-')])
879 result = urllib.parse.parse_qsl("key=\u0141%E9-", encoding="ascii",
880 errors="ignore")
881 self.assertEqual(result, [('key', '\u0141-')])
882
Senthil Kumarande02a712011-07-23 18:27:45 +0800883 def test_urlencode_sequences(self):
884 # Other tests incidentally urlencode things; test non-covered cases:
885 # Sequence and object values.
886 result = urllib.parse.urlencode({'a': [1, 2], 'b': (3, 4, 5)}, True)
Georg Brandl09a7c722012-02-20 21:31:46 +0100887 # we cannot rely on ordering here
888 assert set(result.split('&')) == {'a=1', 'a=2', 'b=3', 'b=4', 'b=5'}
Senthil Kumarande02a712011-07-23 18:27:45 +0800889
890 class Trivial:
891 def __str__(self):
892 return 'trivial'
893
894 result = urllib.parse.urlencode({'a': Trivial()}, True)
895 self.assertEqual(result, 'a=trivial')
896
R David Murrayc17686f2015-05-17 20:44:50 -0400897 def test_urlencode_quote_via(self):
898 result = urllib.parse.urlencode({'a': 'some value'})
899 self.assertEqual(result, "a=some+value")
900 result = urllib.parse.urlencode({'a': 'some value/another'},
901 quote_via=urllib.parse.quote)
902 self.assertEqual(result, "a=some%20value%2Fanother")
903 result = urllib.parse.urlencode({'a': 'some value/another'},
904 safe='/', quote_via=urllib.parse.quote)
905 self.assertEqual(result, "a=some%20value/another")
906
Senthil Kumarande02a712011-07-23 18:27:45 +0800907 def test_quote_from_bytes(self):
908 self.assertRaises(TypeError, urllib.parse.quote_from_bytes, 'foo')
909 result = urllib.parse.quote_from_bytes(b'archaeological arcana')
910 self.assertEqual(result, 'archaeological%20arcana')
911 result = urllib.parse.quote_from_bytes(b'')
912 self.assertEqual(result, '')
913
914 def test_unquote_to_bytes(self):
915 result = urllib.parse.unquote_to_bytes('abc%20def')
916 self.assertEqual(result, b'abc def')
917 result = urllib.parse.unquote_to_bytes('')
918 self.assertEqual(result, b'')
919
920 def test_quote_errors(self):
921 self.assertRaises(TypeError, urllib.parse.quote, b'foo',
922 encoding='utf-8')
923 self.assertRaises(TypeError, urllib.parse.quote, b'foo', errors='strict')
Victor Stinner1d87deb2011-01-14 13:05:19 +0000924
Ezio Melotti6709b7d2012-05-19 17:15:19 +0300925 def test_issue14072(self):
926 p1 = urllib.parse.urlsplit('tel:+31-641044153')
927 self.assertEqual(p1.scheme, 'tel')
928 self.assertEqual(p1.path, '+31-641044153')
929 p2 = urllib.parse.urlsplit('tel:+31641044153')
930 self.assertEqual(p2.scheme, 'tel')
931 self.assertEqual(p2.path, '+31641044153')
Senthil Kumaraned301992012-12-24 14:00:20 -0800932 # assert the behavior for urlparse
933 p1 = urllib.parse.urlparse('tel:+31-641044153')
934 self.assertEqual(p1.scheme, 'tel')
935 self.assertEqual(p1.path, '+31-641044153')
936 p2 = urllib.parse.urlparse('tel:+31641044153')
937 self.assertEqual(p2.scheme, 'tel')
938 self.assertEqual(p2.path, '+31641044153')
939
Matt Eaton2cb46612018-03-20 01:41:37 -0500940 def test_port_casting_failure_message(self):
941 message = "Port could not be cast to integer value as 'oracle'"
942 p1 = urllib.parse.urlparse('http://Server=sde; Service=sde:oracle')
943 with self.assertRaisesRegex(ValueError, message):
944 p1.port
945
946 p2 = urllib.parse.urlsplit('http://Server=sde; Service=sde:oracle')
947 with self.assertRaisesRegex(ValueError, message):
948 p2.port
949
Senthil Kumaraned301992012-12-24 14:00:20 -0800950 def test_telurl_params(self):
951 p1 = urllib.parse.urlparse('tel:123-4;phone-context=+1-650-516')
952 self.assertEqual(p1.scheme, 'tel')
953 self.assertEqual(p1.path, '123-4')
954 self.assertEqual(p1.params, 'phone-context=+1-650-516')
955
956 p1 = urllib.parse.urlparse('tel:+1-201-555-0123')
957 self.assertEqual(p1.scheme, 'tel')
958 self.assertEqual(p1.path, '+1-201-555-0123')
959 self.assertEqual(p1.params, '')
960
961 p1 = urllib.parse.urlparse('tel:7042;phone-context=example.com')
962 self.assertEqual(p1.scheme, 'tel')
963 self.assertEqual(p1.path, '7042')
964 self.assertEqual(p1.params, 'phone-context=example.com')
965
966 p1 = urllib.parse.urlparse('tel:863-1234;phone-context=+1-914-555')
967 self.assertEqual(p1.scheme, 'tel')
968 self.assertEqual(p1.path, '863-1234')
969 self.assertEqual(p1.params, 'phone-context=+1-914-555')
970
R David Murrayf5163882013-03-21 20:56:51 -0400971 def test_Quoter_repr(self):
972 quoter = urllib.parse.Quoter(urllib.parse._ALWAYS_SAFE)
973 self.assertIn('Quoter', repr(quoter))
974
Serhiy Storchaka15154502015-04-07 19:09:01 +0300975 def test_all(self):
976 expected = []
977 undocumented = {
978 'splitattr', 'splithost', 'splitnport', 'splitpasswd',
979 'splitport', 'splitquery', 'splittag', 'splittype', 'splituser',
980 'splitvalue',
981 'Quoter', 'ResultBase', 'clear_cache', 'to_bytes', 'unwrap',
982 }
983 for name in dir(urllib.parse):
984 if name.startswith('_') or name in undocumented:
985 continue
986 object = getattr(urllib.parse, name)
987 if getattr(object, '__module__', None) == 'urllib.parse':
988 expected.append(name)
989 self.assertCountEqual(urllib.parse.__all__, expected)
990
Senthil Kumaran6be85c52010-02-19 07:42:50 +0000991
Serhiy Storchaka9270be72015-03-02 16:32:29 +0200992class Utility_Tests(unittest.TestCase):
993 """Testcase to test the various utility functions in the urllib."""
994 # In Python 2 this test class was in test_urllib.
995
996 def test_splittype(self):
Cheryl Sabella867b8252018-06-03 10:31:32 -0400997 splittype = urllib.parse._splittype
Serhiy Storchaka9270be72015-03-02 16:32:29 +0200998 self.assertEqual(splittype('type:opaquestring'), ('type', 'opaquestring'))
999 self.assertEqual(splittype('opaquestring'), (None, 'opaquestring'))
1000 self.assertEqual(splittype(':opaquestring'), (None, ':opaquestring'))
1001 self.assertEqual(splittype('type:'), ('type', ''))
1002 self.assertEqual(splittype('type:opaque:string'), ('type', 'opaque:string'))
1003
1004 def test_splithost(self):
Cheryl Sabella867b8252018-06-03 10:31:32 -04001005 splithost = urllib.parse._splithost
Serhiy Storchaka9270be72015-03-02 16:32:29 +02001006 self.assertEqual(splithost('//www.example.org:80/foo/bar/baz.html'),
1007 ('www.example.org:80', '/foo/bar/baz.html'))
1008 self.assertEqual(splithost('//www.example.org:80'),
1009 ('www.example.org:80', ''))
1010 self.assertEqual(splithost('/foo/bar/baz.html'),
1011 (None, '/foo/bar/baz.html'))
1012
postmasters90e01e52017-06-20 06:02:44 -07001013 # bpo-30500: # starts a fragment.
1014 self.assertEqual(splithost('//127.0.0.1#@host.com'),
1015 ('127.0.0.1', '/#@host.com'))
1016 self.assertEqual(splithost('//127.0.0.1#@host.com:80'),
1017 ('127.0.0.1', '/#@host.com:80'))
1018 self.assertEqual(splithost('//127.0.0.1:80#@host.com'),
1019 ('127.0.0.1:80', '/#@host.com'))
1020
1021 # Empty host is returned as empty string.
1022 self.assertEqual(splithost("///file"),
1023 ('', '/file'))
1024
1025 # Trailing semicolon, question mark and hash symbol are kept.
1026 self.assertEqual(splithost("//example.net/file;"),
1027 ('example.net', '/file;'))
1028 self.assertEqual(splithost("//example.net/file?"),
1029 ('example.net', '/file?'))
1030 self.assertEqual(splithost("//example.net/file#"),
1031 ('example.net', '/file#'))
1032
Serhiy Storchaka9270be72015-03-02 16:32:29 +02001033 def test_splituser(self):
Cheryl Sabella867b8252018-06-03 10:31:32 -04001034 splituser = urllib.parse._splituser
Serhiy Storchaka9270be72015-03-02 16:32:29 +02001035 self.assertEqual(splituser('User:Pass@www.python.org:080'),
1036 ('User:Pass', 'www.python.org:080'))
1037 self.assertEqual(splituser('@www.python.org:080'),
1038 ('', 'www.python.org:080'))
1039 self.assertEqual(splituser('www.python.org:080'),
1040 (None, 'www.python.org:080'))
1041 self.assertEqual(splituser('User:Pass@'),
1042 ('User:Pass', ''))
1043 self.assertEqual(splituser('User@example.com:Pass@www.python.org:080'),
1044 ('User@example.com:Pass', 'www.python.org:080'))
1045
1046 def test_splitpasswd(self):
1047 # Some of the password examples are not sensible, but it is added to
1048 # confirming to RFC2617 and addressing issue4675.
Cheryl Sabella867b8252018-06-03 10:31:32 -04001049 splitpasswd = urllib.parse._splitpasswd
Serhiy Storchaka9270be72015-03-02 16:32:29 +02001050 self.assertEqual(splitpasswd('user:ab'), ('user', 'ab'))
1051 self.assertEqual(splitpasswd('user:a\nb'), ('user', 'a\nb'))
1052 self.assertEqual(splitpasswd('user:a\tb'), ('user', 'a\tb'))
1053 self.assertEqual(splitpasswd('user:a\rb'), ('user', 'a\rb'))
1054 self.assertEqual(splitpasswd('user:a\fb'), ('user', 'a\fb'))
1055 self.assertEqual(splitpasswd('user:a\vb'), ('user', 'a\vb'))
1056 self.assertEqual(splitpasswd('user:a:b'), ('user', 'a:b'))
1057 self.assertEqual(splitpasswd('user:a b'), ('user', 'a b'))
1058 self.assertEqual(splitpasswd('user 2:ab'), ('user 2', 'ab'))
1059 self.assertEqual(splitpasswd('user+1:a+b'), ('user+1', 'a+b'))
1060 self.assertEqual(splitpasswd('user:'), ('user', ''))
1061 self.assertEqual(splitpasswd('user'), ('user', None))
1062 self.assertEqual(splitpasswd(':ab'), ('', 'ab'))
1063
1064 def test_splitport(self):
Cheryl Sabella867b8252018-06-03 10:31:32 -04001065 splitport = urllib.parse._splitport
Serhiy Storchaka9270be72015-03-02 16:32:29 +02001066 self.assertEqual(splitport('parrot:88'), ('parrot', '88'))
1067 self.assertEqual(splitport('parrot'), ('parrot', None))
1068 self.assertEqual(splitport('parrot:'), ('parrot', None))
1069 self.assertEqual(splitport('127.0.0.1'), ('127.0.0.1', None))
1070 self.assertEqual(splitport('parrot:cheese'), ('parrot:cheese', None))
1071 self.assertEqual(splitport('[::1]:88'), ('[::1]', '88'))
1072 self.assertEqual(splitport('[::1]'), ('[::1]', None))
1073 self.assertEqual(splitport(':88'), ('', '88'))
1074
1075 def test_splitnport(self):
Cheryl Sabella867b8252018-06-03 10:31:32 -04001076 splitnport = urllib.parse._splitnport
Serhiy Storchaka9270be72015-03-02 16:32:29 +02001077 self.assertEqual(splitnport('parrot:88'), ('parrot', 88))
1078 self.assertEqual(splitnport('parrot'), ('parrot', -1))
1079 self.assertEqual(splitnport('parrot', 55), ('parrot', 55))
1080 self.assertEqual(splitnport('parrot:'), ('parrot', -1))
1081 self.assertEqual(splitnport('parrot:', 55), ('parrot', 55))
1082 self.assertEqual(splitnport('127.0.0.1'), ('127.0.0.1', -1))
1083 self.assertEqual(splitnport('127.0.0.1', 55), ('127.0.0.1', 55))
1084 self.assertEqual(splitnport('parrot:cheese'), ('parrot', None))
1085 self.assertEqual(splitnport('parrot:cheese', 55), ('parrot', None))
1086
1087 def test_splitquery(self):
1088 # Normal cases are exercised by other tests; ensure that we also
1089 # catch cases with no port specified (testcase ensuring coverage)
Cheryl Sabella867b8252018-06-03 10:31:32 -04001090 splitquery = urllib.parse._splitquery
Serhiy Storchaka9270be72015-03-02 16:32:29 +02001091 self.assertEqual(splitquery('http://python.org/fake?foo=bar'),
1092 ('http://python.org/fake', 'foo=bar'))
1093 self.assertEqual(splitquery('http://python.org/fake?foo=bar?'),
1094 ('http://python.org/fake?foo=bar', ''))
1095 self.assertEqual(splitquery('http://python.org/fake'),
1096 ('http://python.org/fake', None))
1097 self.assertEqual(splitquery('?foo=bar'), ('', 'foo=bar'))
1098
1099 def test_splittag(self):
Cheryl Sabella867b8252018-06-03 10:31:32 -04001100 splittag = urllib.parse._splittag
Serhiy Storchaka9270be72015-03-02 16:32:29 +02001101 self.assertEqual(splittag('http://example.com?foo=bar#baz'),
1102 ('http://example.com?foo=bar', 'baz'))
1103 self.assertEqual(splittag('http://example.com?foo=bar#'),
1104 ('http://example.com?foo=bar', ''))
1105 self.assertEqual(splittag('#baz'), ('', 'baz'))
1106 self.assertEqual(splittag('http://example.com?foo=bar'),
1107 ('http://example.com?foo=bar', None))
1108 self.assertEqual(splittag('http://example.com?foo=bar#baz#boo'),
1109 ('http://example.com?foo=bar#baz', 'boo'))
1110
1111 def test_splitattr(self):
Cheryl Sabella867b8252018-06-03 10:31:32 -04001112 splitattr = urllib.parse._splitattr
Serhiy Storchaka9270be72015-03-02 16:32:29 +02001113 self.assertEqual(splitattr('/path;attr1=value1;attr2=value2'),
1114 ('/path', ['attr1=value1', 'attr2=value2']))
1115 self.assertEqual(splitattr('/path;'), ('/path', ['']))
1116 self.assertEqual(splitattr(';attr1=value1;attr2=value2'),
1117 ('', ['attr1=value1', 'attr2=value2']))
1118 self.assertEqual(splitattr('/path'), ('/path', []))
1119
1120 def test_splitvalue(self):
1121 # Normal cases are exercised by other tests; test pathological cases
1122 # with no key/value pairs. (testcase ensuring coverage)
Cheryl Sabella867b8252018-06-03 10:31:32 -04001123 splitvalue = urllib.parse._splitvalue
Serhiy Storchaka9270be72015-03-02 16:32:29 +02001124 self.assertEqual(splitvalue('foo=bar'), ('foo', 'bar'))
1125 self.assertEqual(splitvalue('foo='), ('foo', ''))
1126 self.assertEqual(splitvalue('=bar'), ('', 'bar'))
1127 self.assertEqual(splitvalue('foobar'), ('foobar', None))
1128 self.assertEqual(splitvalue('foo=bar=baz'), ('foo', 'bar=baz'))
1129
1130 def test_to_bytes(self):
Cheryl Sabella867b8252018-06-03 10:31:32 -04001131 result = urllib.parse._to_bytes('http://www.python.org')
Serhiy Storchaka9270be72015-03-02 16:32:29 +02001132 self.assertEqual(result, 'http://www.python.org')
Cheryl Sabella0250de42018-04-25 16:51:54 -07001133 self.assertRaises(UnicodeError, urllib.parse._to_bytes,
Serhiy Storchaka9270be72015-03-02 16:32:29 +02001134 'http://www.python.org/medi\u00e6val')
1135
1136 def test_unwrap(self):
Cheryl Sabella867b8252018-06-03 10:31:32 -04001137 url = urllib.parse._unwrap('<URL:type://host/path>')
Serhiy Storchaka9270be72015-03-02 16:32:29 +02001138 self.assertEqual(url, 'type://host/path')
1139
Skip Montanaro6ec967d2002-03-23 05:32:10 +00001140
Cheryl Sabella0250de42018-04-25 16:51:54 -07001141class DeprecationTest(unittest.TestCase):
1142
1143 def test_splittype_deprecation(self):
1144 with self.assertWarns(DeprecationWarning) as cm:
1145 urllib.parse.splittype('')
1146 self.assertEqual(str(cm.warning),
1147 'urllib.parse.splittype() is deprecated as of 3.8, '
1148 'use urllib.parse.urlparse() instead')
1149
1150 def test_splithost_deprecation(self):
1151 with self.assertWarns(DeprecationWarning) as cm:
1152 urllib.parse.splithost('')
1153 self.assertEqual(str(cm.warning),
1154 'urllib.parse.splithost() is deprecated as of 3.8, '
1155 'use urllib.parse.urlparse() instead')
1156
1157 def test_splituser_deprecation(self):
1158 with self.assertWarns(DeprecationWarning) as cm:
1159 urllib.parse.splituser('')
1160 self.assertEqual(str(cm.warning),
1161 'urllib.parse.splituser() is deprecated as of 3.8, '
1162 'use urllib.parse.urlparse() instead')
1163
1164 def test_splitpasswd_deprecation(self):
1165 with self.assertWarns(DeprecationWarning) as cm:
1166 urllib.parse.splitpasswd('')
1167 self.assertEqual(str(cm.warning),
1168 'urllib.parse.splitpasswd() is deprecated as of 3.8, '
1169 'use urllib.parse.urlparse() instead')
1170
1171 def test_splitport_deprecation(self):
1172 with self.assertWarns(DeprecationWarning) as cm:
1173 urllib.parse.splitport('')
1174 self.assertEqual(str(cm.warning),
1175 'urllib.parse.splitport() is deprecated as of 3.8, '
1176 'use urllib.parse.urlparse() instead')
1177
1178 def test_splitnport_deprecation(self):
1179 with self.assertWarns(DeprecationWarning) as cm:
1180 urllib.parse.splitnport('')
1181 self.assertEqual(str(cm.warning),
1182 'urllib.parse.splitnport() is deprecated as of 3.8, '
1183 'use urllib.parse.urlparse() instead')
1184
1185 def test_splitquery_deprecation(self):
1186 with self.assertWarns(DeprecationWarning) as cm:
1187 urllib.parse.splitquery('')
1188 self.assertEqual(str(cm.warning),
1189 'urllib.parse.splitquery() is deprecated as of 3.8, '
1190 'use urllib.parse.urlparse() instead')
1191
1192 def test_splittag_deprecation(self):
1193 with self.assertWarns(DeprecationWarning) as cm:
1194 urllib.parse.splittag('')
1195 self.assertEqual(str(cm.warning),
1196 'urllib.parse.splittag() is deprecated as of 3.8, '
1197 'use urllib.parse.urlparse() instead')
1198
1199 def test_splitattr_deprecation(self):
1200 with self.assertWarns(DeprecationWarning) as cm:
1201 urllib.parse.splitattr('')
1202 self.assertEqual(str(cm.warning),
1203 'urllib.parse.splitattr() is deprecated as of 3.8, '
1204 'use urllib.parse.urlparse() instead')
1205
1206 def test_splitvalue_deprecation(self):
1207 with self.assertWarns(DeprecationWarning) as cm:
1208 urllib.parse.splitvalue('')
1209 self.assertEqual(str(cm.warning),
1210 'urllib.parse.splitvalue() is deprecated as of 3.8, '
1211 'use urllib.parse.parse_qsl() instead')
1212
1213 def test_to_bytes_deprecation(self):
1214 with self.assertWarns(DeprecationWarning) as cm:
1215 urllib.parse.to_bytes('')
1216 self.assertEqual(str(cm.warning),
1217 'urllib.parse.to_bytes() is deprecated as of 3.8')
1218
1219 def test_unwrap(self):
1220 with self.assertWarns(DeprecationWarning) as cm:
1221 urllib.parse.unwrap('')
1222 self.assertEqual(str(cm.warning),
1223 'urllib.parse.unwrap() is deprecated as of 3.8')
1224
1225
Skip Montanaro6ec967d2002-03-23 05:32:10 +00001226if __name__ == "__main__":
Serhiy Storchaka9270be72015-03-02 16:32:29 +02001227 unittest.main()