blob: 9165d73e8202fec891d75b8747b1c2472a9bcf77 [file] [log] [blame]
Skip Montanaro6ec967d2002-03-23 05:32:10 +00001import unittest
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002import urllib.parse
Fred Drakea4d18a02001-01-05 05:57:04 +00003
# Base URLs that the urljoin()/urldefrag() tests in this file resolve
# relative references against.
RFC1808_BASE = "http://a/b/c/d;p?q#f"
RFC2396_BASE = "http://a/b/c/d;p?q"
RFC3986_BASE = "http://a/b/c/d;p?q"
SIMPLE_BASE = "http://a/b/c/d"
Fred Drakea4d18a02001-01-05 05:57:04 +00008
# Test cases for parse_qsl().  Each case is a two-tuple holding the query
# (str or bytes) and the expected result: a list of (name, value) pairs of
# the same type as the query, computed with keep_blank_values=True.
parse_qsl_test_cases = [
    # str queries separated by '&'
    ("", []),
    ("&", []),
    ("&&", []),
    ("=", [('', '')]),
    ("=a", [('', 'a')]),
    ("a", [('a', '')]),
    ("a=", [('a', '')]),
    ("&a=b", [('a', 'b')]),
    ("a=a+b&b=b+c", [('a', 'a b'), ('b', 'b c')]),
    ("a=1&a=2", [('a', '1'), ('a', '2')]),
    # the same queries as bytes
    (b"", []),
    (b"&", []),
    (b"&&", []),
    (b"=", [(b'', b'')]),
    (b"=a", [(b'', b'a')]),
    (b"a", [(b'a', b'')]),
    (b"a=", [(b'a', b'')]),
    (b"&a=b", [(b'a', b'b')]),
    (b"a=a+b&b=b+c", [(b'a', b'a b'), (b'b', b'b c')]),
    (b"a=1&a=2", [(b'a', b'1'), (b'a', b'2')]),
    # these cases expect ';' to act as a pair separator as well
    (";", []),
    (";;", []),
    (";a=b", [('a', 'b')]),
    ("a=a+b;b=b+c", [('a', 'a b'), ('b', 'b c')]),
    ("a=1;a=2", [('a', '1'), ('a', '2')]),
    (b";", []),
    (b";;", []),
    (b";a=b", [(b'a', b'b')]),
    (b"a=a+b;b=b+c", [(b'a', b'a b'), (b'b', b'b c')]),
    (b"a=1;a=2", [(b'a', b'1'), (b'a', b'2')]),
]
44
# Test cases for parse_qs().  Each case is a two-tuple holding the query
# (str or bytes) and the expected result: a dictionary mapping each name to
# the list of its values, computed with keep_blank_values=True.
parse_qs_test_cases = [
    # str queries separated by '&'
    ("", {}),
    ("&", {}),
    ("&&", {}),
    ("=", {'': ['']}),
    ("=a", {'': ['a']}),
    ("a", {'a': ['']}),
    ("a=", {'a': ['']}),
    ("&a=b", {'a': ['b']}),
    ("a=a+b&b=b+c", {'a': ['a b'], 'b': ['b c']}),
    ("a=1&a=2", {'a': ['1', '2']}),
    # the same queries as bytes
    (b"", {}),
    (b"&", {}),
    (b"&&", {}),
    (b"=", {b'': [b'']}),
    (b"=a", {b'': [b'a']}),
    (b"a", {b'a': [b'']}),
    (b"a=", {b'a': [b'']}),
    (b"&a=b", {b'a': [b'b']}),
    (b"a=a+b&b=b+c", {b'a': [b'a b'], b'b': [b'b c']}),
    (b"a=1&a=2", {b'a': [b'1', b'2']}),
    # these cases expect ';' to act as a pair separator as well
    (";", {}),
    (";;", {}),
    (";a=b", {'a': ['b']}),
    ("a=a+b;b=b+c", {'a': ['a b'], 'b': ['b c']}),
    ("a=1;a=2", {'a': ['1', '2']}),
    (b";", {}),
    (b";;", {}),
    (b";a=b", {b'a': [b'b']}),
    (b"a=a+b;b=b+c", {b'a': [b'a b'], b'b': [b'b c']}),
    (b"a=1;a=2", {b'a': [b'1', b'2']}),
]
77
Skip Montanaro6ec967d2002-03-23 05:32:10 +000078class UrlParseTestCase(unittest.TestCase):
Johannes Gijsbers41e4faa2005-01-09 15:29:10 +000079
def checkRoundtrips(self, url, parsed, split):
    """Assert that *url* parses as expected and survives a round trip.

    *parsed* is the 6-tuple expected from urlparse(url) and *split* the
    5-tuple expected from urlsplit(url).  For each of the two parsers the
    result must equal the expected tuple, un-parsing it must give back
    *url*, and re-parsing geturl() must reproduce an equal result.
    """
    parsers = (
        (urllib.parse.urlparse, urllib.parse.urlunparse, parsed,
         ("scheme", "netloc", "path", "params", "query", "fragment")),
        (urllib.parse.urlsplit, urllib.parse.urlunsplit, split,
         ("scheme", "netloc", "path", "query", "fragment")),
    )
    # Attributes derived from netloc; they are not part of the tuple.
    derived = ("username", "password", "hostname", "port")

    for parse, unparse, expected, fields in parsers:
        result = parse(url)
        self.assertEqual(result, expected)
        by_name = tuple(getattr(result, field) for field in fields)
        self.assertEqual(by_name, expected)

        # put it back together and it should be the same
        rebuilt = unparse(result)
        self.assertEqual(rebuilt, url)
        self.assertEqual(rebuilt, result.geturl())

        # the result of geturl() is a fixpoint; we can always parse it
        # again to get the same result:
        reparsed = parse(result.geturl())
        self.assertEqual(reparsed.geturl(), result.geturl())
        self.assertEqual(reparsed, result)
        for name in fields + derived:
            self.assertEqual(getattr(reparsed, name), getattr(result, name))
Johannes Gijsbers41e4faa2005-01-09 15:29:10 +0000130
def test_qsl(self):
    # Run parse_qsl() over every entry of parse_qsl_test_cases, once
    # keeping blank values and once dropping them.
    for orig, expect in parse_qsl_test_cases:
        # One sub-test per query so that a failing case does not stop the
        # remaining cases from being checked.
        with self.subTest(orig=orig):
            result = urllib.parse.parse_qsl(orig, keep_blank_values=True)
            self.assertEqual(result, expect, "Error parsing %r" % orig)
            # Without blanks, every pair whose value is empty disappears.
            expect_without_blanks = [pair for pair in expect if pair[1]]
            result = urllib.parse.parse_qsl(orig, keep_blank_values=False)
            self.assertEqual(result, expect_without_blanks,
                             "Error parsing %r" % orig)
Facundo Batistac469d4c2008-09-03 22:49:01 +0000139
def test_qs(self):
    # Run parse_qs() over every entry of parse_qs_test_cases, once keeping
    # blank values and once dropping them.
    for orig, expect in parse_qs_test_cases:
        # One sub-test per query so that a failing case does not stop the
        # remaining cases from being checked.
        with self.subTest(orig=orig):
            result = urllib.parse.parse_qs(orig, keep_blank_values=True)
            self.assertEqual(result, expect, "Error parsing %r" % orig)
            # Without blanks, a name is dropped when its first expected
            # value is empty.
            expect_without_blanks = {name: values
                                     for name, values in expect.items()
                                     if values[0]}
            result = urllib.parse.parse_qs(orig, keep_blank_values=False)
            self.assertEqual(result, expect_without_blanks,
                             "Error parsing %r" % orig)
149
def test_roundtrips(self):
    # Each case is (url, expected urlparse() 6-tuple, expected urlsplit()
    # 5-tuple); every case is checked as str and again as ASCII bytes.
    str_cases = [
        ('file:///tmp/junk.txt',
         ('file', '', '/tmp/junk.txt', '', '', ''),
         ('file', '', '/tmp/junk.txt', '', '')),
        ('imap://mail.python.org/mbox1',
         ('imap', 'mail.python.org', '/mbox1', '', '', ''),
         ('imap', 'mail.python.org', '/mbox1', '', '')),
        ('mms://wms.sys.hinet.net/cts/Drama/09006251100.asf',
         ('mms', 'wms.sys.hinet.net', '/cts/Drama/09006251100.asf',
          '', '', ''),
         ('mms', 'wms.sys.hinet.net', '/cts/Drama/09006251100.asf',
          '', '')),
        ('nfs://server/path/to/file.txt',
         ('nfs', 'server', '/path/to/file.txt', '', '', ''),
         ('nfs', 'server', '/path/to/file.txt', '', '')),
        ('svn+ssh://svn.zope.org/repos/main/ZConfig/trunk/',
         ('svn+ssh', 'svn.zope.org', '/repos/main/ZConfig/trunk/',
          '', '', ''),
         ('svn+ssh', 'svn.zope.org', '/repos/main/ZConfig/trunk/',
          '', '')),
        ('git+ssh://git@github.com/user/project.git',
         ('git+ssh', 'git@github.com', '/user/project.git',
          '', '', ''),
         ('git+ssh', 'git@github.com', '/user/project.git',
          '', '')),
    ]
    bytes_cases = [
        (url.encode('ascii'),
         tuple(part.encode('ascii') for part in parsed),
         tuple(part.encode('ascii') for part in split))
        for url, parsed, split in str_cases
    ]
    for url, parsed, split in str_cases + bytes_cases:
        self.checkRoundtrips(url, parsed, split)
Michael W. Hudsonbd3e7712002-03-18 13:06:00 +0000184
def test_http_roundtrips(self):
    # urllib.parse.urlsplit treats 'http:' as an optimized special case,
    # so we test both 'http:' and 'https:' in all the following.
    # Three cheers for white box knowledge!
    #
    # Each case omits the scheme: (url tail, urlparse() tuple without the
    # scheme, urlsplit() tuple without the scheme).  The scheme is
    # prepended to all three before checking.
    str_cases = [
        ('://www.python.org',
         ('www.python.org', '', '', '', ''),
         ('www.python.org', '', '', '')),
        ('://www.python.org#abc',
         ('www.python.org', '', '', '', 'abc'),
         ('www.python.org', '', '', 'abc')),
        ('://www.python.org?q=abc',
         ('www.python.org', '', '', 'q=abc', ''),
         ('www.python.org', '', 'q=abc', '')),
        ('://www.python.org/#abc',
         ('www.python.org', '/', '', '', 'abc'),
         ('www.python.org', '/', '', 'abc')),
        ('://a/b/c/d;p?q#f',
         ('a', '/b/c/d', 'p', 'q', 'f'),
         ('a', '/b/c/d;p', 'q', 'f')),
    ]

    def as_bytes(parts):
        return tuple(part.encode('ascii') for part in parts)

    bytes_cases = [(tail.encode('ascii'), as_bytes(parsed), as_bytes(split))
                   for tail, parsed, split in str_cases]
    suites = (
        (('http', 'https'), str_cases),
        ((b'http', b'https'), bytes_cases),
    )
    for schemes, cases in suites:
        for scheme in schemes:
            for tail, parsed, split in cases:
                self.checkRoundtrips(scheme + tail,
                                     (scheme,) + parsed,
                                     (scheme,) + split)
Fred Drake70705652002-10-16 21:02:36 +0000222
def checkJoin(self, base, relurl, expected):
    """Assert that urljoin(base, relurl) gives *expected*, as str and bytes.

    All three arguments are str.  The str join is asserted first; the
    same join is then repeated with each argument encoded as ASCII bytes.
    """
    self.assertEqual(urllib.parse.urljoin(base, relurl), expected)
    baseb, relurlb, expectedb = (
        x.encode('ascii') for x in (base, relurl, expected))
    self.assertEqual(urllib.parse.urljoin(baseb, relurlb), expectedb)
Guido van Rossumbbc05682002-10-14 19:59:54 +0000229
230 def test_unparse_parse(self):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000231 str_cases = ['Python', './Python','x-newscheme://foo.com/stuff','x://y','x:/y','x:/','/',]
232 bytes_cases = [x.encode('ascii') for x in str_cases]
233 for u in str_cases + bytes_cases:
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000234 self.assertEqual(urllib.parse.urlunsplit(urllib.parse.urlsplit(u)), u)
235 self.assertEqual(urllib.parse.urlunparse(urllib.parse.urlparse(u)), u)
Fred Drakea4d18a02001-01-05 05:57:04 +0000236
def test_RFC1808(self):
    # Join each (relative URL, expected result) pair against RFC1808_BASE,
    # in the order listed.
    cases = [
        # "normal" cases from RFC 1808:
        ('g:h', 'g:h'),
        ('g', 'http://a/b/c/g'),
        ('./g', 'http://a/b/c/g'),
        ('g/', 'http://a/b/c/g/'),
        ('/g', 'http://a/g'),
        ('//g', 'http://g'),
        ('g?y', 'http://a/b/c/g?y'),
        ('g?y/./x', 'http://a/b/c/g?y/./x'),
        ('#s', 'http://a/b/c/d;p?q#s'),
        ('g#s', 'http://a/b/c/g#s'),
        ('g#s/./x', 'http://a/b/c/g#s/./x'),
        ('g?y#s', 'http://a/b/c/g?y#s'),
        ('g;x', 'http://a/b/c/g;x'),
        ('g;x?y#s', 'http://a/b/c/g;x?y#s'),
        ('.', 'http://a/b/c/'),
        ('./', 'http://a/b/c/'),
        ('..', 'http://a/b/'),
        ('../', 'http://a/b/'),
        ('../g', 'http://a/b/g'),
        ('../..', 'http://a/'),
        ('../../', 'http://a/'),
        ('../../g', 'http://a/g'),

        # "abnormal" cases from RFC 1808:
        ('', 'http://a/b/c/d;p?q#f'),
        ('g.', 'http://a/b/c/g.'),
        ('.g', 'http://a/b/c/.g'),
        ('g..', 'http://a/b/c/g..'),
        ('..g', 'http://a/b/c/..g'),
        ('./../g', 'http://a/b/g'),
        ('./g/.', 'http://a/b/c/g/'),
        ('g/./h', 'http://a/b/c/g/h'),
        ('g/../h', 'http://a/b/c/h'),
    ]
    for relurl, expected in cases:
        self.checkJoin(RFC1808_BASE, relurl, expected)

    # RFC 1808 and RFC 1630 disagree on these (according to RFC 1808),
    # so we'll not actually run these tests (which expect 1808 behavior).
    #self.checkJoin(RFC1808_BASE, 'http:g', 'http:g')
    #self.checkJoin(RFC1808_BASE, 'http:', 'http:')

    # XXX: The following tests are no longer compatible with RFC3986
    # self.checkJoin(RFC1808_BASE, '../../../g', 'http://a/../g')
    # self.checkJoin(RFC1808_BASE, '../../../../g', 'http://a/../../g')
    # self.checkJoin(RFC1808_BASE, '/./g', 'http://a/./g')
    # self.checkJoin(RFC1808_BASE, '/../g', 'http://a/../g')
283
284
Senthil Kumaran397eb442011-04-15 18:20:24 +0800285 def test_RFC2368(self):
286 # Issue 11467: path that starts with a number is not parsed correctly
287 self.assertEqual(urllib.parse.urlparse('mailto:1337@example.org'),
288 ('mailto', '', '1337@example.org', '', '', ''))
289
def test_RFC2396(self):
    # cases from RFC 2396: each (relative URL, expected result) pair is
    # joined against RFC2396_BASE, in the order listed.
    cases = [
        ('g:h', 'g:h'),
        ('g', 'http://a/b/c/g'),
        ('./g', 'http://a/b/c/g'),
        ('g/', 'http://a/b/c/g/'),
        ('/g', 'http://a/g'),
        ('//g', 'http://g'),
        ('g?y', 'http://a/b/c/g?y'),
        ('#s', 'http://a/b/c/d;p?q#s'),
        ('g#s', 'http://a/b/c/g#s'),
        ('g?y#s', 'http://a/b/c/g?y#s'),
        ('g;x', 'http://a/b/c/g;x'),
        ('g;x?y#s', 'http://a/b/c/g;x?y#s'),
        ('.', 'http://a/b/c/'),
        ('./', 'http://a/b/c/'),
        ('..', 'http://a/b/'),
        ('../', 'http://a/b/'),
        ('../g', 'http://a/b/g'),
        ('../..', 'http://a/'),
        ('../../', 'http://a/'),
        ('../../g', 'http://a/g'),
        ('', RFC2396_BASE),
        ('g.', 'http://a/b/c/g.'),
        ('.g', 'http://a/b/c/.g'),
        ('g..', 'http://a/b/c/g..'),
        ('..g', 'http://a/b/c/..g'),
        ('./../g', 'http://a/b/g'),
        ('./g/.', 'http://a/b/c/g/'),
        ('g/./h', 'http://a/b/c/g/h'),
        ('g/../h', 'http://a/b/c/h'),
        ('g;x=1/./y', 'http://a/b/c/g;x=1/y'),
        ('g;x=1/../y', 'http://a/b/c/y'),
        ('g?y/./x', 'http://a/b/c/g?y/./x'),
        ('g?y/../x', 'http://a/b/c/g?y/../x'),
        ('g#s/./x', 'http://a/b/c/g#s/./x'),
        ('g#s/../x', 'http://a/b/c/g#s/../x'),
    ]
    for relurl, expected in cases:
        self.checkJoin(RFC2396_BASE, relurl, expected)

    # XXX: The following tests are no longer compatible with RFC3986
    # self.checkJoin(RFC2396_BASE, '../../../g', 'http://a/../g')
    # self.checkJoin(RFC2396_BASE, '../../../../g', 'http://a/../../g')
    # self.checkJoin(RFC2396_BASE, '/./g', 'http://a/./g')
    # self.checkJoin(RFC2396_BASE, '/../g', 'http://a/../g')
335
336
def test_RFC3986(self):
    # Test cases from RFC3986: each (relative URL, expected result) pair
    # is joined against RFC3986_BASE, in the order listed.  Every distinct
    # reference is checked exactly once.
    cases = [
        # Normal examples
        ('g:h', 'g:h'),
        ('g', 'http://a/b/c/g'),
        ('./g', 'http://a/b/c/g'),
        ('g/', 'http://a/b/c/g/'),
        ('/g', 'http://a/g'),
        ('//g', 'http://g'),
        ('?y', 'http://a/b/c/d;p?y'),
        ('g?y', 'http://a/b/c/g?y'),
        ('#s', 'http://a/b/c/d;p?q#s'),
        ('g#s', 'http://a/b/c/g#s'),
        ('g?y#s', 'http://a/b/c/g?y#s'),
        (';x', 'http://a/b/c/;x'),
        ('g;x', 'http://a/b/c/g;x'),
        ('g;x?y#s', 'http://a/b/c/g;x?y#s'),
        ('', 'http://a/b/c/d;p?q'),
        ('.', 'http://a/b/c/'),
        ('./', 'http://a/b/c/'),
        ('..', 'http://a/b/'),
        ('../', 'http://a/b/'),
        ('../g', 'http://a/b/g'),
        ('../..', 'http://a/'),
        ('../../', 'http://a/'),
        ('../../g', 'http://a/g'),

        # Abnormal examples.  These are executed like the normal ones;
        # the expected values are the RFC 3986 results.
        ('../../../g', 'http://a/g'),
        ('../../../../g', 'http://a/g'),
        ('/./g', 'http://a/g'),
        ('/../g', 'http://a/g'),
        ('g.', 'http://a/b/c/g.'),
        ('.g', 'http://a/b/c/.g'),
        ('g..', 'http://a/b/c/g..'),
        ('..g', 'http://a/b/c/..g'),
        ('./../g', 'http://a/b/g'),
        ('./g/.', 'http://a/b/c/g/'),
        ('g/./h', 'http://a/b/c/g/h'),
        ('g/../h', 'http://a/b/c/h'),
        ('g;x=1/./y', 'http://a/b/c/g;x=1/y'),
        ('g;x=1/../y', 'http://a/b/c/y'),
        ('g?y/./x', 'http://a/b/c/g?y/./x'),
        ('g?y/../x', 'http://a/b/c/g?y/../x'),
        ('g#s/./x', 'http://a/b/c/g#s/./x'),
        ('g#s/../x', 'http://a/b/c/g#s/../x'),
        # A strict parser would give 'http:g' here; the expected value
        # below is the relaxed-parser result.
        ('http:g', 'http://a/b/c/g'),
    ]
    for relurl, expected in cases:
        self.checkJoin(RFC3986_BASE, relurl, expected)

    # Test for issue9721
    self.checkJoin('http://a/b/c/de', ';x', 'http://a/b/c/;x')
394
def test_urljoins(self):
    # Each case is (base, relative URL, expected result); the cases are
    # run through checkJoin() in the order listed.
    slash_base = SIMPLE_BASE + '/'
    cases = [
        (SIMPLE_BASE, 'g:h', 'g:h'),
        (SIMPLE_BASE, 'http:g', 'http://a/b/c/g'),
        (SIMPLE_BASE, 'http:', 'http://a/b/c/d'),
        (SIMPLE_BASE, 'g', 'http://a/b/c/g'),
        (SIMPLE_BASE, './g', 'http://a/b/c/g'),
        (SIMPLE_BASE, 'g/', 'http://a/b/c/g/'),
        (SIMPLE_BASE, '/g', 'http://a/g'),
        (SIMPLE_BASE, '//g', 'http://g'),
        (SIMPLE_BASE, '?y', 'http://a/b/c/d?y'),
        (SIMPLE_BASE, 'g?y', 'http://a/b/c/g?y'),
        (SIMPLE_BASE, 'g?y/./x', 'http://a/b/c/g?y/./x'),
        (SIMPLE_BASE, '.', 'http://a/b/c/'),
        (SIMPLE_BASE, './', 'http://a/b/c/'),
        (SIMPLE_BASE, '..', 'http://a/b/'),
        (SIMPLE_BASE, '../', 'http://a/b/'),
        (SIMPLE_BASE, '../g', 'http://a/b/g'),
        (SIMPLE_BASE, '../..', 'http://a/'),
        (SIMPLE_BASE, '../../g', 'http://a/g'),
        (SIMPLE_BASE, './../g', 'http://a/b/g'),
        (SIMPLE_BASE, './g/.', 'http://a/b/c/g/'),
        (SIMPLE_BASE, 'g/./h', 'http://a/b/c/g/h'),
        (SIMPLE_BASE, 'g/../h', 'http://a/b/c/h'),
        (SIMPLE_BASE, 'http:g', 'http://a/b/c/g'),
        (SIMPLE_BASE, 'http:', 'http://a/b/c/d'),
        (SIMPLE_BASE, 'http:?y', 'http://a/b/c/d?y'),
        (SIMPLE_BASE, 'http:g?y', 'http://a/b/c/g?y'),
        (SIMPLE_BASE, 'http:g?y/./x', 'http://a/b/c/g?y/./x'),
        ('http:///', '..', 'http:///'),
        ('', 'http://a/b/c/g?y/./x', 'http://a/b/c/g?y/./x'),
        ('', 'http://a/./g', 'http://a/./g'),
        ('svn://pathtorepo/dir1', 'dir2', 'svn://pathtorepo/dir2'),
        ('svn+ssh://pathtorepo/dir1', 'dir2', 'svn+ssh://pathtorepo/dir2'),

        # XXX: The following tests are no longer compatible with RFC3986
        # (SIMPLE_BASE, '../../../g', 'http://a/../g'),
        # (SIMPLE_BASE, '/./g', 'http://a/./g'),

        # test for issue22118 duplicate slashes
        (slash_base, 'foo', slash_base + 'foo'),

        # Non-RFC-defined tests, covering variations of base and trailing
        # slashes
        ('http://a/b/c/d/e/', '../../f/g/', 'http://a/b/c/f/g/'),
        ('http://a/b/c/d/e', '../../f/g/', 'http://a/b/f/g/'),
        ('http://a/b/c/d/e/', '/../../f/g/', 'http://a/f/g/'),
        ('http://a/b/c/d/e', '/../../f/g/', 'http://a/f/g/'),
        ('http://a/b/c/d/e/', '../../f/g', 'http://a/b/c/f/g'),
        ('http://a/b/', '../../f/g/', 'http://a/f/g/'),

        # issue 23703: don't duplicate filename
        ('a', 'b', 'b'),
    ]
    for base, relurl, expected in cases:
        self.checkJoin(base, relurl, expected)
447
Senthil Kumaranad02d232010-04-16 03:02:13 +0000448 def test_RFC2732(self):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000449 str_cases = [
Senthil Kumaranad02d232010-04-16 03:02:13 +0000450 ('http://Test.python.org:5432/foo/', 'test.python.org', 5432),
451 ('http://12.34.56.78:5432/foo/', '12.34.56.78', 5432),
452 ('http://[::1]:5432/foo/', '::1', 5432),
453 ('http://[dead:beef::1]:5432/foo/', 'dead:beef::1', 5432),
454 ('http://[dead:beef::]:5432/foo/', 'dead:beef::', 5432),
455 ('http://[dead:beef:cafe:5417:affe:8FA3:deaf:feed]:5432/foo/',
456 'dead:beef:cafe:5417:affe:8fa3:deaf:feed', 5432),
457 ('http://[::12.34.56.78]:5432/foo/', '::12.34.56.78', 5432),
458 ('http://[::ffff:12.34.56.78]:5432/foo/',
459 '::ffff:12.34.56.78', 5432),
460 ('http://Test.python.org/foo/', 'test.python.org', None),
461 ('http://12.34.56.78/foo/', '12.34.56.78', None),
462 ('http://[::1]/foo/', '::1', None),
463 ('http://[dead:beef::1]/foo/', 'dead:beef::1', None),
464 ('http://[dead:beef::]/foo/', 'dead:beef::', None),
465 ('http://[dead:beef:cafe:5417:affe:8FA3:deaf:feed]/foo/',
466 'dead:beef:cafe:5417:affe:8fa3:deaf:feed', None),
467 ('http://[::12.34.56.78]/foo/', '::12.34.56.78', None),
468 ('http://[::ffff:12.34.56.78]/foo/',
469 '::ffff:12.34.56.78', None),
Serhiy Storchakaff97b082014-01-18 18:30:33 +0200470 ('http://Test.python.org:/foo/', 'test.python.org', None),
471 ('http://12.34.56.78:/foo/', '12.34.56.78', None),
472 ('http://[::1]:/foo/', '::1', None),
473 ('http://[dead:beef::1]:/foo/', 'dead:beef::1', None),
474 ('http://[dead:beef::]:/foo/', 'dead:beef::', None),
475 ('http://[dead:beef:cafe:5417:affe:8FA3:deaf:feed]:/foo/',
476 'dead:beef:cafe:5417:affe:8fa3:deaf:feed', None),
477 ('http://[::12.34.56.78]:/foo/', '::12.34.56.78', None),
478 ('http://[::ffff:12.34.56.78]:/foo/',
479 '::ffff:12.34.56.78', None),
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000480 ]
481 def _encode(t):
482 return t[0].encode('ascii'), t[1].encode('ascii'), t[2]
483 bytes_cases = [_encode(x) for x in str_cases]
484 for url, hostname, port in str_cases + bytes_cases:
Senthil Kumaranad02d232010-04-16 03:02:13 +0000485 urlparsed = urllib.parse.urlparse(url)
486 self.assertEqual((urlparsed.hostname, urlparsed.port) , (hostname, port))
487
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000488 str_cases = [
Senthil Kumaranad02d232010-04-16 03:02:13 +0000489 'http://::12.34.56.78]/',
490 'http://[::1/foo/',
Senthil Kumaran7a1e09f2010-04-22 12:19:46 +0000491 'ftp://[::1/foo/bad]/bad',
Senthil Kumaran2eaef052010-04-20 20:42:50 +0000492 'http://[::1/foo/bad]/bad',
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000493 'http://[::ffff:12.34.56.78']
494 bytes_cases = [x.encode('ascii') for x in str_cases]
495 for invalid_url in str_cases + bytes_cases:
Senthil Kumaran7a1e09f2010-04-22 12:19:46 +0000496 self.assertRaises(ValueError, urllib.parse.urlparse, invalid_url)
Senthil Kumaranad02d232010-04-16 03:02:13 +0000497
def test_urldefrag(self):
    # Each case is (url, url without its fragment, fragment); every case
    # is checked as str and again as ASCII bytes.
    str_cases = [
        ('http://python.org#frag', 'http://python.org', 'frag'),
        ('http://python.org', 'http://python.org', ''),
        ('http://python.org/#frag', 'http://python.org/', 'frag'),
        ('http://python.org/', 'http://python.org/', ''),
        ('http://python.org/?q#frag', 'http://python.org/?q', 'frag'),
        ('http://python.org/?q', 'http://python.org/?q', ''),
        ('http://python.org/p#frag', 'http://python.org/p', 'frag'),
        ('http://python.org/p?q', 'http://python.org/p?q', ''),
        (RFC1808_BASE, 'http://a/b/c/d;p?q', 'f'),
        (RFC2396_BASE, 'http://a/b/c/d;p?q', ''),
    ]
    bytes_cases = [tuple(part.encode('ascii') for part in case)
                   for case in str_cases]
    for url, defrag, frag in str_cases + bytes_cases:
        # One sub-test per URL so a failure does not hide later cases.
        with self.subTest(url=url):
            result = urllib.parse.urldefrag(url)
            # geturl() must restore the original, fragment included.
            self.assertEqual(result.geturl(), url)
            self.assertEqual(result, (defrag, frag))
            self.assertEqual(result.url, defrag)
            self.assertEqual(result.fragment, frag)
Fred Drake70705652002-10-16 21:02:36 +0000520
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000521 def test_urlsplit_attributes(self):
522 url = "HTTP://WWW.PYTHON.ORG/doc/#frag"
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000523 p = urllib.parse.urlsplit(url)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000524 self.assertEqual(p.scheme, "http")
525 self.assertEqual(p.netloc, "WWW.PYTHON.ORG")
526 self.assertEqual(p.path, "/doc/")
527 self.assertEqual(p.query, "")
528 self.assertEqual(p.fragment, "frag")
529 self.assertEqual(p.username, None)
530 self.assertEqual(p.password, None)
531 self.assertEqual(p.hostname, "www.python.org")
532 self.assertEqual(p.port, None)
533 # geturl() won't return exactly the original URL in this case
534 # since the scheme is always case-normalized
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000535 # We handle this by ignoring the first 4 characters of the URL
536 self.assertEqual(p.geturl()[4:], url[4:])
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000537
538 url = "http://User:Pass@www.python.org:080/doc/?query=yes#frag"
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000539 p = urllib.parse.urlsplit(url)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000540 self.assertEqual(p.scheme, "http")
541 self.assertEqual(p.netloc, "User:Pass@www.python.org:080")
542 self.assertEqual(p.path, "/doc/")
543 self.assertEqual(p.query, "query=yes")
544 self.assertEqual(p.fragment, "frag")
545 self.assertEqual(p.username, "User")
546 self.assertEqual(p.password, "Pass")
547 self.assertEqual(p.hostname, "www.python.org")
548 self.assertEqual(p.port, 80)
549 self.assertEqual(p.geturl(), url)
550
Christian Heimesfaf2f632008-01-06 16:59:19 +0000551 # Addressing issue1698, which suggests Username can contain
552 # "@" characters. Though not RFC compliant, many ftp sites allow
553 # and request email addresses as usernames.
554
555 url = "http://User@example.com:Pass@www.python.org:080/doc/?query=yes#frag"
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000556 p = urllib.parse.urlsplit(url)
Christian Heimesfaf2f632008-01-06 16:59:19 +0000557 self.assertEqual(p.scheme, "http")
558 self.assertEqual(p.netloc, "User@example.com:Pass@www.python.org:080")
559 self.assertEqual(p.path, "/doc/")
560 self.assertEqual(p.query, "query=yes")
561 self.assertEqual(p.fragment, "frag")
562 self.assertEqual(p.username, "User@example.com")
563 self.assertEqual(p.password, "Pass")
564 self.assertEqual(p.hostname, "www.python.org")
565 self.assertEqual(p.port, 80)
566 self.assertEqual(p.geturl(), url)
567
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000568 # And check them all again, only with bytes this time
569 url = b"HTTP://WWW.PYTHON.ORG/doc/#frag"
570 p = urllib.parse.urlsplit(url)
571 self.assertEqual(p.scheme, b"http")
572 self.assertEqual(p.netloc, b"WWW.PYTHON.ORG")
573 self.assertEqual(p.path, b"/doc/")
574 self.assertEqual(p.query, b"")
575 self.assertEqual(p.fragment, b"frag")
576 self.assertEqual(p.username, None)
577 self.assertEqual(p.password, None)
578 self.assertEqual(p.hostname, b"www.python.org")
579 self.assertEqual(p.port, None)
580 self.assertEqual(p.geturl()[4:], url[4:])
581
582 url = b"http://User:Pass@www.python.org:080/doc/?query=yes#frag"
583 p = urllib.parse.urlsplit(url)
584 self.assertEqual(p.scheme, b"http")
585 self.assertEqual(p.netloc, b"User:Pass@www.python.org:080")
586 self.assertEqual(p.path, b"/doc/")
587 self.assertEqual(p.query, b"query=yes")
588 self.assertEqual(p.fragment, b"frag")
589 self.assertEqual(p.username, b"User")
590 self.assertEqual(p.password, b"Pass")
591 self.assertEqual(p.hostname, b"www.python.org")
592 self.assertEqual(p.port, 80)
593 self.assertEqual(p.geturl(), url)
594
595 url = b"http://User@example.com:Pass@www.python.org:080/doc/?query=yes#frag"
596 p = urllib.parse.urlsplit(url)
597 self.assertEqual(p.scheme, b"http")
598 self.assertEqual(p.netloc, b"User@example.com:Pass@www.python.org:080")
599 self.assertEqual(p.path, b"/doc/")
600 self.assertEqual(p.query, b"query=yes")
601 self.assertEqual(p.fragment, b"frag")
602 self.assertEqual(p.username, b"User@example.com")
603 self.assertEqual(p.password, b"Pass")
604 self.assertEqual(p.hostname, b"www.python.org")
605 self.assertEqual(p.port, 80)
606 self.assertEqual(p.geturl(), url)
Christian Heimesfaf2f632008-01-06 16:59:19 +0000607
Robert Collinsdfa95c92015-08-10 09:53:30 +1200608 # Verify an illegal port raises ValueError
Senthil Kumaran2fc5a502012-05-24 21:56:17 +0800609 url = b"HTTP://WWW.PYTHON.ORG:65536/doc/#frag"
610 p = urllib.parse.urlsplit(url)
Robert Collinsdfa95c92015-08-10 09:53:30 +1200611 with self.assertRaisesRegex(ValueError, "out of range"):
612 p.port
Senthil Kumaran2fc5a502012-05-24 21:56:17 +0800613
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000614 def test_attributes_bad_port(self):
Robert Collinsdfa95c92015-08-10 09:53:30 +1200615 """Check handling of invalid ports."""
616 for bytes in (False, True):
617 for parse in (urllib.parse.urlsplit, urllib.parse.urlparse):
618 for port in ("foo", "1.5", "-1", "0x10"):
619 with self.subTest(bytes=bytes, parse=parse, port=port):
620 netloc = "www.example.net:" + port
621 url = "http://" + netloc
622 if bytes:
623 netloc = netloc.encode("ascii")
624 url = url.encode("ascii")
625 p = parse(url)
626 self.assertEqual(p.netloc, netloc)
627 with self.assertRaises(ValueError):
628 p.port
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000629
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000630 def test_attributes_without_netloc(self):
631 # This example is straight from RFC 3261. It looks like it
632 # should allow the username, hostname, and port to be filled
633 # in, but doesn't. Since it's a URI and doesn't use the
634 # scheme://netloc syntax, the netloc and related attributes
635 # should be left empty.
636 uri = "sip:alice@atlanta.com;maddr=239.255.255.1;ttl=15"
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000637 p = urllib.parse.urlsplit(uri)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000638 self.assertEqual(p.netloc, "")
639 self.assertEqual(p.username, None)
640 self.assertEqual(p.password, None)
641 self.assertEqual(p.hostname, None)
642 self.assertEqual(p.port, None)
643 self.assertEqual(p.geturl(), uri)
644
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000645 p = urllib.parse.urlparse(uri)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000646 self.assertEqual(p.netloc, "")
647 self.assertEqual(p.username, None)
648 self.assertEqual(p.password, None)
649 self.assertEqual(p.hostname, None)
650 self.assertEqual(p.port, None)
651 self.assertEqual(p.geturl(), uri)
652
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000653 # You guessed it, repeating the test with bytes input
654 uri = b"sip:alice@atlanta.com;maddr=239.255.255.1;ttl=15"
655 p = urllib.parse.urlsplit(uri)
656 self.assertEqual(p.netloc, b"")
657 self.assertEqual(p.username, None)
658 self.assertEqual(p.password, None)
659 self.assertEqual(p.hostname, None)
660 self.assertEqual(p.port, None)
661 self.assertEqual(p.geturl(), uri)
662
663 p = urllib.parse.urlparse(uri)
664 self.assertEqual(p.netloc, b"")
665 self.assertEqual(p.username, None)
666 self.assertEqual(p.password, None)
667 self.assertEqual(p.hostname, None)
668 self.assertEqual(p.port, None)
669 self.assertEqual(p.geturl(), uri)
670
Christian Heimesfaf2f632008-01-06 16:59:19 +0000671 def test_noslash(self):
672 # Issue 1637: http://foo.com?query is legal
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000673 self.assertEqual(urllib.parse.urlparse("http://example.com?blahblah=/foo"),
Christian Heimesfaf2f632008-01-06 16:59:19 +0000674 ('http', 'example.com', '', '', 'blahblah=/foo', ''))
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000675 self.assertEqual(urllib.parse.urlparse(b"http://example.com?blahblah=/foo"),
676 (b'http', b'example.com', b'', b'', b'blahblah=/foo', b''))
Christian Heimesfaf2f632008-01-06 16:59:19 +0000677
Senthil Kumaran84c7d9f2010-08-04 04:50:44 +0000678 def test_withoutscheme(self):
679 # Test urlparse without scheme
680 # Issue 754016: urlparse goes wrong with IP:port without scheme
681 # RFC 1808 specifies that netloc should start with //, urlparse expects
682 # the same, otherwise it classifies the portion of url as path.
683 self.assertEqual(urllib.parse.urlparse("path"),
684 ('','','path','','',''))
685 self.assertEqual(urllib.parse.urlparse("//www.python.org:80"),
686 ('','www.python.org:80','','','',''))
687 self.assertEqual(urllib.parse.urlparse("http://www.python.org:80"),
688 ('http','www.python.org:80','','','',''))
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000689 # Repeat for bytes input
690 self.assertEqual(urllib.parse.urlparse(b"path"),
691 (b'',b'',b'path',b'',b'',b''))
692 self.assertEqual(urllib.parse.urlparse(b"//www.python.org:80"),
693 (b'',b'www.python.org:80',b'',b'',b'',b''))
694 self.assertEqual(urllib.parse.urlparse(b"http://www.python.org:80"),
695 (b'http',b'www.python.org:80',b'',b'',b'',b''))
Senthil Kumaran84c7d9f2010-08-04 04:50:44 +0000696
697 def test_portseparator(self):
698 # Issue 754016 makes changes for port separator ':' from scheme separator
699 self.assertEqual(urllib.parse.urlparse("path:80"),
700 ('','','path:80','','',''))
701 self.assertEqual(urllib.parse.urlparse("http:"),('http','','','','',''))
702 self.assertEqual(urllib.parse.urlparse("https:"),('https','','','','',''))
703 self.assertEqual(urllib.parse.urlparse("http://www.python.org:80"),
704 ('http','www.python.org:80','','','',''))
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000705 # As usual, need to check bytes input as well
706 self.assertEqual(urllib.parse.urlparse(b"path:80"),
707 (b'',b'',b'path:80',b'',b'',b''))
708 self.assertEqual(urllib.parse.urlparse(b"http:"),(b'http',b'',b'',b'',b'',b''))
709 self.assertEqual(urllib.parse.urlparse(b"https:"),(b'https',b'',b'',b'',b'',b''))
710 self.assertEqual(urllib.parse.urlparse(b"http://www.python.org:80"),
711 (b'http',b'www.python.org:80',b'',b'',b'',b''))
Senthil Kumaran84c7d9f2010-08-04 04:50:44 +0000712
Facundo Batista2ac5de22008-07-07 18:24:11 +0000713 def test_usingsys(self):
714 # Issue 3314: sys module is used in the error
715 self.assertRaises(TypeError, urllib.parse.urlencode, "foo")
716
Senthil Kumaran6be85c52010-02-19 07:42:50 +0000717 def test_anyscheme(self):
718 # Issue 7904: s3://foo.com/stuff has netloc "foo.com".
Ezio Melotti5e15efa2010-02-19 14:49:02 +0000719 self.assertEqual(urllib.parse.urlparse("s3://foo.com/stuff"),
720 ('s3', 'foo.com', '/stuff', '', '', ''))
721 self.assertEqual(urllib.parse.urlparse("x-newscheme://foo.com/stuff"),
722 ('x-newscheme', 'foo.com', '/stuff', '', '', ''))
Senthil Kumaran1be320e2012-05-19 08:12:00 +0800723 self.assertEqual(urllib.parse.urlparse("x-newscheme://foo.com/stuff?query#fragment"),
724 ('x-newscheme', 'foo.com', '/stuff', '', 'query', 'fragment'))
725 self.assertEqual(urllib.parse.urlparse("x-newscheme://foo.com/stuff?query"),
726 ('x-newscheme', 'foo.com', '/stuff', '', 'query', ''))
727
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000728 # And for bytes...
729 self.assertEqual(urllib.parse.urlparse(b"s3://foo.com/stuff"),
730 (b's3', b'foo.com', b'/stuff', b'', b'', b''))
731 self.assertEqual(urllib.parse.urlparse(b"x-newscheme://foo.com/stuff"),
732 (b'x-newscheme', b'foo.com', b'/stuff', b'', b'', b''))
Senthil Kumaran1be320e2012-05-19 08:12:00 +0800733 self.assertEqual(urllib.parse.urlparse(b"x-newscheme://foo.com/stuff?query#fragment"),
734 (b'x-newscheme', b'foo.com', b'/stuff', b'', b'query', b'fragment'))
735 self.assertEqual(urllib.parse.urlparse(b"x-newscheme://foo.com/stuff?query"),
736 (b'x-newscheme', b'foo.com', b'/stuff', b'', b'query', b''))
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000737
Berker Peksag89584c92015-06-25 23:38:48 +0300738 def test_default_scheme(self):
739 # Exercise the scheme parameter of urlparse() and urlsplit()
740 for func in (urllib.parse.urlparse, urllib.parse.urlsplit):
741 with self.subTest(function=func):
742 result = func("http://example.net/", "ftp")
743 self.assertEqual(result.scheme, "http")
744 result = func(b"http://example.net/", b"ftp")
745 self.assertEqual(result.scheme, b"http")
746 self.assertEqual(func("path", "ftp").scheme, "ftp")
747 self.assertEqual(func("path", scheme="ftp").scheme, "ftp")
748 self.assertEqual(func(b"path", scheme=b"ftp").scheme, b"ftp")
749 self.assertEqual(func("path").scheme, "")
750 self.assertEqual(func(b"path").scheme, b"")
751 self.assertEqual(func(b"path", "").scheme, b"")
752
753 def test_parse_fragments(self):
754 # Exercise the allow_fragments parameter of urlparse() and urlsplit()
755 tests = (
756 ("http:#frag", "path"),
757 ("//example.net#frag", "path"),
758 ("index.html#frag", "path"),
759 (";a=b#frag", "params"),
760 ("?a=b#frag", "query"),
761 ("#frag", "path"),
762 )
763 for url, attr in tests:
764 for func in (urllib.parse.urlparse, urllib.parse.urlsplit):
765 if attr == "params" and func is urllib.parse.urlsplit:
766 attr = "path"
767 with self.subTest(url=url, function=func):
768 result = func(url, allow_fragments=False)
769 self.assertEqual(result.fragment, "")
770 self.assertTrue(getattr(result, attr).endswith("#frag"))
771 self.assertEqual(func(url, "", False).fragment, "")
772
773 result = func(url, allow_fragments=True)
774 self.assertEqual(result.fragment, "frag")
775 self.assertFalse(getattr(result, attr).endswith("frag"))
776 self.assertEqual(func(url, "", True).fragment, "frag")
777 self.assertEqual(func(url).fragment, "frag")
778
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000779 def test_mixed_types_rejected(self):
780 # Several functions that process either strings or ASCII encoded bytes
781 # accept multiple arguments. Check they reject mixed type input
Ezio Melottied3a7d22010-12-01 02:32:32 +0000782 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000783 urllib.parse.urlparse("www.python.org", b"http")
Ezio Melottied3a7d22010-12-01 02:32:32 +0000784 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000785 urllib.parse.urlparse(b"www.python.org", "http")
Ezio Melottied3a7d22010-12-01 02:32:32 +0000786 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000787 urllib.parse.urlsplit("www.python.org", b"http")
Ezio Melottied3a7d22010-12-01 02:32:32 +0000788 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000789 urllib.parse.urlsplit(b"www.python.org", "http")
Ezio Melottied3a7d22010-12-01 02:32:32 +0000790 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000791 urllib.parse.urlunparse(( b"http", "www.python.org","","","",""))
Ezio Melottied3a7d22010-12-01 02:32:32 +0000792 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000793 urllib.parse.urlunparse(("http", b"www.python.org","","","",""))
Ezio Melottied3a7d22010-12-01 02:32:32 +0000794 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000795 urllib.parse.urlunsplit((b"http", "www.python.org","","",""))
Ezio Melottied3a7d22010-12-01 02:32:32 +0000796 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000797 urllib.parse.urlunsplit(("http", b"www.python.org","","",""))
Ezio Melottied3a7d22010-12-01 02:32:32 +0000798 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000799 urllib.parse.urljoin("http://python.org", b"http://python.org")
Ezio Melottied3a7d22010-12-01 02:32:32 +0000800 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000801 urllib.parse.urljoin(b"http://python.org", "http://python.org")
802
803 def _check_result_type(self, str_type):
804 num_args = len(str_type._fields)
805 bytes_type = str_type._encoded_counterpart
806 self.assertIs(bytes_type._decoded_counterpart, str_type)
807 str_args = ('',) * num_args
808 bytes_args = (b'',) * num_args
809 str_result = str_type(*str_args)
810 bytes_result = bytes_type(*bytes_args)
811 encoding = 'ascii'
812 errors = 'strict'
813 self.assertEqual(str_result, str_args)
814 self.assertEqual(bytes_result.decode(), str_args)
815 self.assertEqual(bytes_result.decode(), str_result)
816 self.assertEqual(bytes_result.decode(encoding), str_args)
817 self.assertEqual(bytes_result.decode(encoding), str_result)
818 self.assertEqual(bytes_result.decode(encoding, errors), str_args)
819 self.assertEqual(bytes_result.decode(encoding, errors), str_result)
820 self.assertEqual(bytes_result, bytes_args)
821 self.assertEqual(str_result.encode(), bytes_args)
822 self.assertEqual(str_result.encode(), bytes_result)
823 self.assertEqual(str_result.encode(encoding), bytes_args)
824 self.assertEqual(str_result.encode(encoding), bytes_result)
825 self.assertEqual(str_result.encode(encoding, errors), bytes_args)
826 self.assertEqual(str_result.encode(encoding, errors), bytes_result)
827
828 def test_result_pairs(self):
829 # Check encoding and decoding between result pairs
830 result_types = [
831 urllib.parse.DefragResult,
832 urllib.parse.SplitResult,
833 urllib.parse.ParseResult,
834 ]
835 for result_type in result_types:
836 self._check_result_type(result_type)
837
Victor Stinner1d87deb2011-01-14 13:05:19 +0000838 def test_parse_qs_encoding(self):
839 result = urllib.parse.parse_qs("key=\u0141%E9", encoding="latin-1")
840 self.assertEqual(result, {'key': ['\u0141\xE9']})
841 result = urllib.parse.parse_qs("key=\u0141%C3%A9", encoding="utf-8")
842 self.assertEqual(result, {'key': ['\u0141\xE9']})
843 result = urllib.parse.parse_qs("key=\u0141%C3%A9", encoding="ascii")
844 self.assertEqual(result, {'key': ['\u0141\ufffd\ufffd']})
845 result = urllib.parse.parse_qs("key=\u0141%E9-", encoding="ascii")
846 self.assertEqual(result, {'key': ['\u0141\ufffd-']})
847 result = urllib.parse.parse_qs("key=\u0141%E9-", encoding="ascii",
848 errors="ignore")
849 self.assertEqual(result, {'key': ['\u0141-']})
850
851 def test_parse_qsl_encoding(self):
852 result = urllib.parse.parse_qsl("key=\u0141%E9", encoding="latin-1")
853 self.assertEqual(result, [('key', '\u0141\xE9')])
854 result = urllib.parse.parse_qsl("key=\u0141%C3%A9", encoding="utf-8")
855 self.assertEqual(result, [('key', '\u0141\xE9')])
856 result = urllib.parse.parse_qsl("key=\u0141%C3%A9", encoding="ascii")
857 self.assertEqual(result, [('key', '\u0141\ufffd\ufffd')])
858 result = urllib.parse.parse_qsl("key=\u0141%E9-", encoding="ascii")
859 self.assertEqual(result, [('key', '\u0141\ufffd-')])
860 result = urllib.parse.parse_qsl("key=\u0141%E9-", encoding="ascii",
861 errors="ignore")
862 self.assertEqual(result, [('key', '\u0141-')])
863
Senthil Kumarande02a712011-07-23 18:27:45 +0800864 def test_urlencode_sequences(self):
865 # Other tests incidentally urlencode things; test non-covered cases:
866 # Sequence and object values.
867 result = urllib.parse.urlencode({'a': [1, 2], 'b': (3, 4, 5)}, True)
Georg Brandl09a7c722012-02-20 21:31:46 +0100868 # we cannot rely on ordering here
869 assert set(result.split('&')) == {'a=1', 'a=2', 'b=3', 'b=4', 'b=5'}
Senthil Kumarande02a712011-07-23 18:27:45 +0800870
871 class Trivial:
872 def __str__(self):
873 return 'trivial'
874
875 result = urllib.parse.urlencode({'a': Trivial()}, True)
876 self.assertEqual(result, 'a=trivial')
877
R David Murrayc17686f2015-05-17 20:44:50 -0400878 def test_urlencode_quote_via(self):
879 result = urllib.parse.urlencode({'a': 'some value'})
880 self.assertEqual(result, "a=some+value")
881 result = urllib.parse.urlencode({'a': 'some value/another'},
882 quote_via=urllib.parse.quote)
883 self.assertEqual(result, "a=some%20value%2Fanother")
884 result = urllib.parse.urlencode({'a': 'some value/another'},
885 safe='/', quote_via=urllib.parse.quote)
886 self.assertEqual(result, "a=some%20value/another")
887
Senthil Kumarande02a712011-07-23 18:27:45 +0800888 def test_quote_from_bytes(self):
889 self.assertRaises(TypeError, urllib.parse.quote_from_bytes, 'foo')
890 result = urllib.parse.quote_from_bytes(b'archaeological arcana')
891 self.assertEqual(result, 'archaeological%20arcana')
892 result = urllib.parse.quote_from_bytes(b'')
893 self.assertEqual(result, '')
894
895 def test_unquote_to_bytes(self):
896 result = urllib.parse.unquote_to_bytes('abc%20def')
897 self.assertEqual(result, b'abc def')
898 result = urllib.parse.unquote_to_bytes('')
899 self.assertEqual(result, b'')
900
901 def test_quote_errors(self):
902 self.assertRaises(TypeError, urllib.parse.quote, b'foo',
903 encoding='utf-8')
904 self.assertRaises(TypeError, urllib.parse.quote, b'foo', errors='strict')
Victor Stinner1d87deb2011-01-14 13:05:19 +0000905
Ezio Melotti6709b7d2012-05-19 17:15:19 +0300906 def test_issue14072(self):
907 p1 = urllib.parse.urlsplit('tel:+31-641044153')
908 self.assertEqual(p1.scheme, 'tel')
909 self.assertEqual(p1.path, '+31-641044153')
910 p2 = urllib.parse.urlsplit('tel:+31641044153')
911 self.assertEqual(p2.scheme, 'tel')
912 self.assertEqual(p2.path, '+31641044153')
Senthil Kumaraned301992012-12-24 14:00:20 -0800913 # assert the behavior for urlparse
914 p1 = urllib.parse.urlparse('tel:+31-641044153')
915 self.assertEqual(p1.scheme, 'tel')
916 self.assertEqual(p1.path, '+31-641044153')
917 p2 = urllib.parse.urlparse('tel:+31641044153')
918 self.assertEqual(p2.scheme, 'tel')
919 self.assertEqual(p2.path, '+31641044153')
920
921 def test_telurl_params(self):
922 p1 = urllib.parse.urlparse('tel:123-4;phone-context=+1-650-516')
923 self.assertEqual(p1.scheme, 'tel')
924 self.assertEqual(p1.path, '123-4')
925 self.assertEqual(p1.params, 'phone-context=+1-650-516')
926
927 p1 = urllib.parse.urlparse('tel:+1-201-555-0123')
928 self.assertEqual(p1.scheme, 'tel')
929 self.assertEqual(p1.path, '+1-201-555-0123')
930 self.assertEqual(p1.params, '')
931
932 p1 = urllib.parse.urlparse('tel:7042;phone-context=example.com')
933 self.assertEqual(p1.scheme, 'tel')
934 self.assertEqual(p1.path, '7042')
935 self.assertEqual(p1.params, 'phone-context=example.com')
936
937 p1 = urllib.parse.urlparse('tel:863-1234;phone-context=+1-914-555')
938 self.assertEqual(p1.scheme, 'tel')
939 self.assertEqual(p1.path, '863-1234')
940 self.assertEqual(p1.params, 'phone-context=+1-914-555')
941
R David Murrayf5163882013-03-21 20:56:51 -0400942 def test_Quoter_repr(self):
943 quoter = urllib.parse.Quoter(urllib.parse._ALWAYS_SAFE)
944 self.assertIn('Quoter', repr(quoter))
945
Serhiy Storchaka15154502015-04-07 19:09:01 +0300946 def test_all(self):
947 expected = []
948 undocumented = {
949 'splitattr', 'splithost', 'splitnport', 'splitpasswd',
950 'splitport', 'splitquery', 'splittag', 'splittype', 'splituser',
951 'splitvalue',
952 'Quoter', 'ResultBase', 'clear_cache', 'to_bytes', 'unwrap',
953 }
954 for name in dir(urllib.parse):
955 if name.startswith('_') or name in undocumented:
956 continue
957 object = getattr(urllib.parse, name)
958 if getattr(object, '__module__', None) == 'urllib.parse':
959 expected.append(name)
960 self.assertCountEqual(urllib.parse.__all__, expected)
961
Senthil Kumaran6be85c52010-02-19 07:42:50 +0000962
class Utility_Tests(unittest.TestCase):
    """Table-driven checks of the split*, to_bytes and unwrap helpers of urllib.parse."""
    # In Python 2 this test class was in test_urllib.

    def _check_pairs(self, func, pairs):
        # Call func on each input, in order, and compare with the
        # expected result.
        for argument, expected in pairs:
            self.assertEqual(func(argument), expected)

    def test_splittype(self):
        self._check_pairs(urllib.parse.splittype, (
            ('type:opaquestring', ('type', 'opaquestring')),
            ('opaquestring', (None, 'opaquestring')),
            (':opaquestring', (None, ':opaquestring')),
            ('type:', ('type', '')),
            ('type:opaque:string', ('type', 'opaque:string')),
        ))

    def test_splithost(self):
        self._check_pairs(urllib.parse.splithost, (
            ('//www.example.org:80/foo/bar/baz.html',
             ('www.example.org:80', '/foo/bar/baz.html')),
            ('//www.example.org:80',
             ('www.example.org:80', '')),
            ('/foo/bar/baz.html',
             (None, '/foo/bar/baz.html')),
        ))

    def test_splituser(self):
        self._check_pairs(urllib.parse.splituser, (
            ('User:Pass@www.python.org:080',
             ('User:Pass', 'www.python.org:080')),
            ('@www.python.org:080',
             ('', 'www.python.org:080')),
            ('www.python.org:080',
             (None, 'www.python.org:080')),
            ('User:Pass@',
             ('User:Pass', '')),
            ('User@example.com:Pass@www.python.org:080',
             ('User@example.com:Pass', 'www.python.org:080')),
        ))

    def test_splitpasswd(self):
        # Some of these passwords are not sensible; they are included to
        # conform to RFC2617 and to address issue4675.
        self._check_pairs(urllib.parse.splitpasswd, (
            ('user:ab', ('user', 'ab')),
            ('user:a\nb', ('user', 'a\nb')),
            ('user:a\tb', ('user', 'a\tb')),
            ('user:a\rb', ('user', 'a\rb')),
            ('user:a\fb', ('user', 'a\fb')),
            ('user:a\vb', ('user', 'a\vb')),
            ('user:a:b', ('user', 'a:b')),
            ('user:a b', ('user', 'a b')),
            ('user 2:ab', ('user 2', 'ab')),
            ('user+1:a+b', ('user+1', 'a+b')),
            ('user:', ('user', '')),
            ('user', ('user', None)),
            (':ab', ('', 'ab')),
        ))

    def test_splitport(self):
        self._check_pairs(urllib.parse.splitport, (
            ('parrot:88', ('parrot', '88')),
            ('parrot', ('parrot', None)),
            ('parrot:', ('parrot', None)),
            ('127.0.0.1', ('127.0.0.1', None)),
            ('parrot:cheese', ('parrot:cheese', None)),
            ('[::1]:88', ('[::1]', '88')),
            ('[::1]', ('[::1]', None)),
            (':88', ('', '88')),
        ))

    def test_splitnport(self):
        # Rows are (positional arguments, expected result); the optional
        # second argument is the default port.
        calls = (
            (('parrot:88',), ('parrot', 88)),
            (('parrot',), ('parrot', -1)),
            (('parrot', 55), ('parrot', 55)),
            (('parrot:',), ('parrot', -1)),
            (('parrot:', 55), ('parrot', 55)),
            (('127.0.0.1',), ('127.0.0.1', -1)),
            (('127.0.0.1', 55), ('127.0.0.1', 55)),
            (('parrot:cheese',), ('parrot', None)),
            (('parrot:cheese', 55), ('parrot', None)),
        )
        splitnport = urllib.parse.splitnport
        for args, expected in calls:
            self.assertEqual(splitnport(*args), expected)

    def test_splitquery(self):
        # Normal cases are exercised by other tests; these inputs exist
        # for coverage (trailing "?", no query at all, query only).
        self._check_pairs(urllib.parse.splitquery, (
            ('http://python.org/fake?foo=bar',
             ('http://python.org/fake', 'foo=bar')),
            ('http://python.org/fake?foo=bar?',
             ('http://python.org/fake?foo=bar', '')),
            ('http://python.org/fake',
             ('http://python.org/fake', None)),
            ('?foo=bar', ('', 'foo=bar')),
        ))

    def test_splittag(self):
        self._check_pairs(urllib.parse.splittag, (
            ('http://example.com?foo=bar#baz',
             ('http://example.com?foo=bar', 'baz')),
            ('http://example.com?foo=bar#',
             ('http://example.com?foo=bar', '')),
            ('#baz', ('', 'baz')),
            ('http://example.com?foo=bar',
             ('http://example.com?foo=bar', None)),
            ('http://example.com?foo=bar#baz#boo',
             ('http://example.com?foo=bar#baz', 'boo')),
        ))

    def test_splitattr(self):
        self._check_pairs(urllib.parse.splitattr, (
            ('/path;attr1=value1;attr2=value2',
             ('/path', ['attr1=value1', 'attr2=value2'])),
            ('/path;', ('/path', [''])),
            (';attr1=value1;attr2=value2',
             ('', ['attr1=value1', 'attr2=value2'])),
            ('/path', ('/path', [])),
        ))

    def test_splitvalue(self):
        # Normal cases are exercised by other tests; these are the
        # pathological ones with a missing key or value (for coverage).
        self._check_pairs(urllib.parse.splitvalue, (
            ('foo=bar', ('foo', 'bar')),
            ('foo=', ('foo', '')),
            ('=bar', ('', 'bar')),
            ('foobar', ('foobar', None)),
            ('foo=bar=baz', ('foo', 'bar=baz')),
        ))

    def test_to_bytes(self):
        # A pure-ASCII URL comes back equal to the input; a non-ASCII
        # one is rejected with UnicodeError.
        ascii_url = 'http://www.python.org'
        self.assertEqual(urllib.parse.to_bytes(ascii_url),
                         'http://www.python.org')
        with self.assertRaises(UnicodeError):
            urllib.parse.to_bytes('http://www.python.org/medi\u00e6val')

    def test_unwrap(self):
        # The "<URL:...>" wrapper is stripped.
        self.assertEqual(urllib.parse.unwrap('<URL:type://host/path>'),
                         'type://host/path')
1090
Skip Montanaro6ec967d2002-03-23 05:32:10 +00001091
# Run the tests when this file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()