blob: 0552f90594a3c3b29cc8e51a969398cc0e795293 [file] [log] [blame]
Skip Montanaro6ec967d2002-03-23 05:32:10 +00001import unittest
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002import urllib.parse
Fred Drakea4d18a02001-01-05 05:57:04 +00003
# Base URLs against which the relative-reference tests below are resolved.
# The RFC 1808 base carries a fragment; the others do not.
RFC1808_BASE = "http://a/b/c/d;p?q#f"
RFC2396_BASE = "http://a/b/c/d;p?q"
RFC3986_BASE = 'http://a/b/c/d;p?q'
# Same hierarchy as above but without params or query.
SIMPLE_BASE = 'http://a/b/c/d'
Fred Drakea4d18a02001-01-05 05:57:04 +00008
# A list of test cases.  Each test case is a two-tuple that contains
# a string with the query and a list of (name, value) pairs with the
# expected result.
11
# Each entry is (query, expected parse_qsl() result with blank values kept).
# The second half repeats the first half with bytes input and output.
parse_qsl_test_cases = [
    ("", []),
    ("&", []),
    ("&&", []),
    ("=", [('', '')]),
    ("=a", [('', 'a')]),
    ("a", [('a', '')]),
    ("a=", [('a', '')]),
    ("&a=b", [('a', 'b')]),
    ("a=a+b&b=b+c", [('a', 'a b'), ('b', 'b c')]),
    ("a=1&a=2", [('a', '1'), ('a', '2')]),
    (b"", []),
    (b"&", []),
    (b"&&", []),
    (b"=", [(b'', b'')]),
    (b"=a", [(b'', b'a')]),
    (b"a", [(b'a', b'')]),
    (b"a=", [(b'a', b'')]),
    (b"&a=b", [(b'a', b'b')]),
    (b"a=a+b&b=b+c", [(b'a', b'a b'), (b'b', b'b c')]),
    (b"a=1&a=2", [(b'a', b'1'), (b'a', b'2')]),
]
36
Skip Montanaro6ec967d2002-03-23 05:32:10 +000037class UrlParseTestCase(unittest.TestCase):
Johannes Gijsbers41e4faa2005-01-09 15:29:10 +000038
39 def checkRoundtrips(self, url, parsed, split):
Jeremy Hylton1afc1692008-06-18 20:49:58 +000040 result = urllib.parse.urlparse(url)
Johannes Gijsbers41e4faa2005-01-09 15:29:10 +000041 self.assertEqual(result, parsed)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +000042 t = (result.scheme, result.netloc, result.path,
43 result.params, result.query, result.fragment)
44 self.assertEqual(t, parsed)
Johannes Gijsbers41e4faa2005-01-09 15:29:10 +000045 # put it back together and it should be the same
Jeremy Hylton1afc1692008-06-18 20:49:58 +000046 result2 = urllib.parse.urlunparse(result)
Johannes Gijsbers41e4faa2005-01-09 15:29:10 +000047 self.assertEqual(result2, url)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +000048 self.assertEqual(result2, result.geturl())
49
50 # the result of geturl() is a fixpoint; we can always parse it
51 # again to get the same result:
Jeremy Hylton1afc1692008-06-18 20:49:58 +000052 result3 = urllib.parse.urlparse(result.geturl())
Thomas Wouters49fd7fa2006-04-21 10:40:58 +000053 self.assertEqual(result3.geturl(), result.geturl())
54 self.assertEqual(result3, result)
55 self.assertEqual(result3.scheme, result.scheme)
56 self.assertEqual(result3.netloc, result.netloc)
57 self.assertEqual(result3.path, result.path)
58 self.assertEqual(result3.params, result.params)
59 self.assertEqual(result3.query, result.query)
60 self.assertEqual(result3.fragment, result.fragment)
61 self.assertEqual(result3.username, result.username)
62 self.assertEqual(result3.password, result.password)
63 self.assertEqual(result3.hostname, result.hostname)
64 self.assertEqual(result3.port, result.port)
Johannes Gijsbers41e4faa2005-01-09 15:29:10 +000065
66 # check the roundtrip using urlsplit() as well
Jeremy Hylton1afc1692008-06-18 20:49:58 +000067 result = urllib.parse.urlsplit(url)
Johannes Gijsbers41e4faa2005-01-09 15:29:10 +000068 self.assertEqual(result, split)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +000069 t = (result.scheme, result.netloc, result.path,
70 result.query, result.fragment)
71 self.assertEqual(t, split)
Jeremy Hylton1afc1692008-06-18 20:49:58 +000072 result2 = urllib.parse.urlunsplit(result)
Johannes Gijsbers41e4faa2005-01-09 15:29:10 +000073 self.assertEqual(result2, url)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +000074 self.assertEqual(result2, result.geturl())
75
76 # check the fixpoint property of re-parsing the result of geturl()
Jeremy Hylton1afc1692008-06-18 20:49:58 +000077 result3 = urllib.parse.urlsplit(result.geturl())
Thomas Wouters49fd7fa2006-04-21 10:40:58 +000078 self.assertEqual(result3.geturl(), result.geturl())
79 self.assertEqual(result3, result)
80 self.assertEqual(result3.scheme, result.scheme)
81 self.assertEqual(result3.netloc, result.netloc)
82 self.assertEqual(result3.path, result.path)
83 self.assertEqual(result3.query, result.query)
84 self.assertEqual(result3.fragment, result.fragment)
85 self.assertEqual(result3.username, result.username)
86 self.assertEqual(result3.password, result.password)
87 self.assertEqual(result3.hostname, result.hostname)
88 self.assertEqual(result3.port, result.port)
Johannes Gijsbers41e4faa2005-01-09 15:29:10 +000089
Facundo Batistac469d4c2008-09-03 22:49:01 +000090 def test_qsl(self):
91 for orig, expect in parse_qsl_test_cases:
92 result = urllib.parse.parse_qsl(orig, keep_blank_values=True)
Senthil Kumarande02a712011-07-23 18:27:45 +080093 self.assertEqual(result, expect, "Error parsing %r" % orig)
94 expect_without_blanks = [v for v in expect if len(v[1])]
95 result = urllib.parse.parse_qsl(orig, keep_blank_values=False)
96 self.assertEqual(result, expect_without_blanks,
97 "Error parsing %r" % orig)
Facundo Batistac469d4c2008-09-03 22:49:01 +000098
Johannes Gijsbers41e4faa2005-01-09 15:29:10 +000099 def test_roundtrips(self):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000100 str_cases = [
Fred Drake70705652002-10-16 21:02:36 +0000101 ('file:///tmp/junk.txt',
102 ('file', '', '/tmp/junk.txt', '', '', ''),
103 ('file', '', '/tmp/junk.txt', '', '')),
Neal Norwitz68b539e2003-01-06 06:58:31 +0000104 ('imap://mail.python.org/mbox1',
105 ('imap', 'mail.python.org', '/mbox1', '', '', ''),
106 ('imap', 'mail.python.org', '/mbox1', '', '')),
Skip Montanarof09b88e2003-01-06 20:27:03 +0000107 ('mms://wms.sys.hinet.net/cts/Drama/09006251100.asf',
Johannes Gijsbers41e4faa2005-01-09 15:29:10 +0000108 ('mms', 'wms.sys.hinet.net', '/cts/Drama/09006251100.asf',
109 '', '', ''),
110 ('mms', 'wms.sys.hinet.net', '/cts/Drama/09006251100.asf',
111 '', '')),
Senthil Kumaraneaaec272009-03-30 21:54:41 +0000112 ('nfs://server/path/to/file.txt',
113 ('nfs', 'server', '/path/to/file.txt', '', '', ''),
114 ('nfs', 'server', '/path/to/file.txt', '', '')),
Fred Drake50747fc2005-07-29 15:56:32 +0000115 ('svn+ssh://svn.zope.org/repos/main/ZConfig/trunk/',
116 ('svn+ssh', 'svn.zope.org', '/repos/main/ZConfig/trunk/',
117 '', '', ''),
118 ('svn+ssh', 'svn.zope.org', '/repos/main/ZConfig/trunk/',
Senthil Kumaranead169d2010-05-13 03:37:23 +0000119 '', '')),
120 ('git+ssh://git@github.com/user/project.git',
121 ('git+ssh', 'git@github.com','/user/project.git',
122 '','',''),
123 ('git+ssh', 'git@github.com','/user/project.git',
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000124 '', '')),
Johannes Gijsbers41e4faa2005-01-09 15:29:10 +0000125 ]
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000126 def _encode(t):
127 return (t[0].encode('ascii'),
128 tuple(x.encode('ascii') for x in t[1]),
129 tuple(x.encode('ascii') for x in t[2]))
130 bytes_cases = [_encode(x) for x in str_cases]
131 for url, parsed, split in str_cases + bytes_cases:
Johannes Gijsbers41e4faa2005-01-09 15:29:10 +0000132 self.checkRoundtrips(url, parsed, split)
Michael W. Hudsonbd3e7712002-03-18 13:06:00 +0000133
Johannes Gijsbers41e4faa2005-01-09 15:29:10 +0000134 def test_http_roundtrips(self):
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000135 # urllib.parse.urlsplit treats 'http:' as an optimized special case,
Johannes Gijsbers41e4faa2005-01-09 15:29:10 +0000136 # so we test both 'http:' and 'https:' in all the following.
137 # Three cheers for white box knowledge!
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000138 str_cases = [
Johannes Gijsbers41e4faa2005-01-09 15:29:10 +0000139 ('://www.python.org',
140 ('www.python.org', '', '', '', ''),
141 ('www.python.org', '', '', '')),
142 ('://www.python.org#abc',
143 ('www.python.org', '', '', '', 'abc'),
144 ('www.python.org', '', '', 'abc')),
145 ('://www.python.org?q=abc',
146 ('www.python.org', '', '', 'q=abc', ''),
147 ('www.python.org', '', 'q=abc', '')),
148 ('://www.python.org/#abc',
149 ('www.python.org', '/', '', '', 'abc'),
150 ('www.python.org', '/', '', 'abc')),
151 ('://a/b/c/d;p?q#f',
152 ('a', '/b/c/d', 'p', 'q', 'f'),
153 ('a', '/b/c/d;p', 'q', 'f')),
154 ]
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000155 def _encode(t):
156 return (t[0].encode('ascii'),
157 tuple(x.encode('ascii') for x in t[1]),
158 tuple(x.encode('ascii') for x in t[2]))
159 bytes_cases = [_encode(x) for x in str_cases]
160 str_schemes = ('http', 'https')
161 bytes_schemes = (b'http', b'https')
162 str_tests = str_schemes, str_cases
163 bytes_tests = bytes_schemes, bytes_cases
164 for schemes, test_cases in (str_tests, bytes_tests):
165 for scheme in schemes:
166 for url, parsed, split in test_cases:
167 url = scheme + url
168 parsed = (scheme,) + parsed
169 split = (scheme,) + split
170 self.checkRoundtrips(url, parsed, split)
Fred Drake70705652002-10-16 21:02:36 +0000171
Skip Montanaro6ec967d2002-03-23 05:32:10 +0000172 def checkJoin(self, base, relurl, expected):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000173 str_components = (base, relurl, expected)
174 self.assertEqual(urllib.parse.urljoin(base, relurl), expected)
175 bytes_components = baseb, relurlb, expectedb = [
176 x.encode('ascii') for x in str_components]
177 self.assertEqual(urllib.parse.urljoin(baseb, relurlb), expectedb)
Guido van Rossumbbc05682002-10-14 19:59:54 +0000178
179 def test_unparse_parse(self):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000180 str_cases = ['Python', './Python','x-newscheme://foo.com/stuff','x://y','x:/y','x:/','/',]
181 bytes_cases = [x.encode('ascii') for x in str_cases]
182 for u in str_cases + bytes_cases:
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000183 self.assertEqual(urllib.parse.urlunsplit(urllib.parse.urlsplit(u)), u)
184 self.assertEqual(urllib.parse.urlunparse(urllib.parse.urlparse(u)), u)
Fred Drakea4d18a02001-01-05 05:57:04 +0000185
Skip Montanaro6ec967d2002-03-23 05:32:10 +0000186 def test_RFC1808(self):
187 # "normal" cases from RFC 1808:
188 self.checkJoin(RFC1808_BASE, 'g:h', 'g:h')
189 self.checkJoin(RFC1808_BASE, 'g', 'http://a/b/c/g')
190 self.checkJoin(RFC1808_BASE, './g', 'http://a/b/c/g')
191 self.checkJoin(RFC1808_BASE, 'g/', 'http://a/b/c/g/')
192 self.checkJoin(RFC1808_BASE, '/g', 'http://a/g')
193 self.checkJoin(RFC1808_BASE, '//g', 'http://g')
Skip Montanaro6ec967d2002-03-23 05:32:10 +0000194 self.checkJoin(RFC1808_BASE, 'g?y', 'http://a/b/c/g?y')
195 self.checkJoin(RFC1808_BASE, 'g?y/./x', 'http://a/b/c/g?y/./x')
196 self.checkJoin(RFC1808_BASE, '#s', 'http://a/b/c/d;p?q#s')
197 self.checkJoin(RFC1808_BASE, 'g#s', 'http://a/b/c/g#s')
198 self.checkJoin(RFC1808_BASE, 'g#s/./x', 'http://a/b/c/g#s/./x')
199 self.checkJoin(RFC1808_BASE, 'g?y#s', 'http://a/b/c/g?y#s')
Skip Montanaro6ec967d2002-03-23 05:32:10 +0000200 self.checkJoin(RFC1808_BASE, 'g;x', 'http://a/b/c/g;x')
201 self.checkJoin(RFC1808_BASE, 'g;x?y#s', 'http://a/b/c/g;x?y#s')
202 self.checkJoin(RFC1808_BASE, '.', 'http://a/b/c/')
203 self.checkJoin(RFC1808_BASE, './', 'http://a/b/c/')
204 self.checkJoin(RFC1808_BASE, '..', 'http://a/b/')
205 self.checkJoin(RFC1808_BASE, '../', 'http://a/b/')
206 self.checkJoin(RFC1808_BASE, '../g', 'http://a/b/g')
207 self.checkJoin(RFC1808_BASE, '../..', 'http://a/')
208 self.checkJoin(RFC1808_BASE, '../../', 'http://a/')
209 self.checkJoin(RFC1808_BASE, '../../g', 'http://a/g')
Fred Drakea4d18a02001-01-05 05:57:04 +0000210
Skip Montanaro6ec967d2002-03-23 05:32:10 +0000211 # "abnormal" cases from RFC 1808:
212 self.checkJoin(RFC1808_BASE, '', 'http://a/b/c/d;p?q#f')
Skip Montanaro6ec967d2002-03-23 05:32:10 +0000213 self.checkJoin(RFC1808_BASE, 'g.', 'http://a/b/c/g.')
214 self.checkJoin(RFC1808_BASE, '.g', 'http://a/b/c/.g')
215 self.checkJoin(RFC1808_BASE, 'g..', 'http://a/b/c/g..')
216 self.checkJoin(RFC1808_BASE, '..g', 'http://a/b/c/..g')
217 self.checkJoin(RFC1808_BASE, './../g', 'http://a/b/g')
218 self.checkJoin(RFC1808_BASE, './g/.', 'http://a/b/c/g/')
219 self.checkJoin(RFC1808_BASE, 'g/./h', 'http://a/b/c/g/h')
220 self.checkJoin(RFC1808_BASE, 'g/../h', 'http://a/b/c/h')
Fred Drakea4d18a02001-01-05 05:57:04 +0000221
Skip Montanaro6ec967d2002-03-23 05:32:10 +0000222 # RFC 1808 and RFC 1630 disagree on these (according to RFC 1808),
223 # so we'll not actually run these tests (which expect 1808 behavior).
224 #self.checkJoin(RFC1808_BASE, 'http:g', 'http:g')
225 #self.checkJoin(RFC1808_BASE, 'http:', 'http:')
Fred Drakea4d18a02001-01-05 05:57:04 +0000226
Antoine Pitrou55ac5b32014-08-21 19:16:17 -0400227 # XXX: The following tests are no longer compatible with RFC3986
228 # self.checkJoin(RFC1808_BASE, '../../../g', 'http://a/../g')
229 # self.checkJoin(RFC1808_BASE, '../../../../g', 'http://a/../../g')
230 # self.checkJoin(RFC1808_BASE, '/./g', 'http://a/./g')
231 # self.checkJoin(RFC1808_BASE, '/../g', 'http://a/../g')
232
233
Senthil Kumaran397eb442011-04-15 18:20:24 +0800234 def test_RFC2368(self):
235 # Issue 11467: path that starts with a number is not parsed correctly
236 self.assertEqual(urllib.parse.urlparse('mailto:1337@example.org'),
237 ('mailto', '', '1337@example.org', '', '', ''))
238
Skip Montanaro6ec967d2002-03-23 05:32:10 +0000239 def test_RFC2396(self):
240 # cases from RFC 2396
Fred Drakea4d18a02001-01-05 05:57:04 +0000241
Skip Montanaro6ec967d2002-03-23 05:32:10 +0000242
243 self.checkJoin(RFC2396_BASE, 'g:h', 'g:h')
244 self.checkJoin(RFC2396_BASE, 'g', 'http://a/b/c/g')
245 self.checkJoin(RFC2396_BASE, './g', 'http://a/b/c/g')
246 self.checkJoin(RFC2396_BASE, 'g/', 'http://a/b/c/g/')
247 self.checkJoin(RFC2396_BASE, '/g', 'http://a/g')
248 self.checkJoin(RFC2396_BASE, '//g', 'http://g')
249 self.checkJoin(RFC2396_BASE, 'g?y', 'http://a/b/c/g?y')
250 self.checkJoin(RFC2396_BASE, '#s', 'http://a/b/c/d;p?q#s')
251 self.checkJoin(RFC2396_BASE, 'g#s', 'http://a/b/c/g#s')
252 self.checkJoin(RFC2396_BASE, 'g?y#s', 'http://a/b/c/g?y#s')
253 self.checkJoin(RFC2396_BASE, 'g;x', 'http://a/b/c/g;x')
254 self.checkJoin(RFC2396_BASE, 'g;x?y#s', 'http://a/b/c/g;x?y#s')
255 self.checkJoin(RFC2396_BASE, '.', 'http://a/b/c/')
256 self.checkJoin(RFC2396_BASE, './', 'http://a/b/c/')
257 self.checkJoin(RFC2396_BASE, '..', 'http://a/b/')
258 self.checkJoin(RFC2396_BASE, '../', 'http://a/b/')
259 self.checkJoin(RFC2396_BASE, '../g', 'http://a/b/g')
260 self.checkJoin(RFC2396_BASE, '../..', 'http://a/')
261 self.checkJoin(RFC2396_BASE, '../../', 'http://a/')
262 self.checkJoin(RFC2396_BASE, '../../g', 'http://a/g')
263 self.checkJoin(RFC2396_BASE, '', RFC2396_BASE)
Skip Montanaro6ec967d2002-03-23 05:32:10 +0000264 self.checkJoin(RFC2396_BASE, 'g.', 'http://a/b/c/g.')
265 self.checkJoin(RFC2396_BASE, '.g', 'http://a/b/c/.g')
266 self.checkJoin(RFC2396_BASE, 'g..', 'http://a/b/c/g..')
267 self.checkJoin(RFC2396_BASE, '..g', 'http://a/b/c/..g')
268 self.checkJoin(RFC2396_BASE, './../g', 'http://a/b/g')
269 self.checkJoin(RFC2396_BASE, './g/.', 'http://a/b/c/g/')
270 self.checkJoin(RFC2396_BASE, 'g/./h', 'http://a/b/c/g/h')
271 self.checkJoin(RFC2396_BASE, 'g/../h', 'http://a/b/c/h')
272 self.checkJoin(RFC2396_BASE, 'g;x=1/./y', 'http://a/b/c/g;x=1/y')
273 self.checkJoin(RFC2396_BASE, 'g;x=1/../y', 'http://a/b/c/y')
274 self.checkJoin(RFC2396_BASE, 'g?y/./x', 'http://a/b/c/g?y/./x')
275 self.checkJoin(RFC2396_BASE, 'g?y/../x', 'http://a/b/c/g?y/../x')
276 self.checkJoin(RFC2396_BASE, 'g#s/./x', 'http://a/b/c/g#s/./x')
277 self.checkJoin(RFC2396_BASE, 'g#s/../x', 'http://a/b/c/g#s/../x')
278
Antoine Pitrou55ac5b32014-08-21 19:16:17 -0400279 # XXX: The following tests are no longer compatible with RFC3986
280 # self.checkJoin(RFC2396_BASE, '../../../g', 'http://a/../g')
281 # self.checkJoin(RFC2396_BASE, '../../../../g', 'http://a/../../g')
282 # self.checkJoin(RFC2396_BASE, '/./g', 'http://a/./g')
283 # self.checkJoin(RFC2396_BASE, '/../g', 'http://a/../g')
284
285
Facundo Batista23e38562008-08-14 16:55:14 +0000286 def test_RFC3986(self):
Senthil Kumarandd3820f2010-05-07 04:19:23 +0000287 # Test cases from RFC3986
Facundo Batista23e38562008-08-14 16:55:14 +0000288 self.checkJoin(RFC3986_BASE, '?y','http://a/b/c/d;p?y')
Antoine Pitrou55ac5b32014-08-21 19:16:17 -0400289 self.checkJoin(RFC3986_BASE, ';x', 'http://a/b/c/;x')
Senthil Kumarandd3820f2010-05-07 04:19:23 +0000290 self.checkJoin(RFC3986_BASE, 'g:h','g:h')
291 self.checkJoin(RFC3986_BASE, 'g','http://a/b/c/g')
292 self.checkJoin(RFC3986_BASE, './g','http://a/b/c/g')
293 self.checkJoin(RFC3986_BASE, 'g/','http://a/b/c/g/')
294 self.checkJoin(RFC3986_BASE, '/g','http://a/g')
295 self.checkJoin(RFC3986_BASE, '//g','http://g')
296 self.checkJoin(RFC3986_BASE, '?y','http://a/b/c/d;p?y')
297 self.checkJoin(RFC3986_BASE, 'g?y','http://a/b/c/g?y')
298 self.checkJoin(RFC3986_BASE, '#s','http://a/b/c/d;p?q#s')
299 self.checkJoin(RFC3986_BASE, 'g#s','http://a/b/c/g#s')
300 self.checkJoin(RFC3986_BASE, 'g?y#s','http://a/b/c/g?y#s')
301 self.checkJoin(RFC3986_BASE, ';x','http://a/b/c/;x')
302 self.checkJoin(RFC3986_BASE, 'g;x','http://a/b/c/g;x')
303 self.checkJoin(RFC3986_BASE, 'g;x?y#s','http://a/b/c/g;x?y#s')
304 self.checkJoin(RFC3986_BASE, '','http://a/b/c/d;p?q')
305 self.checkJoin(RFC3986_BASE, '.','http://a/b/c/')
306 self.checkJoin(RFC3986_BASE, './','http://a/b/c/')
307 self.checkJoin(RFC3986_BASE, '..','http://a/b/')
308 self.checkJoin(RFC3986_BASE, '../','http://a/b/')
309 self.checkJoin(RFC3986_BASE, '../g','http://a/b/g')
310 self.checkJoin(RFC3986_BASE, '../..','http://a/')
311 self.checkJoin(RFC3986_BASE, '../../','http://a/')
312 self.checkJoin(RFC3986_BASE, '../../g','http://a/g')
Antoine Pitrou55ac5b32014-08-21 19:16:17 -0400313 self.checkJoin(RFC3986_BASE, '../../../g', 'http://a/g')
Senthil Kumarandd3820f2010-05-07 04:19:23 +0000314
315 #Abnormal Examples
316
317 # The 'abnormal scenarios' are incompatible with RFC2986 parsing
318 # Tests are here for reference.
319
Antoine Pitrou55ac5b32014-08-21 19:16:17 -0400320 self.checkJoin(RFC3986_BASE, '../../../g','http://a/g')
321 self.checkJoin(RFC3986_BASE, '../../../../g','http://a/g')
322 self.checkJoin(RFC3986_BASE, '/./g','http://a/g')
323 self.checkJoin(RFC3986_BASE, '/../g','http://a/g')
Senthil Kumarandd3820f2010-05-07 04:19:23 +0000324 self.checkJoin(RFC3986_BASE, 'g.','http://a/b/c/g.')
325 self.checkJoin(RFC3986_BASE, '.g','http://a/b/c/.g')
326 self.checkJoin(RFC3986_BASE, 'g..','http://a/b/c/g..')
327 self.checkJoin(RFC3986_BASE, '..g','http://a/b/c/..g')
328 self.checkJoin(RFC3986_BASE, './../g','http://a/b/g')
329 self.checkJoin(RFC3986_BASE, './g/.','http://a/b/c/g/')
330 self.checkJoin(RFC3986_BASE, 'g/./h','http://a/b/c/g/h')
331 self.checkJoin(RFC3986_BASE, 'g/../h','http://a/b/c/h')
332 self.checkJoin(RFC3986_BASE, 'g;x=1/./y','http://a/b/c/g;x=1/y')
333 self.checkJoin(RFC3986_BASE, 'g;x=1/../y','http://a/b/c/y')
334 self.checkJoin(RFC3986_BASE, 'g?y/./x','http://a/b/c/g?y/./x')
335 self.checkJoin(RFC3986_BASE, 'g?y/../x','http://a/b/c/g?y/../x')
336 self.checkJoin(RFC3986_BASE, 'g#s/./x','http://a/b/c/g#s/./x')
337 self.checkJoin(RFC3986_BASE, 'g#s/../x','http://a/b/c/g#s/../x')
338 #self.checkJoin(RFC3986_BASE, 'http:g','http:g') # strict parser
339 self.checkJoin(RFC3986_BASE, 'http:g','http://a/b/c/g') #relaxed parser
Facundo Batista23e38562008-08-14 16:55:14 +0000340
Senthil Kumarandca5b862010-12-17 04:48:45 +0000341 # Test for issue9721
342 self.checkJoin('http://a/b/c/de', ';x','http://a/b/c/;x')
343
Senthil Kumaranaa69d4d2010-07-14 10:21:22 +0000344 def test_urljoins(self):
345 self.checkJoin(SIMPLE_BASE, 'g:h','g:h')
346 self.checkJoin(SIMPLE_BASE, 'http:g','http://a/b/c/g')
347 self.checkJoin(SIMPLE_BASE, 'http:','http://a/b/c/d')
348 self.checkJoin(SIMPLE_BASE, 'g','http://a/b/c/g')
349 self.checkJoin(SIMPLE_BASE, './g','http://a/b/c/g')
350 self.checkJoin(SIMPLE_BASE, 'g/','http://a/b/c/g/')
351 self.checkJoin(SIMPLE_BASE, '/g','http://a/g')
352 self.checkJoin(SIMPLE_BASE, '//g','http://g')
353 self.checkJoin(SIMPLE_BASE, '?y','http://a/b/c/d?y')
354 self.checkJoin(SIMPLE_BASE, 'g?y','http://a/b/c/g?y')
355 self.checkJoin(SIMPLE_BASE, 'g?y/./x','http://a/b/c/g?y/./x')
356 self.checkJoin(SIMPLE_BASE, '.','http://a/b/c/')
357 self.checkJoin(SIMPLE_BASE, './','http://a/b/c/')
358 self.checkJoin(SIMPLE_BASE, '..','http://a/b/')
359 self.checkJoin(SIMPLE_BASE, '../','http://a/b/')
360 self.checkJoin(SIMPLE_BASE, '../g','http://a/b/g')
361 self.checkJoin(SIMPLE_BASE, '../..','http://a/')
362 self.checkJoin(SIMPLE_BASE, '../../g','http://a/g')
Senthil Kumaranaa69d4d2010-07-14 10:21:22 +0000363 self.checkJoin(SIMPLE_BASE, './../g','http://a/b/g')
364 self.checkJoin(SIMPLE_BASE, './g/.','http://a/b/c/g/')
Senthil Kumaranaa69d4d2010-07-14 10:21:22 +0000365 self.checkJoin(SIMPLE_BASE, 'g/./h','http://a/b/c/g/h')
366 self.checkJoin(SIMPLE_BASE, 'g/../h','http://a/b/c/h')
367 self.checkJoin(SIMPLE_BASE, 'http:g','http://a/b/c/g')
368 self.checkJoin(SIMPLE_BASE, 'http:','http://a/b/c/d')
369 self.checkJoin(SIMPLE_BASE, 'http:?y','http://a/b/c/d?y')
370 self.checkJoin(SIMPLE_BASE, 'http:g?y','http://a/b/c/g?y')
371 self.checkJoin(SIMPLE_BASE, 'http:g?y/./x','http://a/b/c/g?y/./x')
Senthil Kumarande02a712011-07-23 18:27:45 +0800372 self.checkJoin('http:///', '..','http:///')
373 self.checkJoin('', 'http://a/b/c/g?y/./x','http://a/b/c/g?y/./x')
374 self.checkJoin('', 'http://a/./g', 'http://a/./g')
Senthil Kumaran2a157d22011-08-03 18:37:22 +0800375 self.checkJoin('svn://pathtorepo/dir1', 'dir2', 'svn://pathtorepo/dir2')
Senthil Kumaran7ce71f62011-08-03 22:08:46 +0800376 self.checkJoin('svn+ssh://pathtorepo/dir1', 'dir2', 'svn+ssh://pathtorepo/dir2')
Senthil Kumaranaa69d4d2010-07-14 10:21:22 +0000377
Antoine Pitrou55ac5b32014-08-21 19:16:17 -0400378 # XXX: The following tests are no longer compatible with RFC3986
379 # self.checkJoin(SIMPLE_BASE, '../../../g','http://a/../g')
380 # self.checkJoin(SIMPLE_BASE, '/./g','http://a/./g')
381
Senthil Kumarana66e3882014-09-22 15:49:16 +0800382 # test for issue22118 duplicate slashes
383 self.checkJoin(SIMPLE_BASE + '/', 'foo', SIMPLE_BASE + '/foo')
384
385 # Non-RFC-defined tests, covering variations of base and trailing
386 # slashes
387 self.checkJoin('http://a/b/c/d/e/', '../../f/g/', 'http://a/b/c/f/g/')
388 self.checkJoin('http://a/b/c/d/e', '../../f/g/', 'http://a/b/f/g/')
389 self.checkJoin('http://a/b/c/d/e/', '/../../f/g/', 'http://a/f/g/')
390 self.checkJoin('http://a/b/c/d/e', '/../../f/g/', 'http://a/f/g/')
391 self.checkJoin('http://a/b/c/d/e/', '../../f/g', 'http://a/b/c/f/g')
392 self.checkJoin('http://a/b/', '../../f/g/', 'http://a/f/g/')
393
Berker Peksag20416f72015-04-16 02:31:14 +0300394 # issue 23703: don't duplicate filename
395 self.checkJoin('a', 'b', 'b')
396
Senthil Kumaranad02d232010-04-16 03:02:13 +0000397 def test_RFC2732(self):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000398 str_cases = [
Senthil Kumaranad02d232010-04-16 03:02:13 +0000399 ('http://Test.python.org:5432/foo/', 'test.python.org', 5432),
400 ('http://12.34.56.78:5432/foo/', '12.34.56.78', 5432),
401 ('http://[::1]:5432/foo/', '::1', 5432),
402 ('http://[dead:beef::1]:5432/foo/', 'dead:beef::1', 5432),
403 ('http://[dead:beef::]:5432/foo/', 'dead:beef::', 5432),
404 ('http://[dead:beef:cafe:5417:affe:8FA3:deaf:feed]:5432/foo/',
405 'dead:beef:cafe:5417:affe:8fa3:deaf:feed', 5432),
406 ('http://[::12.34.56.78]:5432/foo/', '::12.34.56.78', 5432),
407 ('http://[::ffff:12.34.56.78]:5432/foo/',
408 '::ffff:12.34.56.78', 5432),
409 ('http://Test.python.org/foo/', 'test.python.org', None),
410 ('http://12.34.56.78/foo/', '12.34.56.78', None),
411 ('http://[::1]/foo/', '::1', None),
412 ('http://[dead:beef::1]/foo/', 'dead:beef::1', None),
413 ('http://[dead:beef::]/foo/', 'dead:beef::', None),
414 ('http://[dead:beef:cafe:5417:affe:8FA3:deaf:feed]/foo/',
415 'dead:beef:cafe:5417:affe:8fa3:deaf:feed', None),
416 ('http://[::12.34.56.78]/foo/', '::12.34.56.78', None),
417 ('http://[::ffff:12.34.56.78]/foo/',
418 '::ffff:12.34.56.78', None),
Serhiy Storchakaff97b082014-01-18 18:30:33 +0200419 ('http://Test.python.org:/foo/', 'test.python.org', None),
420 ('http://12.34.56.78:/foo/', '12.34.56.78', None),
421 ('http://[::1]:/foo/', '::1', None),
422 ('http://[dead:beef::1]:/foo/', 'dead:beef::1', None),
423 ('http://[dead:beef::]:/foo/', 'dead:beef::', None),
424 ('http://[dead:beef:cafe:5417:affe:8FA3:deaf:feed]:/foo/',
425 'dead:beef:cafe:5417:affe:8fa3:deaf:feed', None),
426 ('http://[::12.34.56.78]:/foo/', '::12.34.56.78', None),
427 ('http://[::ffff:12.34.56.78]:/foo/',
428 '::ffff:12.34.56.78', None),
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000429 ]
430 def _encode(t):
431 return t[0].encode('ascii'), t[1].encode('ascii'), t[2]
432 bytes_cases = [_encode(x) for x in str_cases]
433 for url, hostname, port in str_cases + bytes_cases:
Senthil Kumaranad02d232010-04-16 03:02:13 +0000434 urlparsed = urllib.parse.urlparse(url)
435 self.assertEqual((urlparsed.hostname, urlparsed.port) , (hostname, port))
436
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000437 str_cases = [
Senthil Kumaranad02d232010-04-16 03:02:13 +0000438 'http://::12.34.56.78]/',
439 'http://[::1/foo/',
Senthil Kumaran7a1e09f2010-04-22 12:19:46 +0000440 'ftp://[::1/foo/bad]/bad',
Senthil Kumaran2eaef052010-04-20 20:42:50 +0000441 'http://[::1/foo/bad]/bad',
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000442 'http://[::ffff:12.34.56.78']
443 bytes_cases = [x.encode('ascii') for x in str_cases]
444 for invalid_url in str_cases + bytes_cases:
Senthil Kumaran7a1e09f2010-04-22 12:19:46 +0000445 self.assertRaises(ValueError, urllib.parse.urlparse, invalid_url)
Senthil Kumaranad02d232010-04-16 03:02:13 +0000446
Fred Drake70705652002-10-16 21:02:36 +0000447 def test_urldefrag(self):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000448 str_cases = [
Fred Drake70705652002-10-16 21:02:36 +0000449 ('http://python.org#frag', 'http://python.org', 'frag'),
450 ('http://python.org', 'http://python.org', ''),
451 ('http://python.org/#frag', 'http://python.org/', 'frag'),
452 ('http://python.org/', 'http://python.org/', ''),
453 ('http://python.org/?q#frag', 'http://python.org/?q', 'frag'),
454 ('http://python.org/?q', 'http://python.org/?q', ''),
455 ('http://python.org/p#frag', 'http://python.org/p', 'frag'),
456 ('http://python.org/p?q', 'http://python.org/p?q', ''),
457 (RFC1808_BASE, 'http://a/b/c/d;p?q', 'f'),
458 (RFC2396_BASE, 'http://a/b/c/d;p?q', ''),
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000459 ]
460 def _encode(t):
461 return type(t)(x.encode('ascii') for x in t)
462 bytes_cases = [_encode(x) for x in str_cases]
463 for url, defrag, frag in str_cases + bytes_cases:
464 result = urllib.parse.urldefrag(url)
465 self.assertEqual(result.geturl(), url)
466 self.assertEqual(result, (defrag, frag))
467 self.assertEqual(result.url, defrag)
468 self.assertEqual(result.fragment, frag)
Fred Drake70705652002-10-16 21:02:36 +0000469
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000470 def test_urlsplit_attributes(self):
471 url = "HTTP://WWW.PYTHON.ORG/doc/#frag"
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000472 p = urllib.parse.urlsplit(url)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000473 self.assertEqual(p.scheme, "http")
474 self.assertEqual(p.netloc, "WWW.PYTHON.ORG")
475 self.assertEqual(p.path, "/doc/")
476 self.assertEqual(p.query, "")
477 self.assertEqual(p.fragment, "frag")
478 self.assertEqual(p.username, None)
479 self.assertEqual(p.password, None)
480 self.assertEqual(p.hostname, "www.python.org")
481 self.assertEqual(p.port, None)
482 # geturl() won't return exactly the original URL in this case
483 # since the scheme is always case-normalized
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000484 # We handle this by ignoring the first 4 characters of the URL
485 self.assertEqual(p.geturl()[4:], url[4:])
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000486
487 url = "http://User:Pass@www.python.org:080/doc/?query=yes#frag"
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000488 p = urllib.parse.urlsplit(url)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000489 self.assertEqual(p.scheme, "http")
490 self.assertEqual(p.netloc, "User:Pass@www.python.org:080")
491 self.assertEqual(p.path, "/doc/")
492 self.assertEqual(p.query, "query=yes")
493 self.assertEqual(p.fragment, "frag")
494 self.assertEqual(p.username, "User")
495 self.assertEqual(p.password, "Pass")
496 self.assertEqual(p.hostname, "www.python.org")
497 self.assertEqual(p.port, 80)
498 self.assertEqual(p.geturl(), url)
499
Christian Heimesfaf2f632008-01-06 16:59:19 +0000500 # Addressing issue1698, which suggests Username can contain
501 # "@" characters. Though not RFC compliant, many ftp sites allow
502 # and request email addresses as usernames.
503
504 url = "http://User@example.com:Pass@www.python.org:080/doc/?query=yes#frag"
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000505 p = urllib.parse.urlsplit(url)
Christian Heimesfaf2f632008-01-06 16:59:19 +0000506 self.assertEqual(p.scheme, "http")
507 self.assertEqual(p.netloc, "User@example.com:Pass@www.python.org:080")
508 self.assertEqual(p.path, "/doc/")
509 self.assertEqual(p.query, "query=yes")
510 self.assertEqual(p.fragment, "frag")
511 self.assertEqual(p.username, "User@example.com")
512 self.assertEqual(p.password, "Pass")
513 self.assertEqual(p.hostname, "www.python.org")
514 self.assertEqual(p.port, 80)
515 self.assertEqual(p.geturl(), url)
516
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000517 # And check them all again, only with bytes this time
518 url = b"HTTP://WWW.PYTHON.ORG/doc/#frag"
519 p = urllib.parse.urlsplit(url)
520 self.assertEqual(p.scheme, b"http")
521 self.assertEqual(p.netloc, b"WWW.PYTHON.ORG")
522 self.assertEqual(p.path, b"/doc/")
523 self.assertEqual(p.query, b"")
524 self.assertEqual(p.fragment, b"frag")
525 self.assertEqual(p.username, None)
526 self.assertEqual(p.password, None)
527 self.assertEqual(p.hostname, b"www.python.org")
528 self.assertEqual(p.port, None)
529 self.assertEqual(p.geturl()[4:], url[4:])
530
531 url = b"http://User:Pass@www.python.org:080/doc/?query=yes#frag"
532 p = urllib.parse.urlsplit(url)
533 self.assertEqual(p.scheme, b"http")
534 self.assertEqual(p.netloc, b"User:Pass@www.python.org:080")
535 self.assertEqual(p.path, b"/doc/")
536 self.assertEqual(p.query, b"query=yes")
537 self.assertEqual(p.fragment, b"frag")
538 self.assertEqual(p.username, b"User")
539 self.assertEqual(p.password, b"Pass")
540 self.assertEqual(p.hostname, b"www.python.org")
541 self.assertEqual(p.port, 80)
542 self.assertEqual(p.geturl(), url)
543
544 url = b"http://User@example.com:Pass@www.python.org:080/doc/?query=yes#frag"
545 p = urllib.parse.urlsplit(url)
546 self.assertEqual(p.scheme, b"http")
547 self.assertEqual(p.netloc, b"User@example.com:Pass@www.python.org:080")
548 self.assertEqual(p.path, b"/doc/")
549 self.assertEqual(p.query, b"query=yes")
550 self.assertEqual(p.fragment, b"frag")
551 self.assertEqual(p.username, b"User@example.com")
552 self.assertEqual(p.password, b"Pass")
553 self.assertEqual(p.hostname, b"www.python.org")
554 self.assertEqual(p.port, 80)
555 self.assertEqual(p.geturl(), url)
Christian Heimesfaf2f632008-01-06 16:59:19 +0000556
Senthil Kumaran2fc5a502012-05-24 21:56:17 +0800557 # Verify an illegal port is returned as None
558 url = b"HTTP://WWW.PYTHON.ORG:65536/doc/#frag"
559 p = urllib.parse.urlsplit(url)
560 self.assertEqual(p.port, None)
561
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000562 def test_attributes_bad_port(self):
563 """Check handling of non-integer ports."""
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000564 p = urllib.parse.urlsplit("http://www.example.net:foo")
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000565 self.assertEqual(p.netloc, "www.example.net:foo")
566 self.assertRaises(ValueError, lambda: p.port)
567
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000568 p = urllib.parse.urlparse("http://www.example.net:foo")
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000569 self.assertEqual(p.netloc, "www.example.net:foo")
570 self.assertRaises(ValueError, lambda: p.port)
571
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000572 # Once again, repeat ourselves to test bytes
573 p = urllib.parse.urlsplit(b"http://www.example.net:foo")
574 self.assertEqual(p.netloc, b"www.example.net:foo")
575 self.assertRaises(ValueError, lambda: p.port)
576
577 p = urllib.parse.urlparse(b"http://www.example.net:foo")
578 self.assertEqual(p.netloc, b"www.example.net:foo")
579 self.assertRaises(ValueError, lambda: p.port)
580
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000581 def test_attributes_without_netloc(self):
582 # This example is straight from RFC 3261. It looks like it
583 # should allow the username, hostname, and port to be filled
584 # in, but doesn't. Since it's a URI and doesn't use the
585 # scheme://netloc syntax, the netloc and related attributes
586 # should be left empty.
587 uri = "sip:alice@atlanta.com;maddr=239.255.255.1;ttl=15"
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000588 p = urllib.parse.urlsplit(uri)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000589 self.assertEqual(p.netloc, "")
590 self.assertEqual(p.username, None)
591 self.assertEqual(p.password, None)
592 self.assertEqual(p.hostname, None)
593 self.assertEqual(p.port, None)
594 self.assertEqual(p.geturl(), uri)
595
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000596 p = urllib.parse.urlparse(uri)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000597 self.assertEqual(p.netloc, "")
598 self.assertEqual(p.username, None)
599 self.assertEqual(p.password, None)
600 self.assertEqual(p.hostname, None)
601 self.assertEqual(p.port, None)
602 self.assertEqual(p.geturl(), uri)
603
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000604 # You guessed it, repeating the test with bytes input
605 uri = b"sip:alice@atlanta.com;maddr=239.255.255.1;ttl=15"
606 p = urllib.parse.urlsplit(uri)
607 self.assertEqual(p.netloc, b"")
608 self.assertEqual(p.username, None)
609 self.assertEqual(p.password, None)
610 self.assertEqual(p.hostname, None)
611 self.assertEqual(p.port, None)
612 self.assertEqual(p.geturl(), uri)
613
614 p = urllib.parse.urlparse(uri)
615 self.assertEqual(p.netloc, b"")
616 self.assertEqual(p.username, None)
617 self.assertEqual(p.password, None)
618 self.assertEqual(p.hostname, None)
619 self.assertEqual(p.port, None)
620 self.assertEqual(p.geturl(), uri)
621
Christian Heimesfaf2f632008-01-06 16:59:19 +0000622 def test_noslash(self):
623 # Issue 1637: http://foo.com?query is legal
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000624 self.assertEqual(urllib.parse.urlparse("http://example.com?blahblah=/foo"),
Christian Heimesfaf2f632008-01-06 16:59:19 +0000625 ('http', 'example.com', '', '', 'blahblah=/foo', ''))
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000626 self.assertEqual(urllib.parse.urlparse(b"http://example.com?blahblah=/foo"),
627 (b'http', b'example.com', b'', b'', b'blahblah=/foo', b''))
Christian Heimesfaf2f632008-01-06 16:59:19 +0000628
Senthil Kumaran84c7d9f2010-08-04 04:50:44 +0000629 def test_withoutscheme(self):
630 # Test urlparse without scheme
631 # Issue 754016: urlparse goes wrong with IP:port without scheme
632 # RFC 1808 specifies that netloc should start with //, urlparse expects
633 # the same, otherwise it classifies the portion of url as path.
634 self.assertEqual(urllib.parse.urlparse("path"),
635 ('','','path','','',''))
636 self.assertEqual(urllib.parse.urlparse("//www.python.org:80"),
637 ('','www.python.org:80','','','',''))
638 self.assertEqual(urllib.parse.urlparse("http://www.python.org:80"),
639 ('http','www.python.org:80','','','',''))
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000640 # Repeat for bytes input
641 self.assertEqual(urllib.parse.urlparse(b"path"),
642 (b'',b'',b'path',b'',b'',b''))
643 self.assertEqual(urllib.parse.urlparse(b"//www.python.org:80"),
644 (b'',b'www.python.org:80',b'',b'',b'',b''))
645 self.assertEqual(urllib.parse.urlparse(b"http://www.python.org:80"),
646 (b'http',b'www.python.org:80',b'',b'',b'',b''))
Senthil Kumaran84c7d9f2010-08-04 04:50:44 +0000647
648 def test_portseparator(self):
649 # Issue 754016 makes changes for port separator ':' from scheme separator
650 self.assertEqual(urllib.parse.urlparse("path:80"),
651 ('','','path:80','','',''))
652 self.assertEqual(urllib.parse.urlparse("http:"),('http','','','','',''))
653 self.assertEqual(urllib.parse.urlparse("https:"),('https','','','','',''))
654 self.assertEqual(urllib.parse.urlparse("http://www.python.org:80"),
655 ('http','www.python.org:80','','','',''))
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000656 # As usual, need to check bytes input as well
657 self.assertEqual(urllib.parse.urlparse(b"path:80"),
658 (b'',b'',b'path:80',b'',b'',b''))
659 self.assertEqual(urllib.parse.urlparse(b"http:"),(b'http',b'',b'',b'',b'',b''))
660 self.assertEqual(urllib.parse.urlparse(b"https:"),(b'https',b'',b'',b'',b'',b''))
661 self.assertEqual(urllib.parse.urlparse(b"http://www.python.org:80"),
662 (b'http',b'www.python.org:80',b'',b'',b'',b''))
Senthil Kumaran84c7d9f2010-08-04 04:50:44 +0000663
Facundo Batista2ac5de22008-07-07 18:24:11 +0000664 def test_usingsys(self):
665 # Issue 3314: sys module is used in the error
666 self.assertRaises(TypeError, urllib.parse.urlencode, "foo")
667
Senthil Kumaran6be85c52010-02-19 07:42:50 +0000668 def test_anyscheme(self):
669 # Issue 7904: s3://foo.com/stuff has netloc "foo.com".
Ezio Melotti5e15efa2010-02-19 14:49:02 +0000670 self.assertEqual(urllib.parse.urlparse("s3://foo.com/stuff"),
671 ('s3', 'foo.com', '/stuff', '', '', ''))
672 self.assertEqual(urllib.parse.urlparse("x-newscheme://foo.com/stuff"),
673 ('x-newscheme', 'foo.com', '/stuff', '', '', ''))
Senthil Kumaran1be320e2012-05-19 08:12:00 +0800674 self.assertEqual(urllib.parse.urlparse("x-newscheme://foo.com/stuff?query#fragment"),
675 ('x-newscheme', 'foo.com', '/stuff', '', 'query', 'fragment'))
676 self.assertEqual(urllib.parse.urlparse("x-newscheme://foo.com/stuff?query"),
677 ('x-newscheme', 'foo.com', '/stuff', '', 'query', ''))
678
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000679 # And for bytes...
680 self.assertEqual(urllib.parse.urlparse(b"s3://foo.com/stuff"),
681 (b's3', b'foo.com', b'/stuff', b'', b'', b''))
682 self.assertEqual(urllib.parse.urlparse(b"x-newscheme://foo.com/stuff"),
683 (b'x-newscheme', b'foo.com', b'/stuff', b'', b'', b''))
Senthil Kumaran1be320e2012-05-19 08:12:00 +0800684 self.assertEqual(urllib.parse.urlparse(b"x-newscheme://foo.com/stuff?query#fragment"),
685 (b'x-newscheme', b'foo.com', b'/stuff', b'', b'query', b'fragment'))
686 self.assertEqual(urllib.parse.urlparse(b"x-newscheme://foo.com/stuff?query"),
687 (b'x-newscheme', b'foo.com', b'/stuff', b'', b'query', b''))
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000688
Berker Peksag89584c92015-06-25 23:38:48 +0300689 def test_default_scheme(self):
690 # Exercise the scheme parameter of urlparse() and urlsplit()
691 for func in (urllib.parse.urlparse, urllib.parse.urlsplit):
692 with self.subTest(function=func):
693 result = func("http://example.net/", "ftp")
694 self.assertEqual(result.scheme, "http")
695 result = func(b"http://example.net/", b"ftp")
696 self.assertEqual(result.scheme, b"http")
697 self.assertEqual(func("path", "ftp").scheme, "ftp")
698 self.assertEqual(func("path", scheme="ftp").scheme, "ftp")
699 self.assertEqual(func(b"path", scheme=b"ftp").scheme, b"ftp")
700 self.assertEqual(func("path").scheme, "")
701 self.assertEqual(func(b"path").scheme, b"")
702 self.assertEqual(func(b"path", "").scheme, b"")
703
704 def test_parse_fragments(self):
705 # Exercise the allow_fragments parameter of urlparse() and urlsplit()
706 tests = (
707 ("http:#frag", "path"),
708 ("//example.net#frag", "path"),
709 ("index.html#frag", "path"),
710 (";a=b#frag", "params"),
711 ("?a=b#frag", "query"),
712 ("#frag", "path"),
713 )
714 for url, attr in tests:
715 for func in (urllib.parse.urlparse, urllib.parse.urlsplit):
716 if attr == "params" and func is urllib.parse.urlsplit:
717 attr = "path"
718 with self.subTest(url=url, function=func):
719 result = func(url, allow_fragments=False)
720 self.assertEqual(result.fragment, "")
721 self.assertTrue(getattr(result, attr).endswith("#frag"))
722 self.assertEqual(func(url, "", False).fragment, "")
723
724 result = func(url, allow_fragments=True)
725 self.assertEqual(result.fragment, "frag")
726 self.assertFalse(getattr(result, attr).endswith("frag"))
727 self.assertEqual(func(url, "", True).fragment, "frag")
728 self.assertEqual(func(url).fragment, "frag")
729
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000730 def test_mixed_types_rejected(self):
731 # Several functions that process either strings or ASCII encoded bytes
732 # accept multiple arguments. Check they reject mixed type input
Ezio Melottied3a7d22010-12-01 02:32:32 +0000733 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000734 urllib.parse.urlparse("www.python.org", b"http")
Ezio Melottied3a7d22010-12-01 02:32:32 +0000735 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000736 urllib.parse.urlparse(b"www.python.org", "http")
Ezio Melottied3a7d22010-12-01 02:32:32 +0000737 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000738 urllib.parse.urlsplit("www.python.org", b"http")
Ezio Melottied3a7d22010-12-01 02:32:32 +0000739 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000740 urllib.parse.urlsplit(b"www.python.org", "http")
Ezio Melottied3a7d22010-12-01 02:32:32 +0000741 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000742 urllib.parse.urlunparse(( b"http", "www.python.org","","","",""))
Ezio Melottied3a7d22010-12-01 02:32:32 +0000743 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000744 urllib.parse.urlunparse(("http", b"www.python.org","","","",""))
Ezio Melottied3a7d22010-12-01 02:32:32 +0000745 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000746 urllib.parse.urlunsplit((b"http", "www.python.org","","",""))
Ezio Melottied3a7d22010-12-01 02:32:32 +0000747 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000748 urllib.parse.urlunsplit(("http", b"www.python.org","","",""))
Ezio Melottied3a7d22010-12-01 02:32:32 +0000749 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000750 urllib.parse.urljoin("http://python.org", b"http://python.org")
Ezio Melottied3a7d22010-12-01 02:32:32 +0000751 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000752 urllib.parse.urljoin(b"http://python.org", "http://python.org")
753
def _check_result_type(self, str_type):
    """Check that a str result class and its bytes counterpart convert both ways.

    *str_type* is a result class exposing ``_fields`` and
    ``_encoded_counterpart``.  Instances built from empty fields are
    encoded/decoded with no arguments, with an explicit encoding, and with
    an explicit encoding plus error handler; each conversion must equal
    both the plain tuple of fields and the counterpart instance.
    """
    field_count = len(str_type._fields)
    bytes_type = str_type._encoded_counterpart
    self.assertIs(bytes_type._decoded_counterpart, str_type)

    empty_text = ('',) * field_count
    empty_bytes = (b'',) * field_count
    text_result = str_type(*empty_text)
    bytes_result = bytes_type(*empty_bytes)

    # Argument lists tried for both decode() and encode().
    codec_args = ((), ('ascii',), ('ascii', 'strict'))

    self.assertEqual(text_result, empty_text)
    for extra in codec_args:
        self.assertEqual(bytes_result.decode(*extra), empty_text)
        self.assertEqual(bytes_result.decode(*extra), text_result)

    self.assertEqual(bytes_result, empty_bytes)
    for extra in codec_args:
        self.assertEqual(text_result.encode(*extra), empty_bytes)
        self.assertEqual(text_result.encode(*extra), bytes_result)
778
779 def test_result_pairs(self):
780 # Check encoding and decoding between result pairs
781 result_types = [
782 urllib.parse.DefragResult,
783 urllib.parse.SplitResult,
784 urllib.parse.ParseResult,
785 ]
786 for result_type in result_types:
787 self._check_result_type(result_type)
788
Victor Stinner1d87deb2011-01-14 13:05:19 +0000789 def test_parse_qs_encoding(self):
790 result = urllib.parse.parse_qs("key=\u0141%E9", encoding="latin-1")
791 self.assertEqual(result, {'key': ['\u0141\xE9']})
792 result = urllib.parse.parse_qs("key=\u0141%C3%A9", encoding="utf-8")
793 self.assertEqual(result, {'key': ['\u0141\xE9']})
794 result = urllib.parse.parse_qs("key=\u0141%C3%A9", encoding="ascii")
795 self.assertEqual(result, {'key': ['\u0141\ufffd\ufffd']})
796 result = urllib.parse.parse_qs("key=\u0141%E9-", encoding="ascii")
797 self.assertEqual(result, {'key': ['\u0141\ufffd-']})
798 result = urllib.parse.parse_qs("key=\u0141%E9-", encoding="ascii",
799 errors="ignore")
800 self.assertEqual(result, {'key': ['\u0141-']})
801
802 def test_parse_qsl_encoding(self):
803 result = urllib.parse.parse_qsl("key=\u0141%E9", encoding="latin-1")
804 self.assertEqual(result, [('key', '\u0141\xE9')])
805 result = urllib.parse.parse_qsl("key=\u0141%C3%A9", encoding="utf-8")
806 self.assertEqual(result, [('key', '\u0141\xE9')])
807 result = urllib.parse.parse_qsl("key=\u0141%C3%A9", encoding="ascii")
808 self.assertEqual(result, [('key', '\u0141\ufffd\ufffd')])
809 result = urllib.parse.parse_qsl("key=\u0141%E9-", encoding="ascii")
810 self.assertEqual(result, [('key', '\u0141\ufffd-')])
811 result = urllib.parse.parse_qsl("key=\u0141%E9-", encoding="ascii",
812 errors="ignore")
813 self.assertEqual(result, [('key', '\u0141-')])
814
Senthil Kumarande02a712011-07-23 18:27:45 +0800815 def test_urlencode_sequences(self):
816 # Other tests incidentally urlencode things; test non-covered cases:
817 # Sequence and object values.
818 result = urllib.parse.urlencode({'a': [1, 2], 'b': (3, 4, 5)}, True)
Georg Brandl09a7c722012-02-20 21:31:46 +0100819 # we cannot rely on ordering here
820 assert set(result.split('&')) == {'a=1', 'a=2', 'b=3', 'b=4', 'b=5'}
Senthil Kumarande02a712011-07-23 18:27:45 +0800821
822 class Trivial:
823 def __str__(self):
824 return 'trivial'
825
826 result = urllib.parse.urlencode({'a': Trivial()}, True)
827 self.assertEqual(result, 'a=trivial')
828
R David Murrayc17686f2015-05-17 20:44:50 -0400829 def test_urlencode_quote_via(self):
830 result = urllib.parse.urlencode({'a': 'some value'})
831 self.assertEqual(result, "a=some+value")
832 result = urllib.parse.urlencode({'a': 'some value/another'},
833 quote_via=urllib.parse.quote)
834 self.assertEqual(result, "a=some%20value%2Fanother")
835 result = urllib.parse.urlencode({'a': 'some value/another'},
836 safe='/', quote_via=urllib.parse.quote)
837 self.assertEqual(result, "a=some%20value/another")
838
Senthil Kumarande02a712011-07-23 18:27:45 +0800839 def test_quote_from_bytes(self):
840 self.assertRaises(TypeError, urllib.parse.quote_from_bytes, 'foo')
841 result = urllib.parse.quote_from_bytes(b'archaeological arcana')
842 self.assertEqual(result, 'archaeological%20arcana')
843 result = urllib.parse.quote_from_bytes(b'')
844 self.assertEqual(result, '')
845
846 def test_unquote_to_bytes(self):
847 result = urllib.parse.unquote_to_bytes('abc%20def')
848 self.assertEqual(result, b'abc def')
849 result = urllib.parse.unquote_to_bytes('')
850 self.assertEqual(result, b'')
851
852 def test_quote_errors(self):
853 self.assertRaises(TypeError, urllib.parse.quote, b'foo',
854 encoding='utf-8')
855 self.assertRaises(TypeError, urllib.parse.quote, b'foo', errors='strict')
Victor Stinner1d87deb2011-01-14 13:05:19 +0000856
Ezio Melotti6709b7d2012-05-19 17:15:19 +0300857 def test_issue14072(self):
858 p1 = urllib.parse.urlsplit('tel:+31-641044153')
859 self.assertEqual(p1.scheme, 'tel')
860 self.assertEqual(p1.path, '+31-641044153')
861 p2 = urllib.parse.urlsplit('tel:+31641044153')
862 self.assertEqual(p2.scheme, 'tel')
863 self.assertEqual(p2.path, '+31641044153')
Senthil Kumaraned301992012-12-24 14:00:20 -0800864 # assert the behavior for urlparse
865 p1 = urllib.parse.urlparse('tel:+31-641044153')
866 self.assertEqual(p1.scheme, 'tel')
867 self.assertEqual(p1.path, '+31-641044153')
868 p2 = urllib.parse.urlparse('tel:+31641044153')
869 self.assertEqual(p2.scheme, 'tel')
870 self.assertEqual(p2.path, '+31641044153')
871
872 def test_telurl_params(self):
873 p1 = urllib.parse.urlparse('tel:123-4;phone-context=+1-650-516')
874 self.assertEqual(p1.scheme, 'tel')
875 self.assertEqual(p1.path, '123-4')
876 self.assertEqual(p1.params, 'phone-context=+1-650-516')
877
878 p1 = urllib.parse.urlparse('tel:+1-201-555-0123')
879 self.assertEqual(p1.scheme, 'tel')
880 self.assertEqual(p1.path, '+1-201-555-0123')
881 self.assertEqual(p1.params, '')
882
883 p1 = urllib.parse.urlparse('tel:7042;phone-context=example.com')
884 self.assertEqual(p1.scheme, 'tel')
885 self.assertEqual(p1.path, '7042')
886 self.assertEqual(p1.params, 'phone-context=example.com')
887
888 p1 = urllib.parse.urlparse('tel:863-1234;phone-context=+1-914-555')
889 self.assertEqual(p1.scheme, 'tel')
890 self.assertEqual(p1.path, '863-1234')
891 self.assertEqual(p1.params, 'phone-context=+1-914-555')
892
R David Murrayf5163882013-03-21 20:56:51 -0400893 def test_Quoter_repr(self):
894 quoter = urllib.parse.Quoter(urllib.parse._ALWAYS_SAFE)
895 self.assertIn('Quoter', repr(quoter))
896
Serhiy Storchaka15154502015-04-07 19:09:01 +0300897 def test_all(self):
898 expected = []
899 undocumented = {
900 'splitattr', 'splithost', 'splitnport', 'splitpasswd',
901 'splitport', 'splitquery', 'splittag', 'splittype', 'splituser',
902 'splitvalue',
903 'Quoter', 'ResultBase', 'clear_cache', 'to_bytes', 'unwrap',
904 }
905 for name in dir(urllib.parse):
906 if name.startswith('_') or name in undocumented:
907 continue
908 object = getattr(urllib.parse, name)
909 if getattr(object, '__module__', None) == 'urllib.parse':
910 expected.append(name)
911 self.assertCountEqual(urllib.parse.__all__, expected)
912
Senthil Kumaran6be85c52010-02-19 07:42:50 +0000913
class Utility_Tests(unittest.TestCase):
    """Testcase to test the various utility functions in the urllib."""
    # In Python 2 this test class was in test_urllib.

    def _check_each(self, func, cases):
        """Assert ``func(argument) == expected`` for each (argument, expected) pair."""
        for argument, expected in cases:
            self.assertEqual(func(argument), expected)

    def test_splittype(self):
        self._check_each(urllib.parse.splittype, (
            ('type:opaquestring', ('type', 'opaquestring')),
            ('opaquestring', (None, 'opaquestring')),
            (':opaquestring', (None, ':opaquestring')),
            ('type:', ('type', '')),
            ('type:opaque:string', ('type', 'opaque:string')),
        ))

    def test_splithost(self):
        self._check_each(urllib.parse.splithost, (
            ('//www.example.org:80/foo/bar/baz.html',
             ('www.example.org:80', '/foo/bar/baz.html')),
            ('//www.example.org:80',
             ('www.example.org:80', '')),
            ('/foo/bar/baz.html',
             (None, '/foo/bar/baz.html')),
        ))

    def test_splituser(self):
        self._check_each(urllib.parse.splituser, (
            ('User:Pass@www.python.org:080',
             ('User:Pass', 'www.python.org:080')),
            ('@www.python.org:080',
             ('', 'www.python.org:080')),
            ('www.python.org:080',
             (None, 'www.python.org:080')),
            ('User:Pass@',
             ('User:Pass', '')),
            ('User@example.com:Pass@www.python.org:080',
             ('User@example.com:Pass', 'www.python.org:080')),
        ))

    def test_splitpasswd(self):
        # Some of the password examples are not sensible, but they are
        # included to conform to RFC2617 and to address issue4675.
        self._check_each(urllib.parse.splitpasswd, (
            ('user:ab', ('user', 'ab')),
            ('user:a\nb', ('user', 'a\nb')),
            ('user:a\tb', ('user', 'a\tb')),
            ('user:a\rb', ('user', 'a\rb')),
            ('user:a\fb', ('user', 'a\fb')),
            ('user:a\vb', ('user', 'a\vb')),
            ('user:a:b', ('user', 'a:b')),
            ('user:a b', ('user', 'a b')),
            ('user 2:ab', ('user 2', 'ab')),
            ('user+1:a+b', ('user+1', 'a+b')),
            ('user:', ('user', '')),
            ('user', ('user', None)),
            (':ab', ('', 'ab')),
        ))

    def test_splitport(self):
        self._check_each(urllib.parse.splitport, (
            ('parrot:88', ('parrot', '88')),
            ('parrot', ('parrot', None)),
            ('parrot:', ('parrot', None)),
            ('127.0.0.1', ('127.0.0.1', None)),
            ('parrot:cheese', ('parrot:cheese', None)),
            ('[::1]:88', ('[::1]', '88')),
            ('[::1]', ('[::1]', None)),
            (':88', ('', '88')),
        ))

    def test_splitnport(self):
        # (positional arguments, expected result); the optional second
        # argument is the default port.
        cases = (
            (('parrot:88',), ('parrot', 88)),
            (('parrot',), ('parrot', -1)),
            (('parrot', 55), ('parrot', 55)),
            (('parrot:',), ('parrot', -1)),
            (('parrot:', 55), ('parrot', 55)),
            (('127.0.0.1',), ('127.0.0.1', -1)),
            (('127.0.0.1', 55), ('127.0.0.1', 55)),
            (('parrot:cheese',), ('parrot', None)),
            (('parrot:cheese', 55), ('parrot', None)),
        )
        for args, expected in cases:
            self.assertEqual(urllib.parse.splitnport(*args), expected)

    def test_splitquery(self):
        # Normal cases are exercised by other tests; ensure that we also
        # catch cases with no port specified (testcase ensuring coverage)
        self._check_each(urllib.parse.splitquery, (
            ('http://python.org/fake?foo=bar',
             ('http://python.org/fake', 'foo=bar')),
            ('http://python.org/fake?foo=bar?',
             ('http://python.org/fake?foo=bar', '')),
            ('http://python.org/fake',
             ('http://python.org/fake', None)),
            ('?foo=bar', ('', 'foo=bar')),
        ))

    def test_splittag(self):
        self._check_each(urllib.parse.splittag, (
            ('http://example.com?foo=bar#baz',
             ('http://example.com?foo=bar', 'baz')),
            ('http://example.com?foo=bar#',
             ('http://example.com?foo=bar', '')),
            ('#baz', ('', 'baz')),
            ('http://example.com?foo=bar',
             ('http://example.com?foo=bar', None)),
            ('http://example.com?foo=bar#baz#boo',
             ('http://example.com?foo=bar#baz', 'boo')),
        ))

    def test_splitattr(self):
        self._check_each(urllib.parse.splitattr, (
            ('/path;attr1=value1;attr2=value2',
             ('/path', ['attr1=value1', 'attr2=value2'])),
            ('/path;', ('/path', [''])),
            (';attr1=value1;attr2=value2',
             ('', ['attr1=value1', 'attr2=value2'])),
            ('/path', ('/path', [])),
        ))

    def test_splitvalue(self):
        # Normal cases are exercised by other tests; test pathological cases
        # with no key/value pairs. (testcase ensuring coverage)
        self._check_each(urllib.parse.splitvalue, (
            ('foo=bar', ('foo', 'bar')),
            ('foo=', ('foo', '')),
            ('=bar', ('', 'bar')),
            ('foobar', ('foobar', None)),
            ('foo=bar=baz', ('foo', 'bar=baz')),
        ))

    def test_to_bytes(self):
        # An ASCII-only URL compares equal to the input str.
        self.assertEqual(urllib.parse.to_bytes('http://www.python.org'),
                         'http://www.python.org')
        # A non-ASCII character must raise UnicodeError.
        with self.assertRaises(UnicodeError):
            urllib.parse.to_bytes('http://www.python.org/medi\u00e6val')

    def test_unwrap(self):
        self.assertEqual(urllib.parse.unwrap('<URL:type://host/path>'),
                         'type://host/path')
1041
Skip Montanaro6ec967d2002-03-23 05:32:10 +00001042
1043if __name__ == "__main__":
Serhiy Storchaka9270be72015-03-02 16:32:29 +02001044 unittest.main()