blob: 4e1ded73c2664347f16e771fd7a4ed7bf584fbf9 [file] [log] [blame]
Barry Warsaw04f357c2002-07-23 19:04:11 +00001from test import test_support
Skip Montanaro6ec967d2002-03-23 05:32:10 +00002import unittest
Fred Drakea4d18a02001-01-05 05:57:04 +00003import urlparse
4
# Base URLs that the urljoin() tests below resolve relative references
# against.  RFC2396_BASE and RFC3986_BASE hold the same string; they are
# kept as separate names so each test reads in terms of its own RFC.
RFC1808_BASE = "http://a/b/c/d;p?q#f"
RFC2396_BASE = "http://a/b/c/d;p?q"
RFC3986_BASE = 'http://a/b/c/d;p?q'
SIMPLE_BASE = 'http://a/b/c/d'
Fred Drakea4d18a02001-01-05 05:57:04 +00009
# A list of test cases for parse_qsl().  Each test case is a two-tuple
# that contains a string with the query and a list of (name, value)
# pairs with the expected result when blank values are kept.

parse_qsl_test_cases = [
    ("", []),
    ("&", []),
    ("&&", []),
    ("=", [('', '')]),
    ("=a", [('', 'a')]),
    ("a", [('a', '')]),
    ("a=", [('a', '')]),
    ("&a=b", [('a', 'b')]),
    ("a=a+b&b=b+c", [('a', 'a b'), ('b', 'b c')]),
    ("a=1&a=2", [('a', '1'), ('a', '2')]),
    # ';' is exercised as a pair separator as well as '&'.
    (";", []),
    (";;", []),
    (";a=b", [('a', 'b')]),
    ("a=a+b;b=b+c", [('a', 'a b'), ('b', 'b c')]),
    ("a=1;a=2", [('a', '1'), ('a', '2')]),
    (b";", []),
    (b";;", []),
    (b";a=b", [(b'a', b'b')]),
    (b"a=a+b;b=b+c", [(b'a', b'a b'), (b'b', b'b c')]),
    (b"a=1;a=2", [(b'a', b'1'), (b'a', b'2')]),
]
36
# A list of test cases for parse_qs().  Each test case is a two-tuple
# that contains a string with the query and a dictionary, mapping each
# name to the list of its values, with the expected result when blank
# values are kept.
parse_qs_test_cases = [
    ("", {}),
    ("&", {}),
    ("&&", {}),
    ("=", {'': ['']}),
    ("=a", {'': ['a']}),
    ("a", {'a': ['']}),
    ("a=", {'a': ['']}),
    ("&a=b", {'a': ['b']}),
    ("a=a+b&b=b+c", {'a': ['a b'], 'b': ['b c']}),
    ("a=1&a=2", {'a': ['1', '2']}),
    (b"", {}),
    (b"&", {}),
    (b"&&", {}),
    (b"=", {b'': [b'']}),
    (b"=a", {b'': [b'a']}),
    (b"a", {b'a': [b'']}),
    (b"a=", {b'a': [b'']}),
    (b"&a=b", {b'a': [b'b']}),
    (b"a=a+b&b=b+c", {b'a': [b'a b'], b'b': [b'b c']}),
    (b"a=1&a=2", {b'a': [b'1', b'2']}),
    # ';' is exercised as a pair separator as well as '&'.
    (";", {}),
    (";;", {}),
    (";a=b", {'a': ['b']}),
    ("a=a+b;b=b+c", {'a': ['a b'], 'b': ['b c']}),
    ("a=1;a=2", {'a': ['1', '2']}),
    (b";", {}),
    (b";;", {}),
    (b";a=b", {b'a': [b'b']}),
    (b"a=a+b;b=b+c", {b'a': [b'a b'], b'b': [b'b c']}),
    (b"a=1;a=2", {b'a': [b'1', b'2']}),
]
69
class UrlParseTestCase(unittest.TestCase):
    """Tests for the urlparse module.

    Covers urlparse()/urlsplit() and their inverses, urljoin(),
    urldefrag(), parse_qs()/parse_qsl() and the attributes exposed on
    the parse results.
    """

    # Attributes compared when checking that re-parsing geturl() gives an
    # equivalent result.  urlsplit() results have no 'params' attribute
    # compared here; urlparse() results do.
    _SPLIT_ATTRIBUTES = ('scheme', 'netloc', 'path', 'query', 'fragment',
                         'username', 'password', 'hostname', 'port')
    _PARSE_ATTRIBUTES = ('scheme', 'netloc', 'path', 'params', 'query',
                         'fragment', 'username', 'password', 'hostname',
                         'port')

    def _checkSameAttributes(self, actual, expected, names):
        # Assert that every attribute listed in *names* has the same value
        # on both result objects, naming the attribute on failure.
        for name in names:
            first = getattr(actual, name)
            second = getattr(expected, name)
            self.assertEqual(first, second,
                             "%s: %r != %r" % (name, first, second))

    def checkRoundtrips(self, url, parsed, split):
        # Parse *url* with urlparse() and urlsplit(), compare the results
        # with the expected *parsed* 6-tuple and *split* 5-tuple, and
        # check that unparsing and re-parsing give the same values back.
        result = urlparse.urlparse(url)
        self.assertEqual(result, parsed)
        t = (result.scheme, result.netloc, result.path,
             result.params, result.query, result.fragment)
        self.assertEqual(t, parsed)
        # put it back together and it should be the same
        result2 = urlparse.urlunparse(result)
        self.assertEqual(result2, url)
        self.assertEqual(result2, result.geturl())

        # the result of geturl() is a fixpoint; we can always parse it
        # again to get the same result:
        result3 = urlparse.urlparse(result.geturl())
        self.assertEqual(result3.geturl(), result.geturl())
        self.assertEqual(result3, result)
        self._checkSameAttributes(result3, result, self._PARSE_ATTRIBUTES)

        # check the roundtrip using urlsplit() as well
        result = urlparse.urlsplit(url)
        self.assertEqual(result, split)
        t = (result.scheme, result.netloc, result.path,
             result.query, result.fragment)
        self.assertEqual(t, split)
        result2 = urlparse.urlunsplit(result)
        self.assertEqual(result2, url)
        self.assertEqual(result2, result.geturl())

        # check the fixpoint property of re-parsing the result of geturl()
        result3 = urlparse.urlsplit(result.geturl())
        self.assertEqual(result3.geturl(), result.geturl())
        self.assertEqual(result3, result)
        self._checkSameAttributes(result3, result, self._SPLIT_ATTRIBUTES)

    def test_qsl(self):
        for orig, expect in parse_qsl_test_cases:
            result = urlparse.parse_qsl(orig, keep_blank_values=True)
            self.assertEqual(result, expect, "Error parsing %r" % orig)
            # Without keep_blank_values, pairs with an empty value vanish.
            expect_without_blanks = [pair for pair in expect if pair[1]]
            result = urlparse.parse_qsl(orig, keep_blank_values=False)
            self.assertEqual(result, expect_without_blanks,
                             "Error parsing %r" % orig)

    def test_qs(self):
        for orig, expect in parse_qs_test_cases:
            result = urlparse.parse_qs(orig, keep_blank_values=True)
            self.assertEqual(result, expect, "Error parsing %r" % orig)
            # Without keep_blank_values, names whose (first) value is
            # empty are expected to be missing from the result.
            expect_without_blanks = dict(
                (name, values) for name, values in expect.items()
                if values[0])
            result = urlparse.parse_qs(orig, keep_blank_values=False)
            self.assertEqual(result, expect_without_blanks,
                             "Error parsing %r" % orig)

    def test_roundtrips(self):
        # Each case is (url, expected urlparse() tuple, expected
        # urlsplit() tuple).
        testcases = [
            ('file:///tmp/junk.txt',
             ('file', '', '/tmp/junk.txt', '', '', ''),
             ('file', '', '/tmp/junk.txt', '', '')),
            ('imap://mail.python.org/mbox1',
             ('imap', 'mail.python.org', '/mbox1', '', '', ''),
             ('imap', 'mail.python.org', '/mbox1', '', '')),
            ('mms://wms.sys.hinet.net/cts/Drama/09006251100.asf',
             ('mms', 'wms.sys.hinet.net', '/cts/Drama/09006251100.asf',
              '', '', ''),
             ('mms', 'wms.sys.hinet.net', '/cts/Drama/09006251100.asf',
              '', '')),
            ('nfs://server/path/to/file.txt',
             ('nfs', 'server', '/path/to/file.txt', '', '', ''),
             ('nfs', 'server', '/path/to/file.txt', '', '')),
            ('svn+ssh://svn.zope.org/repos/main/ZConfig/trunk/',
             ('svn+ssh', 'svn.zope.org', '/repos/main/ZConfig/trunk/',
              '', '', ''),
             ('svn+ssh', 'svn.zope.org', '/repos/main/ZConfig/trunk/',
              '', '')),
            ('git+ssh://git@github.com/user/project.git',
             ('git+ssh', 'git@github.com', '/user/project.git',
              '', '', ''),
             ('git+ssh', 'git@github.com', '/user/project.git',
              '', '')),
        ]
        for url, parsed, split in testcases:
            self.checkRoundtrips(url, parsed, split)

    def test_http_roundtrips(self):
        # urlparse.urlsplit treats 'http:' as an optimized special case,
        # so we test both 'http:' and 'https:' in all the following.
        # Three cheers for white box knowledge!
        # The scheme is left off each case and prepended in the loop.
        testcases = [
            ('://www.python.org',
             ('www.python.org', '', '', '', ''),
             ('www.python.org', '', '', '')),
            ('://www.python.org#abc',
             ('www.python.org', '', '', '', 'abc'),
             ('www.python.org', '', '', 'abc')),
            ('://www.python.org?q=abc',
             ('www.python.org', '', '', 'q=abc', ''),
             ('www.python.org', '', 'q=abc', '')),
            ('://www.python.org/#abc',
             ('www.python.org', '/', '', '', 'abc'),
             ('www.python.org', '/', '', 'abc')),
            ('://a/b/c/d;p?q#f',
             ('a', '/b/c/d', 'p', 'q', 'f'),
             ('a', '/b/c/d;p', 'q', 'f')),
        ]
        for scheme in ('http', 'https'):
            for url, parsed, split in testcases:
                url = scheme + url
                parsed = (scheme,) + parsed
                split = (scheme,) + split
                self.checkRoundtrips(url, parsed, split)

    def checkJoin(self, base, relurl, expected):
        # Assert that urljoin(base, relurl) gives *expected*; the three
        # inputs are used as the failure message.
        self.assertEqual(urlparse.urljoin(base, relurl), expected,
                         (base, relurl, expected))

    def test_unparse_parse(self):
        for u in ['Python', './Python', 'x-newscheme://foo.com/stuff',
                  'x://y', 'x:/y', 'x:/', '/']:
            self.assertEqual(urlparse.urlunsplit(urlparse.urlsplit(u)), u)
            self.assertEqual(urlparse.urlunparse(urlparse.urlparse(u)), u)

    def test_RFC1808(self):
        # "normal" cases from RFC 1808:
        self.checkJoin(RFC1808_BASE, 'g:h', 'g:h')
        self.checkJoin(RFC1808_BASE, 'g', 'http://a/b/c/g')
        self.checkJoin(RFC1808_BASE, './g', 'http://a/b/c/g')
        self.checkJoin(RFC1808_BASE, 'g/', 'http://a/b/c/g/')
        self.checkJoin(RFC1808_BASE, '/g', 'http://a/g')
        self.checkJoin(RFC1808_BASE, '//g', 'http://g')
        self.checkJoin(RFC1808_BASE, 'g?y', 'http://a/b/c/g?y')
        self.checkJoin(RFC1808_BASE, 'g?y/./x', 'http://a/b/c/g?y/./x')
        self.checkJoin(RFC1808_BASE, '#s', 'http://a/b/c/d;p?q#s')
        self.checkJoin(RFC1808_BASE, 'g#s', 'http://a/b/c/g#s')
        self.checkJoin(RFC1808_BASE, 'g#s/./x', 'http://a/b/c/g#s/./x')
        self.checkJoin(RFC1808_BASE, 'g?y#s', 'http://a/b/c/g?y#s')
        self.checkJoin(RFC1808_BASE, 'g;x', 'http://a/b/c/g;x')
        self.checkJoin(RFC1808_BASE, 'g;x?y#s', 'http://a/b/c/g;x?y#s')
        self.checkJoin(RFC1808_BASE, '.', 'http://a/b/c/')
        self.checkJoin(RFC1808_BASE, './', 'http://a/b/c/')
        self.checkJoin(RFC1808_BASE, '..', 'http://a/b/')
        self.checkJoin(RFC1808_BASE, '../', 'http://a/b/')
        self.checkJoin(RFC1808_BASE, '../g', 'http://a/b/g')
        self.checkJoin(RFC1808_BASE, '../..', 'http://a/')
        self.checkJoin(RFC1808_BASE, '../../', 'http://a/')
        self.checkJoin(RFC1808_BASE, '../../g', 'http://a/g')

        # "abnormal" cases from RFC 1808:
        self.checkJoin(RFC1808_BASE, '', 'http://a/b/c/d;p?q#f')
        self.checkJoin(RFC1808_BASE, '../../../g', 'http://a/../g')
        self.checkJoin(RFC1808_BASE, '../../../../g', 'http://a/../../g')
        self.checkJoin(RFC1808_BASE, '/./g', 'http://a/./g')
        self.checkJoin(RFC1808_BASE, '/../g', 'http://a/../g')
        self.checkJoin(RFC1808_BASE, 'g.', 'http://a/b/c/g.')
        self.checkJoin(RFC1808_BASE, '.g', 'http://a/b/c/.g')
        self.checkJoin(RFC1808_BASE, 'g..', 'http://a/b/c/g..')
        self.checkJoin(RFC1808_BASE, '..g', 'http://a/b/c/..g')
        self.checkJoin(RFC1808_BASE, './../g', 'http://a/b/g')
        self.checkJoin(RFC1808_BASE, './g/.', 'http://a/b/c/g/')
        self.checkJoin(RFC1808_BASE, 'g/./h', 'http://a/b/c/g/h')
        self.checkJoin(RFC1808_BASE, 'g/../h', 'http://a/b/c/h')

        # RFC 1808 and RFC 1630 disagree on these (according to RFC 1808),
        # so we'll not actually run these tests (which expect 1808 behavior).
        #self.checkJoin(RFC1808_BASE, 'http:g', 'http:g')
        #self.checkJoin(RFC1808_BASE, 'http:', 'http:')

    def test_RFC2368(self):
        # Issue 11467: path that starts with a number is not parsed correctly
        self.assertEqual(urlparse.urlparse('mailto:1337@example.org'),
                         ('mailto', '', '1337@example.org', '', '', ''))

    def test_RFC2396(self):
        # cases from RFC 2396
        self.checkJoin(RFC2396_BASE, 'g:h', 'g:h')
        self.checkJoin(RFC2396_BASE, 'g', 'http://a/b/c/g')
        self.checkJoin(RFC2396_BASE, './g', 'http://a/b/c/g')
        self.checkJoin(RFC2396_BASE, 'g/', 'http://a/b/c/g/')
        self.checkJoin(RFC2396_BASE, '/g', 'http://a/g')
        self.checkJoin(RFC2396_BASE, '//g', 'http://g')
        self.checkJoin(RFC2396_BASE, 'g?y', 'http://a/b/c/g?y')
        self.checkJoin(RFC2396_BASE, '#s', 'http://a/b/c/d;p?q#s')
        self.checkJoin(RFC2396_BASE, 'g#s', 'http://a/b/c/g#s')
        self.checkJoin(RFC2396_BASE, 'g?y#s', 'http://a/b/c/g?y#s')
        self.checkJoin(RFC2396_BASE, 'g;x', 'http://a/b/c/g;x')
        self.checkJoin(RFC2396_BASE, 'g;x?y#s', 'http://a/b/c/g;x?y#s')
        self.checkJoin(RFC2396_BASE, '.', 'http://a/b/c/')
        self.checkJoin(RFC2396_BASE, './', 'http://a/b/c/')
        self.checkJoin(RFC2396_BASE, '..', 'http://a/b/')
        self.checkJoin(RFC2396_BASE, '../', 'http://a/b/')
        self.checkJoin(RFC2396_BASE, '../g', 'http://a/b/g')
        self.checkJoin(RFC2396_BASE, '../..', 'http://a/')
        self.checkJoin(RFC2396_BASE, '../../', 'http://a/')
        self.checkJoin(RFC2396_BASE, '../../g', 'http://a/g')
        self.checkJoin(RFC2396_BASE, '', RFC2396_BASE)
        self.checkJoin(RFC2396_BASE, '../../../g', 'http://a/../g')
        self.checkJoin(RFC2396_BASE, '../../../../g', 'http://a/../../g')
        self.checkJoin(RFC2396_BASE, '/./g', 'http://a/./g')
        self.checkJoin(RFC2396_BASE, '/../g', 'http://a/../g')
        self.checkJoin(RFC2396_BASE, 'g.', 'http://a/b/c/g.')
        self.checkJoin(RFC2396_BASE, '.g', 'http://a/b/c/.g')
        self.checkJoin(RFC2396_BASE, 'g..', 'http://a/b/c/g..')
        self.checkJoin(RFC2396_BASE, '..g', 'http://a/b/c/..g')
        self.checkJoin(RFC2396_BASE, './../g', 'http://a/b/g')
        self.checkJoin(RFC2396_BASE, './g/.', 'http://a/b/c/g/')
        self.checkJoin(RFC2396_BASE, 'g/./h', 'http://a/b/c/g/h')
        self.checkJoin(RFC2396_BASE, 'g/../h', 'http://a/b/c/h')
        self.checkJoin(RFC2396_BASE, 'g;x=1/./y', 'http://a/b/c/g;x=1/y')
        self.checkJoin(RFC2396_BASE, 'g;x=1/../y', 'http://a/b/c/y')
        self.checkJoin(RFC2396_BASE, 'g?y/./x', 'http://a/b/c/g?y/./x')
        self.checkJoin(RFC2396_BASE, 'g?y/../x', 'http://a/b/c/g?y/../x')
        self.checkJoin(RFC2396_BASE, 'g#s/./x', 'http://a/b/c/g#s/./x')
        self.checkJoin(RFC2396_BASE, 'g#s/../x', 'http://a/b/c/g#s/../x')

    def test_RFC3986(self):
        # Test cases from RFC3986
        self.checkJoin(RFC3986_BASE, 'g:h', 'g:h')
        self.checkJoin(RFC3986_BASE, 'g', 'http://a/b/c/g')
        self.checkJoin(RFC3986_BASE, './g', 'http://a/b/c/g')
        self.checkJoin(RFC3986_BASE, 'g/', 'http://a/b/c/g/')
        self.checkJoin(RFC3986_BASE, '/g', 'http://a/g')
        self.checkJoin(RFC3986_BASE, '//g', 'http://g')
        self.checkJoin(RFC3986_BASE, '?y', 'http://a/b/c/d;p?y')
        self.checkJoin(RFC3986_BASE, 'g?y', 'http://a/b/c/g?y')
        self.checkJoin(RFC3986_BASE, '#s', 'http://a/b/c/d;p?q#s')
        self.checkJoin(RFC3986_BASE, 'g#s', 'http://a/b/c/g#s')
        self.checkJoin(RFC3986_BASE, 'g?y#s', 'http://a/b/c/g?y#s')
        self.checkJoin(RFC3986_BASE, ';x', 'http://a/b/c/;x')
        self.checkJoin(RFC3986_BASE, 'g;x', 'http://a/b/c/g;x')
        self.checkJoin(RFC3986_BASE, 'g;x?y#s', 'http://a/b/c/g;x?y#s')
        self.checkJoin(RFC3986_BASE, '', 'http://a/b/c/d;p?q')
        self.checkJoin(RFC3986_BASE, '.', 'http://a/b/c/')
        self.checkJoin(RFC3986_BASE, './', 'http://a/b/c/')
        self.checkJoin(RFC3986_BASE, '..', 'http://a/b/')
        self.checkJoin(RFC3986_BASE, '../', 'http://a/b/')
        self.checkJoin(RFC3986_BASE, '../g', 'http://a/b/g')
        self.checkJoin(RFC3986_BASE, '../..', 'http://a/')
        self.checkJoin(RFC3986_BASE, '../../', 'http://a/')
        self.checkJoin(RFC3986_BASE, '../../g', 'http://a/g')

        # Abnormal Examples

        # The results expected below for these 'abnormal scenarios'
        # differ from the ones asserted for the same inputs in
        # test_RFC2396 (e.g. 'http://a/../g'), so they cannot both pass.
        # Tests are here for reference.

        #self.checkJoin(RFC3986_BASE, '../../../g','http://a/g')
        #self.checkJoin(RFC3986_BASE, '../../../../g','http://a/g')
        #self.checkJoin(RFC3986_BASE, '/./g','http://a/g')
        #self.checkJoin(RFC3986_BASE, '/../g','http://a/g')

        self.checkJoin(RFC3986_BASE, 'g.', 'http://a/b/c/g.')
        self.checkJoin(RFC3986_BASE, '.g', 'http://a/b/c/.g')
        self.checkJoin(RFC3986_BASE, 'g..', 'http://a/b/c/g..')
        self.checkJoin(RFC3986_BASE, '..g', 'http://a/b/c/..g')
        self.checkJoin(RFC3986_BASE, './../g', 'http://a/b/g')
        self.checkJoin(RFC3986_BASE, './g/.', 'http://a/b/c/g/')
        self.checkJoin(RFC3986_BASE, 'g/./h', 'http://a/b/c/g/h')
        self.checkJoin(RFC3986_BASE, 'g/../h', 'http://a/b/c/h')
        self.checkJoin(RFC3986_BASE, 'g;x=1/./y', 'http://a/b/c/g;x=1/y')
        self.checkJoin(RFC3986_BASE, 'g;x=1/../y', 'http://a/b/c/y')
        self.checkJoin(RFC3986_BASE, 'g?y/./x', 'http://a/b/c/g?y/./x')
        self.checkJoin(RFC3986_BASE, 'g?y/../x', 'http://a/b/c/g?y/../x')
        self.checkJoin(RFC3986_BASE, 'g#s/./x', 'http://a/b/c/g#s/./x')
        self.checkJoin(RFC3986_BASE, 'g#s/../x', 'http://a/b/c/g#s/../x')
        #self.checkJoin(RFC3986_BASE, 'http:g','http:g') # strict parser
        self.checkJoin(RFC3986_BASE, 'http:g', 'http://a/b/c/g')  # relaxed parser

        # Test for issue9721
        self.checkJoin('http://a/b/c/de', ';x', 'http://a/b/c/;x')

    def test_urljoins(self):
        self.checkJoin(SIMPLE_BASE, 'g:h', 'g:h')
        self.checkJoin(SIMPLE_BASE, 'http:g', 'http://a/b/c/g')
        self.checkJoin(SIMPLE_BASE, 'http:', 'http://a/b/c/d')
        self.checkJoin(SIMPLE_BASE, 'g', 'http://a/b/c/g')
        self.checkJoin(SIMPLE_BASE, './g', 'http://a/b/c/g')
        self.checkJoin(SIMPLE_BASE, 'g/', 'http://a/b/c/g/')
        self.checkJoin(SIMPLE_BASE, '/g', 'http://a/g')
        self.checkJoin(SIMPLE_BASE, '//g', 'http://g')
        self.checkJoin(SIMPLE_BASE, '?y', 'http://a/b/c/d?y')
        self.checkJoin(SIMPLE_BASE, 'g?y', 'http://a/b/c/g?y')
        self.checkJoin(SIMPLE_BASE, 'g?y/./x', 'http://a/b/c/g?y/./x')
        self.checkJoin(SIMPLE_BASE, '.', 'http://a/b/c/')
        self.checkJoin(SIMPLE_BASE, './', 'http://a/b/c/')
        self.checkJoin(SIMPLE_BASE, '..', 'http://a/b/')
        self.checkJoin(SIMPLE_BASE, '../', 'http://a/b/')
        self.checkJoin(SIMPLE_BASE, '../g', 'http://a/b/g')
        self.checkJoin(SIMPLE_BASE, '../..', 'http://a/')
        self.checkJoin(SIMPLE_BASE, '../../g', 'http://a/g')
        self.checkJoin(SIMPLE_BASE, '../../../g', 'http://a/../g')
        self.checkJoin(SIMPLE_BASE, './../g', 'http://a/b/g')
        self.checkJoin(SIMPLE_BASE, './g/.', 'http://a/b/c/g/')
        self.checkJoin(SIMPLE_BASE, '/./g', 'http://a/./g')
        self.checkJoin(SIMPLE_BASE, 'g/./h', 'http://a/b/c/g/h')
        self.checkJoin(SIMPLE_BASE, 'g/../h', 'http://a/b/c/h')
        self.checkJoin(SIMPLE_BASE, 'http:?y', 'http://a/b/c/d?y')
        self.checkJoin(SIMPLE_BASE, 'http:g?y', 'http://a/b/c/g?y')
        self.checkJoin(SIMPLE_BASE, 'http:g?y/./x', 'http://a/b/c/g?y/./x')
        self.checkJoin('http:///', '..', 'http:///')
        self.checkJoin('', 'http://a/b/c/g?y/./x', 'http://a/b/c/g?y/./x')
        self.checkJoin('', 'http://a/./g', 'http://a/./g')
        self.checkJoin('svn://pathtorepo/dir1', 'dir2',
                       'svn://pathtorepo/dir2')
        self.checkJoin('svn+ssh://pathtorepo/dir1', 'dir2',
                       'svn+ssh://pathtorepo/dir2')

    def test_RFC2732(self):
        # Each case is (url, expected hostname, expected port).
        for url, hostname, port in [
            ('http://Test.python.org:5432/foo/', 'test.python.org', 5432),
            ('http://12.34.56.78:5432/foo/', '12.34.56.78', 5432),
            ('http://[::1]:5432/foo/', '::1', 5432),
            ('http://[dead:beef::1]:5432/foo/', 'dead:beef::1', 5432),
            ('http://[dead:beef::]:5432/foo/', 'dead:beef::', 5432),
            ('http://[dead:beef:cafe:5417:affe:8FA3:deaf:feed]:5432/foo/',
             'dead:beef:cafe:5417:affe:8fa3:deaf:feed', 5432),
            ('http://[::12.34.56.78]:5432/foo/', '::12.34.56.78', 5432),
            ('http://[::ffff:12.34.56.78]:5432/foo/',
             '::ffff:12.34.56.78', 5432),
            ('http://Test.python.org/foo/', 'test.python.org', None),
            ('http://12.34.56.78/foo/', '12.34.56.78', None),
            ('http://[::1]/foo/', '::1', None),
            ('http://[dead:beef::1]/foo/', 'dead:beef::1', None),
            ('http://[dead:beef::]/foo/', 'dead:beef::', None),
            ('http://[dead:beef:cafe:5417:affe:8FA3:deaf:feed]/foo/',
             'dead:beef:cafe:5417:affe:8fa3:deaf:feed', None),
            ('http://[::12.34.56.78]/foo/', '::12.34.56.78', None),
            ('http://[::ffff:12.34.56.78]/foo/',
             '::ffff:12.34.56.78', None),
            # A trailing ':' with no digits is expected to give no port.
            ('http://Test.python.org:/foo/', 'test.python.org', None),
            ('http://12.34.56.78:/foo/', '12.34.56.78', None),
            ('http://[::1]:/foo/', '::1', None),
            ('http://[dead:beef::1]:/foo/', 'dead:beef::1', None),
            ('http://[dead:beef::]:/foo/', 'dead:beef::', None),
            ('http://[dead:beef:cafe:5417:affe:8FA3:deaf:feed]:/foo/',
             'dead:beef:cafe:5417:affe:8fa3:deaf:feed', None),
            ('http://[::12.34.56.78]:/foo/', '::12.34.56.78', None),
            ('http://[::ffff:12.34.56.78]:/foo/',
             '::ffff:12.34.56.78', None),
            ]:
            urlparsed = urlparse.urlparse(url)
            self.assertEqual((urlparsed.hostname, urlparsed.port),
                             (hostname, port))

        # Unbalanced square brackets in the netloc must be rejected.
        for invalid_url in [
                'http://::12.34.56.78]/',
                'http://[::1/foo/',
                'ftp://[::1/foo/bad]/bad',
                'http://[::1/foo/bad]/bad',
                'http://[::ffff:12.34.56.78']:
            self.assertRaises(ValueError, urlparse.urlparse, invalid_url)

    def test_urldefrag(self):
        # Each case is (url, expected url without fragment, expected
        # fragment).
        for url, defrag, frag in [
            ('http://python.org#frag', 'http://python.org', 'frag'),
            ('http://python.org', 'http://python.org', ''),
            ('http://python.org/#frag', 'http://python.org/', 'frag'),
            ('http://python.org/', 'http://python.org/', ''),
            ('http://python.org/?q#frag', 'http://python.org/?q', 'frag'),
            ('http://python.org/?q', 'http://python.org/?q', ''),
            ('http://python.org/p#frag', 'http://python.org/p', 'frag'),
            ('http://python.org/p?q', 'http://python.org/p?q', ''),
            (RFC1808_BASE, 'http://a/b/c/d;p?q', 'f'),
            (RFC2396_BASE, 'http://a/b/c/d;p?q', ''),
            ]:
            self.assertEqual(urlparse.urldefrag(url), (defrag, frag))

    def test_urlsplit_attributes(self):
        url = "HTTP://WWW.PYTHON.ORG/doc/#frag"
        p = urlparse.urlsplit(url)
        self.assertEqual(p.scheme, "http")
        self.assertEqual(p.netloc, "WWW.PYTHON.ORG")
        self.assertEqual(p.path, "/doc/")
        self.assertEqual(p.query, "")
        self.assertEqual(p.fragment, "frag")
        self.assertEqual(p.username, None)
        self.assertEqual(p.password, None)
        self.assertEqual(p.hostname, "www.python.org")
        self.assertEqual(p.port, None)
        # geturl() won't return exactly the original URL in this case
        # since the scheme is always case-normalized
        #self.assertEqual(p.geturl(), url)

        url = "http://User:Pass@www.python.org:080/doc/?query=yes#frag"
        p = urlparse.urlsplit(url)
        self.assertEqual(p.scheme, "http")
        self.assertEqual(p.netloc, "User:Pass@www.python.org:080")
        self.assertEqual(p.path, "/doc/")
        self.assertEqual(p.query, "query=yes")
        self.assertEqual(p.fragment, "frag")
        self.assertEqual(p.username, "User")
        self.assertEqual(p.password, "Pass")
        self.assertEqual(p.hostname, "www.python.org")
        self.assertEqual(p.port, 80)
        self.assertEqual(p.geturl(), url)

        # Addressing issue1698, which suggests Username can contain
        # "@" characters.  Though not RFC compliant, many ftp sites allow
        # and request email addresses as usernames.

        url = "http://User@example.com:Pass@www.python.org:080/doc/?query=yes#frag"
        p = urlparse.urlsplit(url)
        self.assertEqual(p.scheme, "http")
        self.assertEqual(p.netloc, "User@example.com:Pass@www.python.org:080")
        self.assertEqual(p.path, "/doc/")
        self.assertEqual(p.query, "query=yes")
        self.assertEqual(p.fragment, "frag")
        self.assertEqual(p.username, "User@example.com")
        self.assertEqual(p.password, "Pass")
        self.assertEqual(p.hostname, "www.python.org")
        self.assertEqual(p.port, 80)
        self.assertEqual(p.geturl(), url)

        # Verify an illegal port of value greater than 65535 is set as None
        url = "http://www.python.org:65536"
        p = urlparse.urlsplit(url)
        self.assertEqual(p.port, None)

    def test_issue14072(self):
        p1 = urlparse.urlsplit('tel:+31-641044153')
        self.assertEqual(p1.scheme, 'tel')
        self.assertEqual(p1.path, '+31-641044153')

        p2 = urlparse.urlsplit('tel:+31641044153')
        self.assertEqual(p2.scheme, 'tel')
        self.assertEqual(p2.path, '+31641044153')

        # Assert for urlparse
        p1 = urlparse.urlparse('tel:+31-641044153')
        self.assertEqual(p1.scheme, 'tel')
        self.assertEqual(p1.path, '+31-641044153')

        p2 = urlparse.urlparse('tel:+31641044153')
        self.assertEqual(p2.scheme, 'tel')
        self.assertEqual(p2.path, '+31641044153')

    def test_telurl_params(self):
        p1 = urlparse.urlparse('tel:123-4;phone-context=+1-650-516')
        self.assertEqual(p1.scheme, 'tel')
        self.assertEqual(p1.path, '123-4')
        self.assertEqual(p1.params, 'phone-context=+1-650-516')

        p1 = urlparse.urlparse('tel:+1-201-555-0123')
        self.assertEqual(p1.scheme, 'tel')
        self.assertEqual(p1.path, '+1-201-555-0123')
        self.assertEqual(p1.params, '')

        p1 = urlparse.urlparse('tel:7042;phone-context=example.com')
        self.assertEqual(p1.scheme, 'tel')
        self.assertEqual(p1.path, '7042')
        self.assertEqual(p1.params, 'phone-context=example.com')

        p1 = urlparse.urlparse('tel:863-1234;phone-context=+1-914-555')
        self.assertEqual(p1.scheme, 'tel')
        self.assertEqual(p1.path, '863-1234')
        self.assertEqual(p1.params, 'phone-context=+1-914-555')

    def test_attributes_bad_port(self):
        """Check handling of non-integer ports."""
        p = urlparse.urlsplit("http://www.example.net:foo")
        self.assertEqual(p.netloc, "www.example.net:foo")
        # .port is evaluated lazily, so the access itself must raise.
        self.assertRaises(ValueError, lambda: p.port)

        p = urlparse.urlparse("http://www.example.net:foo")
        self.assertEqual(p.netloc, "www.example.net:foo")
        self.assertRaises(ValueError, lambda: p.port)

    def test_attributes_without_netloc(self):
        # This example is straight from RFC 3261.  It looks like it
        # should allow the username, hostname, and port to be filled
        # in, but doesn't.  Since it's a URI and doesn't use the
        # scheme://netloc syntax, the netloc and related attributes
        # should be left empty.
        uri = "sip:alice@atlanta.com;maddr=239.255.255.1;ttl=15"
        p = urlparse.urlsplit(uri)
        self.assertEqual(p.netloc, "")
        self.assertEqual(p.username, None)
        self.assertEqual(p.password, None)
        self.assertEqual(p.hostname, None)
        self.assertEqual(p.port, None)
        self.assertEqual(p.geturl(), uri)

        p = urlparse.urlparse(uri)
        self.assertEqual(p.netloc, "")
        self.assertEqual(p.username, None)
        self.assertEqual(p.password, None)
        self.assertEqual(p.hostname, None)
        self.assertEqual(p.port, None)
        self.assertEqual(p.geturl(), uri)

    def test_caching(self):
        # Test case for bug #1313119
        # Parse the unicode form first, then check that parsing the equal
        # byte-string form still yields components of the byte-string type.
        uri = "http://example.com/doc/"
        unicode_uri = unicode(uri)

        urlparse.urlparse(unicode_uri)
        p = urlparse.urlparse(uri)
        self.assertEqual(type(p.scheme), type(uri))
        self.assertEqual(type(p.hostname), type(uri))
        self.assertEqual(type(p.path), type(uri))

    def test_noslash(self):
        # Issue 1637: http://foo.com?query is legal
        self.assertEqual(urlparse.urlparse("http://example.com?blahblah=/foo"),
                         ('http', 'example.com', '', '', 'blahblah=/foo', ''))

    def test_anyscheme(self):
        # Issue 7904: s3://foo.com/stuff has netloc "foo.com".
        self.assertEqual(urlparse.urlparse("s3://foo.com/stuff"),
                         ('s3', 'foo.com', '/stuff', '', '', ''))
        self.assertEqual(urlparse.urlparse("x-newscheme://foo.com/stuff"),
                         ('x-newscheme', 'foo.com', '/stuff', '', '', ''))
        self.assertEqual(
            urlparse.urlparse("x-newscheme://foo.com/stuff?query#fragment"),
            ('x-newscheme', 'foo.com', '/stuff', '', 'query', 'fragment'))
        self.assertEqual(
            urlparse.urlparse("x-newscheme://foo.com/stuff?query"),
            ('x-newscheme', 'foo.com', '/stuff', '', 'query', ''))

    def test_withoutscheme(self):
        # Test urlparse without scheme
        # Issue 754016: urlparse goes wrong with IP:port without scheme
        # RFC 1808 specifies that netloc should start with //, urlparse expects
        # the same, otherwise it classifies the portion of url as path.
        self.assertEqual(urlparse.urlparse("path"),
                         ('', '', 'path', '', '', ''))
        self.assertEqual(urlparse.urlparse("//www.python.org:80"),
                         ('', 'www.python.org:80', '', '', '', ''))
        self.assertEqual(urlparse.urlparse("http://www.python.org:80"),
                         ('http', 'www.python.org:80', '', '', '', ''))

    def test_portseparator(self):
        # Issue 754016 makes changes for port separator ':' from scheme separator
        self.assertEqual(urlparse.urlparse("path:80"),
                         ('', '', 'path:80', '', '', ''))
        self.assertEqual(urlparse.urlparse("http:"),
                         ('http', '', '', '', '', ''))
        self.assertEqual(urlparse.urlparse("https:"),
                         ('https', '', '', '', '', ''))
        self.assertEqual(urlparse.urlparse("http://www.python.org:80"),
                         ('http', 'www.python.org:80', '', '', '', ''))
Senthil Kumaran4e78de82010-02-19 07:32:48 +0000626
def test_main():
    # Run every test in UrlParseTestCase through test_support.run_unittest().
    test_support.run_unittest(UrlParseTestCase)


if __name__ == "__main__":
    test_main()