blob: c92b5aad64c266425a83db1b77b840c6f379729e [file] [log] [blame]
Skip Montanaro6ec967d2002-03-23 05:32:10 +00001#! /usr/bin/env python
2
Benjamin Petersonee8712c2008-05-20 21:35:26 +00003from test import support
Skip Montanaro6ec967d2002-03-23 05:32:10 +00004import unittest
Jeremy Hylton1afc1692008-06-18 20:49:58 +00005import urllib.parse
Fred Drakea4d18a02001-01-05 05:57:04 +00006
# Base URL for the urljoin() cases in test_RFC1808; note that it carries a
# fragment ("#f"), which test_urldefrag also relies on.
RFC1808_BASE = "http://a/b/c/d;p?q#f"
# Base URL for the urljoin() cases in test_RFC2396: the same URL as above
# but without the fragment.
RFC2396_BASE = "http://a/b/c/d;p?q"
Fred Drakea4d18a02001-01-05 05:57:04 +00009
class UrlParseTestCase(unittest.TestCase):
    """Tests for urllib.parse: parse/unparse round-trips, urljoin(),
    urldefrag() and the attributes exposed on parse results."""

    # Positional fields of a urlsplit() result, in tuple order.
    _SPLIT_FIELDS = ("scheme", "netloc", "path", "query", "fragment")
    # Positional fields of a urlparse() result, in tuple order.
    _PARSE_FIELDS = ("scheme", "netloc", "path", "params", "query",
                     "fragment")
    # Extra attributes derived from the netloc on both result types.
    _NETLOC_ATTRS = ("username", "password", "hostname", "port")

    def _check_roundtrip(self, url, expected, parse, unparse, fields):
        """Verify that *url* parses to *expected* and unparses back to *url*.

        url      -- the URL string under test
        expected -- tuple the parse result must compare equal to
        parse    -- urllib.parse.urlparse or urllib.parse.urlsplit
        unparse  -- the matching urlunparse / urlunsplit function
        fields   -- attribute names of the result, in tuple order

        Fails the test (AssertionError) on the first mismatch.
        """
        result = parse(url)
        self.assertEqual(result, expected)
        # Access by attribute name must agree with the positional content.
        by_name = tuple(getattr(result, name) for name in fields)
        self.assertEqual(by_name, expected)
        # put it back together and it should be the same
        rebuilt = unparse(result)
        self.assertEqual(rebuilt, url)
        self.assertEqual(rebuilt, result.geturl())

        # the result of geturl() is a fixpoint; we can always parse it
        # again to get the same result:
        reparsed = parse(result.geturl())
        self.assertEqual(reparsed.geturl(), result.geturl())
        self.assertEqual(reparsed, result)
        for name in fields + self._NETLOC_ATTRS:
            # The attribute name is passed as the message so a failure
            # says which attribute differed.
            self.assertEqual(getattr(reparsed, name),
                             getattr(result, name), name)

    def checkRoundtrips(self, url, parsed, split):
        """Check *url* against both urlparse() and urlsplit().

        parsed -- expected 6-tuple from urlparse(url)
        split  -- expected 5-tuple from urlsplit(url)
        """
        self._check_roundtrip(url, parsed,
                              urllib.parse.urlparse,
                              urllib.parse.urlunparse,
                              self._PARSE_FIELDS)
        # check the roundtrip using urlsplit() as well
        self._check_roundtrip(url, split,
                              urllib.parse.urlsplit,
                              urllib.parse.urlunsplit,
                              self._SPLIT_FIELDS)

    def test_roundtrips(self):
        # Each entry: (url, expected urlparse tuple, expected urlsplit tuple)
        testcases = [
            ('file:///tmp/junk.txt',
             ('file', '', '/tmp/junk.txt', '', '', ''),
             ('file', '', '/tmp/junk.txt', '', '')),
            ('imap://mail.python.org/mbox1',
             ('imap', 'mail.python.org', '/mbox1', '', '', ''),
             ('imap', 'mail.python.org', '/mbox1', '', '')),
            ('mms://wms.sys.hinet.net/cts/Drama/09006251100.asf',
             ('mms', 'wms.sys.hinet.net', '/cts/Drama/09006251100.asf',
              '', '', ''),
             ('mms', 'wms.sys.hinet.net', '/cts/Drama/09006251100.asf',
              '', '')),
            ('svn+ssh://svn.zope.org/repos/main/ZConfig/trunk/',
             ('svn+ssh', 'svn.zope.org', '/repos/main/ZConfig/trunk/',
              '', '', ''),
             ('svn+ssh', 'svn.zope.org', '/repos/main/ZConfig/trunk/',
              '', '')),
            ]
        for url, parsed, split in testcases:
            self.checkRoundtrips(url, parsed, split)

    def test_http_roundtrips(self):
        # urllib.parse.urlsplit treats 'http:' as an optimized special case,
        # so we test both 'http:' and 'https:' in all the following.
        # Three cheers for white box knowledge!
        #
        # Each entry omits the scheme: (url tail, urlparse tuple tail,
        # urlsplit tuple tail); the scheme is prepended in the loop below.
        testcases = [
            ('://www.python.org',
             ('www.python.org', '', '', '', ''),
             ('www.python.org', '', '', '')),
            ('://www.python.org#abc',
             ('www.python.org', '', '', '', 'abc'),
             ('www.python.org', '', '', 'abc')),
            ('://www.python.org?q=abc',
             ('www.python.org', '', '', 'q=abc', ''),
             ('www.python.org', '', 'q=abc', '')),
            ('://www.python.org/#abc',
             ('www.python.org', '/', '', '', 'abc'),
             ('www.python.org', '/', '', 'abc')),
            ('://a/b/c/d;p?q#f',
             ('a', '/b/c/d', 'p', 'q', 'f'),
             ('a', '/b/c/d;p', 'q', 'f')),
            ]
        for scheme in ('http', 'https'):
            for tail, parsed_tail, split_tail in testcases:
                self.checkRoundtrips(scheme + tail,
                                     (scheme,) + parsed_tail,
                                     (scheme,) + split_tail)

    def checkJoin(self, base, relurl, expected):
        """Assert that urljoin(base, relurl) equals *expected*.

        The (base, relurl, expected) triple is passed as the failure
        message so the offending case can be identified.
        """
        self.assertEqual(urllib.parse.urljoin(base, relurl), expected,
                         (base, relurl, expected))

    def test_unparse_parse(self):
        # Scheme-less relative references must survive parse + unparse.
        for u in ['Python', './Python']:
            self.assertEqual(
                urllib.parse.urlunsplit(urllib.parse.urlsplit(u)), u)
            self.assertEqual(
                urllib.parse.urlunparse(urllib.parse.urlparse(u)), u)

    def test_RFC1808(self):
        # "normal" cases from RFC 1808:
        self.checkJoin(RFC1808_BASE, 'g:h', 'g:h')
        self.checkJoin(RFC1808_BASE, 'g', 'http://a/b/c/g')
        self.checkJoin(RFC1808_BASE, './g', 'http://a/b/c/g')
        self.checkJoin(RFC1808_BASE, 'g/', 'http://a/b/c/g/')
        self.checkJoin(RFC1808_BASE, '/g', 'http://a/g')
        self.checkJoin(RFC1808_BASE, '//g', 'http://g')
        self.checkJoin(RFC1808_BASE, 'g?y', 'http://a/b/c/g?y')
        self.checkJoin(RFC1808_BASE, 'g?y/./x', 'http://a/b/c/g?y/./x')
        self.checkJoin(RFC1808_BASE, '#s', 'http://a/b/c/d;p?q#s')
        self.checkJoin(RFC1808_BASE, 'g#s', 'http://a/b/c/g#s')
        self.checkJoin(RFC1808_BASE, 'g#s/./x', 'http://a/b/c/g#s/./x')
        self.checkJoin(RFC1808_BASE, 'g?y#s', 'http://a/b/c/g?y#s')
        self.checkJoin(RFC1808_BASE, 'g;x', 'http://a/b/c/g;x')
        self.checkJoin(RFC1808_BASE, 'g;x?y#s', 'http://a/b/c/g;x?y#s')
        self.checkJoin(RFC1808_BASE, '.', 'http://a/b/c/')
        self.checkJoin(RFC1808_BASE, './', 'http://a/b/c/')
        self.checkJoin(RFC1808_BASE, '..', 'http://a/b/')
        self.checkJoin(RFC1808_BASE, '../', 'http://a/b/')
        self.checkJoin(RFC1808_BASE, '../g', 'http://a/b/g')
        self.checkJoin(RFC1808_BASE, '../..', 'http://a/')
        self.checkJoin(RFC1808_BASE, '../../', 'http://a/')
        self.checkJoin(RFC1808_BASE, '../../g', 'http://a/g')

        # "abnormal" cases from RFC 1808:
        self.checkJoin(RFC1808_BASE, '', 'http://a/b/c/d;p?q#f')
        self.checkJoin(RFC1808_BASE, '../../../g', 'http://a/../g')
        self.checkJoin(RFC1808_BASE, '../../../../g', 'http://a/../../g')
        self.checkJoin(RFC1808_BASE, '/./g', 'http://a/./g')
        self.checkJoin(RFC1808_BASE, '/../g', 'http://a/../g')
        self.checkJoin(RFC1808_BASE, 'g.', 'http://a/b/c/g.')
        self.checkJoin(RFC1808_BASE, '.g', 'http://a/b/c/.g')
        self.checkJoin(RFC1808_BASE, 'g..', 'http://a/b/c/g..')
        self.checkJoin(RFC1808_BASE, '..g', 'http://a/b/c/..g')
        self.checkJoin(RFC1808_BASE, './../g', 'http://a/b/g')
        self.checkJoin(RFC1808_BASE, './g/.', 'http://a/b/c/g/')
        self.checkJoin(RFC1808_BASE, 'g/./h', 'http://a/b/c/g/h')
        self.checkJoin(RFC1808_BASE, 'g/../h', 'http://a/b/c/h')

        # RFC 1808 and RFC 1630 disagree on these (according to RFC 1808),
        # so we'll not actually run these tests (which expect 1808 behavior).
        #self.checkJoin(RFC1808_BASE, 'http:g', 'http:g')
        #self.checkJoin(RFC1808_BASE, 'http:', 'http:')

    def test_RFC2396(self):
        # cases from RFC 2396

        self.checkJoin(RFC2396_BASE, '?y', 'http://a/b/c/?y')
        self.checkJoin(RFC2396_BASE, ';x', 'http://a/b/c/;x')

        self.checkJoin(RFC2396_BASE, 'g:h', 'g:h')
        self.checkJoin(RFC2396_BASE, 'g', 'http://a/b/c/g')
        self.checkJoin(RFC2396_BASE, './g', 'http://a/b/c/g')
        self.checkJoin(RFC2396_BASE, 'g/', 'http://a/b/c/g/')
        self.checkJoin(RFC2396_BASE, '/g', 'http://a/g')
        self.checkJoin(RFC2396_BASE, '//g', 'http://g')
        self.checkJoin(RFC2396_BASE, 'g?y', 'http://a/b/c/g?y')
        self.checkJoin(RFC2396_BASE, '#s', 'http://a/b/c/d;p?q#s')
        self.checkJoin(RFC2396_BASE, 'g#s', 'http://a/b/c/g#s')
        self.checkJoin(RFC2396_BASE, 'g?y#s', 'http://a/b/c/g?y#s')
        self.checkJoin(RFC2396_BASE, 'g;x', 'http://a/b/c/g;x')
        self.checkJoin(RFC2396_BASE, 'g;x?y#s', 'http://a/b/c/g;x?y#s')
        self.checkJoin(RFC2396_BASE, '.', 'http://a/b/c/')
        self.checkJoin(RFC2396_BASE, './', 'http://a/b/c/')
        self.checkJoin(RFC2396_BASE, '..', 'http://a/b/')
        self.checkJoin(RFC2396_BASE, '../', 'http://a/b/')
        self.checkJoin(RFC2396_BASE, '../g', 'http://a/b/g')
        self.checkJoin(RFC2396_BASE, '../..', 'http://a/')
        self.checkJoin(RFC2396_BASE, '../../', 'http://a/')
        self.checkJoin(RFC2396_BASE, '../../g', 'http://a/g')
        self.checkJoin(RFC2396_BASE, '', RFC2396_BASE)
        self.checkJoin(RFC2396_BASE, '../../../g', 'http://a/../g')
        self.checkJoin(RFC2396_BASE, '../../../../g', 'http://a/../../g')
        self.checkJoin(RFC2396_BASE, '/./g', 'http://a/./g')
        self.checkJoin(RFC2396_BASE, '/../g', 'http://a/../g')
        self.checkJoin(RFC2396_BASE, 'g.', 'http://a/b/c/g.')
        self.checkJoin(RFC2396_BASE, '.g', 'http://a/b/c/.g')
        self.checkJoin(RFC2396_BASE, 'g..', 'http://a/b/c/g..')
        self.checkJoin(RFC2396_BASE, '..g', 'http://a/b/c/..g')
        self.checkJoin(RFC2396_BASE, './../g', 'http://a/b/g')
        self.checkJoin(RFC2396_BASE, './g/.', 'http://a/b/c/g/')
        self.checkJoin(RFC2396_BASE, 'g/./h', 'http://a/b/c/g/h')
        self.checkJoin(RFC2396_BASE, 'g/../h', 'http://a/b/c/h')
        self.checkJoin(RFC2396_BASE, 'g;x=1/./y', 'http://a/b/c/g;x=1/y')
        self.checkJoin(RFC2396_BASE, 'g;x=1/../y', 'http://a/b/c/y')
        self.checkJoin(RFC2396_BASE, 'g?y/./x', 'http://a/b/c/g?y/./x')
        self.checkJoin(RFC2396_BASE, 'g?y/../x', 'http://a/b/c/g?y/../x')
        self.checkJoin(RFC2396_BASE, 'g#s/./x', 'http://a/b/c/g#s/./x')
        self.checkJoin(RFC2396_BASE, 'g#s/../x', 'http://a/b/c/g#s/../x')

    def test_urldefrag(self):
        # Each entry: (url, url without its fragment, the fragment)
        for url, defrag, frag in [
            ('http://python.org#frag', 'http://python.org', 'frag'),
            ('http://python.org', 'http://python.org', ''),
            ('http://python.org/#frag', 'http://python.org/', 'frag'),
            ('http://python.org/', 'http://python.org/', ''),
            ('http://python.org/?q#frag', 'http://python.org/?q', 'frag'),
            ('http://python.org/?q', 'http://python.org/?q', ''),
            ('http://python.org/p#frag', 'http://python.org/p', 'frag'),
            ('http://python.org/p?q', 'http://python.org/p?q', ''),
            (RFC1808_BASE, 'http://a/b/c/d;p?q', 'f'),
            (RFC2396_BASE, 'http://a/b/c/d;p?q', ''),
            ]:
            self.assertEqual(urllib.parse.urldefrag(url), (defrag, frag))

    def test_urlsplit_attributes(self):
        url = "HTTP://WWW.PYTHON.ORG/doc/#frag"
        p = urllib.parse.urlsplit(url)
        self.assertEqual(p.scheme, "http")
        self.assertEqual(p.netloc, "WWW.PYTHON.ORG")
        self.assertEqual(p.path, "/doc/")
        self.assertEqual(p.query, "")
        self.assertEqual(p.fragment, "frag")
        self.assertEqual(p.username, None)
        self.assertEqual(p.password, None)
        self.assertEqual(p.hostname, "www.python.org")
        self.assertEqual(p.port, None)
        # geturl() won't return exactly the original URL in this case
        # since the scheme is always case-normalized
        #self.assertEqual(p.geturl(), url)

        url = "http://User:Pass@www.python.org:080/doc/?query=yes#frag"
        p = urllib.parse.urlsplit(url)
        self.assertEqual(p.scheme, "http")
        self.assertEqual(p.netloc, "User:Pass@www.python.org:080")
        self.assertEqual(p.path, "/doc/")
        self.assertEqual(p.query, "query=yes")
        self.assertEqual(p.fragment, "frag")
        self.assertEqual(p.username, "User")
        self.assertEqual(p.password, "Pass")
        self.assertEqual(p.hostname, "www.python.org")
        self.assertEqual(p.port, 80)
        self.assertEqual(p.geturl(), url)

        # Addressing issue1698, which suggests Username can contain
        # "@" characters.  Though not RFC compliant, many ftp sites allow
        # and request email addresses as usernames.

        url = ("http://User@example.com:Pass@www.python.org:080"
               "/doc/?query=yes#frag")
        p = urllib.parse.urlsplit(url)
        self.assertEqual(p.scheme, "http")
        self.assertEqual(p.netloc,
                         "User@example.com:Pass@www.python.org:080")
        self.assertEqual(p.path, "/doc/")
        self.assertEqual(p.query, "query=yes")
        self.assertEqual(p.fragment, "frag")
        self.assertEqual(p.username, "User@example.com")
        self.assertEqual(p.password, "Pass")
        self.assertEqual(p.hostname, "www.python.org")
        self.assertEqual(p.port, 80)
        self.assertEqual(p.geturl(), url)

    def test_attributes_bad_port(self):
        """Check handling of non-integer ports."""
        # The same expectations hold for both result types.
        for parse in (urllib.parse.urlsplit, urllib.parse.urlparse):
            p = parse("http://www.example.net:foo")
            self.assertEqual(p.netloc, "www.example.net:foo")
            # The error surfaces when .port is read, not at parse time.
            self.assertRaises(ValueError, lambda: p.port)

    def test_attributes_without_netloc(self):
        # This example is straight from RFC 3261.  It looks like it
        # should allow the username, hostname, and port to be filled
        # in, but doesn't.  Since it's a URI and doesn't use the
        # scheme://netloc syntax, the netloc and related attributes
        # should be left empty.
        uri = "sip:alice@atlanta.com;maddr=239.255.255.1;ttl=15"
        for parse in (urllib.parse.urlsplit, urllib.parse.urlparse):
            p = parse(uri)
            self.assertEqual(p.netloc, "")
            self.assertEqual(p.username, None)
            self.assertEqual(p.password, None)
            self.assertEqual(p.hostname, None)
            self.assertEqual(p.port, None)
            self.assertEqual(p.geturl(), uri)

    def test_noslash(self):
        # Issue 1637: http://foo.com?query is legal
        self.assertEqual(
            urllib.parse.urlparse("http://example.com?blahblah=/foo"),
            ('http', 'example.com', '', '', 'blahblah=/foo', ''))
312
def test_main():
    """Run the test cases of this module via test.support.run_unittest()."""
    case_classes = (UrlParseTestCase,)
    support.run_unittest(*case_classes)
Skip Montanaro6ec967d2002-03-23 05:32:10 +0000315
# Run the tests when this file is executed directly as a script.
if __name__ == "__main__":
    test_main()