blob: f67487df1b7467a5cc0a513ede1910b53fe341b6 [file] [log] [blame]
Benjamin Peterson90f5ba52010-03-11 22:53:45 +00001#! /usr/bin/env python3
Skip Montanaro6ec967d2002-03-23 05:32:10 +00002
Benjamin Petersonee8712c2008-05-20 21:35:26 +00003from test import support
Skip Montanaro6ec967d2002-03-23 05:32:10 +00004import unittest
Jeremy Hylton1afc1692008-06-18 20:49:58 +00005import urllib.parse
Fred Drakea4d18a02001-01-05 05:57:04 +00006
# Base URLs that the urljoin() tests below resolve relative references
# against; one constant per RFC whose examples are exercised.
RFC1808_BASE = "http://a/b/c/d;p?q#f"
RFC2396_BASE = "http://a/b/c/d;p?q"
RFC3986_BASE = "http://a/b/c/d;p?q"
Fred Drakea4d18a02001-01-05 05:57:04 +000010
# A list of test cases.  Each test case is a two-tuple that contains
# a string with the query and the list of (name, value) pairs that
# parse_qsl() is expected to return for it when blank values are kept.

parse_qsl_test_cases = [
    ("", []),
    ("&", []),
    ("&&", []),
    ("=", [('', '')]),
    ("=a", [('', 'a')]),
    ("a", [('a', '')]),
    ("a=", [('a', '')]),
    ("&a=b", [('a', 'b')]),
    ("a=a+b&b=b+c", [('a', 'a b'), ('b', 'b c')]),
    ("a=1&a=2", [('a', '1'), ('a', '2')]),
]
27
class UrlParseTestCase(unittest.TestCase):
    """Tests for URL parsing, joining and query-string parsing in urllib.parse."""

    def _checkFixpoint(self, reparsed, result, attrs):
        """Assert that re-parsing geturl() reproduced *result*.

        reparsed -- the outcome of parsing result.geturl() again.
        result   -- the original parse result.
        attrs    -- names of the attributes that must match on both objects.
        """
        self.assertEqual(reparsed.geturl(), result.geturl())
        self.assertEqual(reparsed, result)
        for attr in attrs:
            self.assertEqual(getattr(reparsed, attr), getattr(result, attr))

    def checkRoundtrips(self, url, parsed, split):
        """Check that *url* parses as expected and survives a round trip.

        url    -- the URL string under test.
        parsed -- the 6-tuple urlparse() is expected to return.
        split  -- the 5-tuple urlsplit() is expected to return.
        """
        result = urllib.parse.urlparse(url)
        self.assertEqual(result, parsed)
        t = (result.scheme, result.netloc, result.path,
             result.params, result.query, result.fragment)
        self.assertEqual(t, parsed)
        # put it back together and it should be the same
        result2 = urllib.parse.urlunparse(result)
        self.assertEqual(result2, url)
        self.assertEqual(result2, result.geturl())

        # the result of geturl() is a fixpoint; we can always parse it
        # again to get the same result:
        result3 = urllib.parse.urlparse(result.geturl())
        self._checkFixpoint(
            result3, result,
            ('scheme', 'netloc', 'path', 'params', 'query', 'fragment',
             'username', 'password', 'hostname', 'port'))

        # check the roundtrip using urlsplit() as well
        result = urllib.parse.urlsplit(url)
        self.assertEqual(result, split)
        t = (result.scheme, result.netloc, result.path,
             result.query, result.fragment)
        self.assertEqual(t, split)
        result2 = urllib.parse.urlunsplit(result)
        self.assertEqual(result2, url)
        self.assertEqual(result2, result.geturl())

        # check the fixpoint property of re-parsing the result of geturl()
        result3 = urllib.parse.urlsplit(result.geturl())
        self._checkFixpoint(
            result3, result,
            ('scheme', 'netloc', 'path', 'query', 'fragment',
             'username', 'password', 'hostname', 'port'))

    def test_qsl(self):
        # Every query in the module-level table must parse to its expected
        # list of pairs when blank values are kept.
        for orig, expect in parse_qsl_test_cases:
            result = urllib.parse.parse_qsl(orig, keep_blank_values=True)
            self.assertEqual(result, expect, "Error parsing %r" % orig)

    def test_roundtrips(self):
        # Each entry is (url, expected urlparse() tuple, expected urlsplit()
        # tuple).
        testcases = [
            ('file:///tmp/junk.txt',
             ('file', '', '/tmp/junk.txt', '', '', ''),
             ('file', '', '/tmp/junk.txt', '', '')),
            ('imap://mail.python.org/mbox1',
             ('imap', 'mail.python.org', '/mbox1', '', '', ''),
             ('imap', 'mail.python.org', '/mbox1', '', '')),
            ('mms://wms.sys.hinet.net/cts/Drama/09006251100.asf',
             ('mms', 'wms.sys.hinet.net', '/cts/Drama/09006251100.asf',
              '', '', ''),
             ('mms', 'wms.sys.hinet.net', '/cts/Drama/09006251100.asf',
              '', '')),
            ('nfs://server/path/to/file.txt',
             ('nfs', 'server', '/path/to/file.txt', '', '', ''),
             ('nfs', 'server', '/path/to/file.txt', '', '')),
            ('svn+ssh://svn.zope.org/repos/main/ZConfig/trunk/',
             ('svn+ssh', 'svn.zope.org', '/repos/main/ZConfig/trunk/',
              '', '', ''),
             ('svn+ssh', 'svn.zope.org', '/repos/main/ZConfig/trunk/',
              '', '')),
        ]
        for url, parsed, split in testcases:
            self.checkRoundtrips(url, parsed, split)

    def test_http_roundtrips(self):
        # urllib.parse.urlsplit treats 'http:' as an optimized special case,
        # so we test both 'http:' and 'https:' in all the following.
        # Three cheers for white box knowledge!
        #
        # The scheme is left off every entry here; the loop below prepends
        # it to the URL and to both expected tuples.
        testcases = [
            ('://www.python.org',
             ('www.python.org', '', '', '', ''),
             ('www.python.org', '', '', '')),
            ('://www.python.org#abc',
             ('www.python.org', '', '', '', 'abc'),
             ('www.python.org', '', '', 'abc')),
            ('://www.python.org?q=abc',
             ('www.python.org', '', '', 'q=abc', ''),
             ('www.python.org', '', 'q=abc', '')),
            ('://www.python.org/#abc',
             ('www.python.org', '/', '', '', 'abc'),
             ('www.python.org', '/', '', 'abc')),
            ('://a/b/c/d;p?q#f',
             ('a', '/b/c/d', 'p', 'q', 'f'),
             ('a', '/b/c/d;p', 'q', 'f')),
        ]
        for scheme in ('http', 'https'):
            for url, parsed, split in testcases:
                url = scheme + url
                parsed = (scheme,) + parsed
                split = (scheme,) + split
                self.checkRoundtrips(url, parsed, split)

    def checkJoin(self, base, relurl, expected):
        """Assert that urljoin(base, relurl) equals *expected*.

        The (base, relurl, expected) triple is used as the failure message.
        """
        self.assertEqual(urllib.parse.urljoin(base, relurl), expected,
                         (base, relurl, expected))

    def test_unparse_parse(self):
        # Splitting/parsing and then reassembling must give back the input.
        urls = [
            'Python',
            './Python',
            'x-newscheme://foo.com/stuff',
            'x://y',
            'x:/y',
            'x:/',
            '/',
        ]
        for u in urls:
            self.assertEqual(
                urllib.parse.urlunsplit(urllib.parse.urlsplit(u)), u)
            self.assertEqual(
                urllib.parse.urlunparse(urllib.parse.urlparse(u)), u)

    def test_RFC1808(self):
        # "normal" cases from RFC 1808:
        self.checkJoin(RFC1808_BASE, 'g:h', 'g:h')
        self.checkJoin(RFC1808_BASE, 'g', 'http://a/b/c/g')
        self.checkJoin(RFC1808_BASE, './g', 'http://a/b/c/g')
        self.checkJoin(RFC1808_BASE, 'g/', 'http://a/b/c/g/')
        self.checkJoin(RFC1808_BASE, '/g', 'http://a/g')
        self.checkJoin(RFC1808_BASE, '//g', 'http://g')
        self.checkJoin(RFC1808_BASE, 'g?y', 'http://a/b/c/g?y')
        self.checkJoin(RFC1808_BASE, 'g?y/./x', 'http://a/b/c/g?y/./x')
        self.checkJoin(RFC1808_BASE, '#s', 'http://a/b/c/d;p?q#s')
        self.checkJoin(RFC1808_BASE, 'g#s', 'http://a/b/c/g#s')
        self.checkJoin(RFC1808_BASE, 'g#s/./x', 'http://a/b/c/g#s/./x')
        self.checkJoin(RFC1808_BASE, 'g?y#s', 'http://a/b/c/g?y#s')
        self.checkJoin(RFC1808_BASE, 'g;x', 'http://a/b/c/g;x')
        self.checkJoin(RFC1808_BASE, 'g;x?y#s', 'http://a/b/c/g;x?y#s')
        self.checkJoin(RFC1808_BASE, '.', 'http://a/b/c/')
        self.checkJoin(RFC1808_BASE, './', 'http://a/b/c/')
        self.checkJoin(RFC1808_BASE, '..', 'http://a/b/')
        self.checkJoin(RFC1808_BASE, '../', 'http://a/b/')
        self.checkJoin(RFC1808_BASE, '../g', 'http://a/b/g')
        self.checkJoin(RFC1808_BASE, '../..', 'http://a/')
        self.checkJoin(RFC1808_BASE, '../../', 'http://a/')
        self.checkJoin(RFC1808_BASE, '../../g', 'http://a/g')

        # "abnormal" cases from RFC 1808:
        self.checkJoin(RFC1808_BASE, '', 'http://a/b/c/d;p?q#f')
        self.checkJoin(RFC1808_BASE, '../../../g', 'http://a/../g')
        self.checkJoin(RFC1808_BASE, '../../../../g', 'http://a/../../g')
        self.checkJoin(RFC1808_BASE, '/./g', 'http://a/./g')
        self.checkJoin(RFC1808_BASE, '/../g', 'http://a/../g')
        self.checkJoin(RFC1808_BASE, 'g.', 'http://a/b/c/g.')
        self.checkJoin(RFC1808_BASE, '.g', 'http://a/b/c/.g')
        self.checkJoin(RFC1808_BASE, 'g..', 'http://a/b/c/g..')
        self.checkJoin(RFC1808_BASE, '..g', 'http://a/b/c/..g')
        self.checkJoin(RFC1808_BASE, './../g', 'http://a/b/g')
        self.checkJoin(RFC1808_BASE, './g/.', 'http://a/b/c/g/')
        self.checkJoin(RFC1808_BASE, 'g/./h', 'http://a/b/c/g/h')
        self.checkJoin(RFC1808_BASE, 'g/../h', 'http://a/b/c/h')

        # RFC 1808 and RFC 1630 disagree on these (according to RFC 1808),
        # so we'll not actually run these tests (which expect 1808 behavior).
        #self.checkJoin(RFC1808_BASE, 'http:g', 'http:g')
        #self.checkJoin(RFC1808_BASE, 'http:', 'http:')

    def test_RFC2396(self):
        # cases from RFC 2396
        self.checkJoin(RFC2396_BASE, 'g:h', 'g:h')
        self.checkJoin(RFC2396_BASE, 'g', 'http://a/b/c/g')
        self.checkJoin(RFC2396_BASE, './g', 'http://a/b/c/g')
        self.checkJoin(RFC2396_BASE, 'g/', 'http://a/b/c/g/')
        self.checkJoin(RFC2396_BASE, '/g', 'http://a/g')
        self.checkJoin(RFC2396_BASE, '//g', 'http://g')
        self.checkJoin(RFC2396_BASE, 'g?y', 'http://a/b/c/g?y')
        self.checkJoin(RFC2396_BASE, '#s', 'http://a/b/c/d;p?q#s')
        self.checkJoin(RFC2396_BASE, 'g#s', 'http://a/b/c/g#s')
        self.checkJoin(RFC2396_BASE, 'g?y#s', 'http://a/b/c/g?y#s')
        self.checkJoin(RFC2396_BASE, 'g;x', 'http://a/b/c/g;x')
        self.checkJoin(RFC2396_BASE, 'g;x?y#s', 'http://a/b/c/g;x?y#s')
        self.checkJoin(RFC2396_BASE, '.', 'http://a/b/c/')
        self.checkJoin(RFC2396_BASE, './', 'http://a/b/c/')
        self.checkJoin(RFC2396_BASE, '..', 'http://a/b/')
        self.checkJoin(RFC2396_BASE, '../', 'http://a/b/')
        self.checkJoin(RFC2396_BASE, '../g', 'http://a/b/g')
        self.checkJoin(RFC2396_BASE, '../..', 'http://a/')
        self.checkJoin(RFC2396_BASE, '../../', 'http://a/')
        self.checkJoin(RFC2396_BASE, '../../g', 'http://a/g')
        self.checkJoin(RFC2396_BASE, '', RFC2396_BASE)
        self.checkJoin(RFC2396_BASE, '../../../g', 'http://a/../g')
        self.checkJoin(RFC2396_BASE, '../../../../g', 'http://a/../../g')
        self.checkJoin(RFC2396_BASE, '/./g', 'http://a/./g')
        self.checkJoin(RFC2396_BASE, '/../g', 'http://a/../g')
        self.checkJoin(RFC2396_BASE, 'g.', 'http://a/b/c/g.')
        self.checkJoin(RFC2396_BASE, '.g', 'http://a/b/c/.g')
        self.checkJoin(RFC2396_BASE, 'g..', 'http://a/b/c/g..')
        self.checkJoin(RFC2396_BASE, '..g', 'http://a/b/c/..g')
        self.checkJoin(RFC2396_BASE, './../g', 'http://a/b/g')
        self.checkJoin(RFC2396_BASE, './g/.', 'http://a/b/c/g/')
        self.checkJoin(RFC2396_BASE, 'g/./h', 'http://a/b/c/g/h')
        self.checkJoin(RFC2396_BASE, 'g/../h', 'http://a/b/c/h')
        self.checkJoin(RFC2396_BASE, 'g;x=1/./y', 'http://a/b/c/g;x=1/y')
        self.checkJoin(RFC2396_BASE, 'g;x=1/../y', 'http://a/b/c/y')
        self.checkJoin(RFC2396_BASE, 'g?y/./x', 'http://a/b/c/g?y/./x')
        self.checkJoin(RFC2396_BASE, 'g?y/../x', 'http://a/b/c/g?y/../x')
        self.checkJoin(RFC2396_BASE, 'g#s/./x', 'http://a/b/c/g#s/./x')
        self.checkJoin(RFC2396_BASE, 'g#s/../x', 'http://a/b/c/g#s/../x')

        # The following scenarios have been updated in RFC3986
        #self.checkJoin(RFC2396_BASE, '?y', 'http://a/b/c/?y')
        #self.checkJoin(RFC2396_BASE, ';x', 'http://a/b/c/;x')

    def test_RFC3986(self):
        # Both joins use the RFC 3986 base; the second one previously
        # named RFC2396_BASE, which only worked because the two constants
        # hold the same string.
        self.checkJoin(RFC3986_BASE, '?y', 'http://a/b/c/d;p?y')
        self.checkJoin(RFC3986_BASE, ';x', 'http://a/b/c/;x')

    def test_RFC2732(self):
        # Each entry is (url, expected hostname, expected port); the same
        # hosts are checked with and without an explicit port.
        for url, hostname, port in [
            ('http://Test.python.org:5432/foo/', 'test.python.org', 5432),
            ('http://12.34.56.78:5432/foo/', '12.34.56.78', 5432),
            ('http://[::1]:5432/foo/', '::1', 5432),
            ('http://[dead:beef::1]:5432/foo/', 'dead:beef::1', 5432),
            ('http://[dead:beef::]:5432/foo/', 'dead:beef::', 5432),
            ('http://[dead:beef:cafe:5417:affe:8FA3:deaf:feed]:5432/foo/',
             'dead:beef:cafe:5417:affe:8fa3:deaf:feed', 5432),
            ('http://[::12.34.56.78]:5432/foo/', '::12.34.56.78', 5432),
            ('http://[::ffff:12.34.56.78]:5432/foo/',
             '::ffff:12.34.56.78', 5432),
            ('http://Test.python.org/foo/', 'test.python.org', None),
            ('http://12.34.56.78/foo/', '12.34.56.78', None),
            ('http://[::1]/foo/', '::1', None),
            ('http://[dead:beef::1]/foo/', 'dead:beef::1', None),
            ('http://[dead:beef::]/foo/', 'dead:beef::', None),
            ('http://[dead:beef:cafe:5417:affe:8FA3:deaf:feed]/foo/',
             'dead:beef:cafe:5417:affe:8fa3:deaf:feed', None),
            ('http://[::12.34.56.78]/foo/', '::12.34.56.78', None),
            ('http://[::ffff:12.34.56.78]/foo/',
             '::ffff:12.34.56.78', None),
        ]:
            urlparsed = urllib.parse.urlparse(url)
            self.assertEqual((urlparsed.hostname, urlparsed.port),
                             (hostname, port))

        # URLs with unbalanced square brackets must be rejected.
        for invalid_url in [
            'http://::12.34.56.78]/',
            'http://[::1/foo/',
            'ftp://[::1/foo/bad]/bad',
            'http://[::1/foo/bad]/bad',
            'http://[::ffff:12.34.56.78',
        ]:
            self.assertRaises(ValueError, urllib.parse.urlparse, invalid_url)

    def test_urldefrag(self):
        # Each entry is (url, expected URL without fragment, expected
        # fragment).
        for url, defrag, frag in [
            ('http://python.org#frag', 'http://python.org', 'frag'),
            ('http://python.org', 'http://python.org', ''),
            ('http://python.org/#frag', 'http://python.org/', 'frag'),
            ('http://python.org/', 'http://python.org/', ''),
            ('http://python.org/?q#frag', 'http://python.org/?q', 'frag'),
            ('http://python.org/?q', 'http://python.org/?q', ''),
            ('http://python.org/p#frag', 'http://python.org/p', 'frag'),
            ('http://python.org/p?q', 'http://python.org/p?q', ''),
            (RFC1808_BASE, 'http://a/b/c/d;p?q', 'f'),
            (RFC2396_BASE, 'http://a/b/c/d;p?q', ''),
        ]:
            self.assertEqual(urllib.parse.urldefrag(url), (defrag, frag))

    def test_urlsplit_attributes(self):
        url = "HTTP://WWW.PYTHON.ORG/doc/#frag"
        p = urllib.parse.urlsplit(url)
        self.assertEqual(p.scheme, "http")
        self.assertEqual(p.netloc, "WWW.PYTHON.ORG")
        self.assertEqual(p.path, "/doc/")
        self.assertEqual(p.query, "")
        self.assertEqual(p.fragment, "frag")
        self.assertEqual(p.username, None)
        self.assertEqual(p.password, None)
        self.assertEqual(p.hostname, "www.python.org")
        self.assertEqual(p.port, None)
        # geturl() won't return exactly the original URL in this case
        # since the scheme is always case-normalized
        #self.assertEqual(p.geturl(), url)

        url = "http://User:Pass@www.python.org:080/doc/?query=yes#frag"
        p = urllib.parse.urlsplit(url)
        self.assertEqual(p.scheme, "http")
        self.assertEqual(p.netloc, "User:Pass@www.python.org:080")
        self.assertEqual(p.path, "/doc/")
        self.assertEqual(p.query, "query=yes")
        self.assertEqual(p.fragment, "frag")
        self.assertEqual(p.username, "User")
        self.assertEqual(p.password, "Pass")
        self.assertEqual(p.hostname, "www.python.org")
        self.assertEqual(p.port, 80)
        self.assertEqual(p.geturl(), url)

        # Addressing issue1698, which suggests Username can contain
        # "@" characters.  Though not RFC compliant, many ftp sites allow
        # and request email addresses as usernames.

        url = ("http://User@example.com:Pass@www.python.org:080"
               "/doc/?query=yes#frag")
        p = urllib.parse.urlsplit(url)
        self.assertEqual(p.scheme, "http")
        self.assertEqual(p.netloc, "User@example.com:Pass@www.python.org:080")
        self.assertEqual(p.path, "/doc/")
        self.assertEqual(p.query, "query=yes")
        self.assertEqual(p.fragment, "frag")
        self.assertEqual(p.username, "User@example.com")
        self.assertEqual(p.password, "Pass")
        self.assertEqual(p.hostname, "www.python.org")
        self.assertEqual(p.port, 80)
        self.assertEqual(p.geturl(), url)

    def test_attributes_bad_port(self):
        """Check handling of non-integer ports."""
        p = urllib.parse.urlsplit("http://www.example.net:foo")
        self.assertEqual(p.netloc, "www.example.net:foo")
        # .port is an attribute access, so wrap it in a callable for
        # assertRaises.
        self.assertRaises(ValueError, lambda: p.port)

        p = urllib.parse.urlparse("http://www.example.net:foo")
        self.assertEqual(p.netloc, "www.example.net:foo")
        self.assertRaises(ValueError, lambda: p.port)

    def test_attributes_without_netloc(self):
        # This example is straight from RFC 3261.  It looks like it
        # should allow the username, hostname, and port to be filled
        # in, but doesn't.  Since it's a URI and doesn't use the
        # scheme://netloc syntax, the netloc and related attributes
        # should be left empty.
        uri = "sip:alice@atlanta.com;maddr=239.255.255.1;ttl=15"
        p = urllib.parse.urlsplit(uri)
        self.assertEqual(p.netloc, "")
        self.assertEqual(p.username, None)
        self.assertEqual(p.password, None)
        self.assertEqual(p.hostname, None)
        self.assertEqual(p.port, None)
        self.assertEqual(p.geturl(), uri)

        p = urllib.parse.urlparse(uri)
        self.assertEqual(p.netloc, "")
        self.assertEqual(p.username, None)
        self.assertEqual(p.password, None)
        self.assertEqual(p.hostname, None)
        self.assertEqual(p.port, None)
        self.assertEqual(p.geturl(), uri)

    def test_noslash(self):
        # Issue 1637: http://foo.com?query is legal
        self.assertEqual(
            urllib.parse.urlparse("http://example.com?blahblah=/foo"),
            ('http', 'example.com', '', '', 'blahblah=/foo', ''))

    def test_usingsys(self):
        # Issue 3314: sys module is used in the error
        self.assertRaises(TypeError, urllib.parse.urlencode, "foo")

    def test_anyscheme(self):
        # Issue 7904: s3://foo.com/stuff has netloc "foo.com".
        self.assertEqual(urllib.parse.urlparse("s3://foo.com/stuff"),
                         ('s3', 'foo.com', '/stuff', '', '', ''))
        self.assertEqual(urllib.parse.urlparse("x-newscheme://foo.com/stuff"),
                         ('x-newscheme', 'foo.com', '/stuff', '', '', ''))
Senthil Kumaran6be85c52010-02-19 07:42:50 +0000391
def test_main():
    """Run every test in UrlParseTestCase through test.support."""
    support.run_unittest(UrlParseTestCase)


if __name__ == "__main__":
    test_main()