blob: 8c8070b11cbccfc18c61e6e65d8bb7a714604293 [file] [log] [blame]
Benjamin Peterson90f5ba52010-03-11 22:53:45 +00001#! /usr/bin/env python3
Skip Montanaro6ec967d2002-03-23 05:32:10 +00002
Benjamin Petersonee8712c2008-05-20 21:35:26 +00003from test import support
Skip Montanaro6ec967d2002-03-23 05:32:10 +00004import unittest
Jeremy Hylton1afc1692008-06-18 20:49:58 +00005import urllib.parse
Fred Drakea4d18a02001-01-05 05:57:04 +00006
# Base URLs against which the relative references in the RFC-derived
# urljoin() tests are resolved.  The RFC 1808 base carries a fragment;
# the RFC 2396 and RFC 3986 bases are the same URL without one.
RFC1808_BASE = "http://a/b/c/d;p?q#f"
RFC2396_BASE = "http://a/b/c/d;p?q"
RFC3986_BASE = "http://a/b/c/d;p?q"
Fred Drakea4d18a02001-01-05 05:57:04 +000010
# A list of test cases.  Each test case is a two-tuple that contains
# a string with the query and a list with the expected (name, value)
# pairs, in the order they appear in the query.  The expectations assume
# blank values are kept (an empty value shows up as '').

parse_qsl_test_cases = [
    ("", []),
    ("&", []),
    ("&&", []),
    ("=", [('', '')]),
    ("=a", [('', 'a')]),
    ("a", [('a', '')]),
    ("a=", [('a', '')]),
    ("&a=b", [('a', 'b')]),
    ("a=a+b&b=b+c", [('a', 'a b'), ('b', 'b c')]),
    ("a=1&a=2", [('a', '1'), ('a', '2')]),
]
27
class UrlParseTestCase(unittest.TestCase):
    """Tests for urllib.parse: parsing, unparsing, joining and the
    attributes exposed by the parse results."""

    # Positional fields of the tuples returned by urlsplit() / urlparse().
    _SPLIT_FIELDS = ('scheme', 'netloc', 'path', 'query', 'fragment')
    _PARSE_FIELDS = ('scheme', 'netloc', 'path', 'params', 'query',
                     'fragment')
    # Additional attributes of the result objects that are compared when
    # checking that re-parsing geturl() is a fixpoint.
    _NETLOC_ATTRS = ('username', 'password', 'hostname', 'port')

    def _check_one_roundtrip(self, url, expected, parse, unparse, fields):
        """Check parse -> unparse -> parse for one parse/unparse pair.

        url      -- the URL string to parse
        expected -- the tuple the parse result must compare equal to
        parse    -- urllib.parse.urlparse or urllib.parse.urlsplit
        unparse  -- the matching urlunparse / urlunsplit
        fields   -- names of the positional fields of the parse result
        """
        result = parse(url)
        self.assertEqual(result, expected)
        # The named attributes must agree with the positional contents.
        by_name = tuple(getattr(result, name) for name in fields)
        self.assertEqual(by_name, expected)

        # put it back together and it should be the same
        rebuilt = unparse(result)
        self.assertEqual(rebuilt, url)
        self.assertEqual(rebuilt, result.geturl())

        # the result of geturl() is a fixpoint; we can always parse it
        # again to get the same result:
        reparsed = parse(result.geturl())
        self.assertEqual(reparsed.geturl(), result.geturl())
        self.assertEqual(reparsed, result)
        for name in fields + self._NETLOC_ATTRS:
            self.assertEqual(getattr(reparsed, name), getattr(result, name),
                             name)

    def checkRoundtrips(self, url, parsed, split):
        """Check that url round-trips through urlparse() and urlsplit().

        parsed is the expected 6-tuple from urlparse(url) and split the
        expected 5-tuple from urlsplit(url).
        """
        self._check_one_roundtrip(url, parsed,
                                  urllib.parse.urlparse,
                                  urllib.parse.urlunparse,
                                  self._PARSE_FIELDS)
        # check the roundtrip using urlsplit() as well
        self._check_one_roundtrip(url, split,
                                  urllib.parse.urlsplit,
                                  urllib.parse.urlunsplit,
                                  self._SPLIT_FIELDS)

    def test_qsl(self):
        """parse_qsl() with keep_blank_values=True on the shared cases."""
        for orig, expect in parse_qsl_test_cases:
            result = urllib.parse.parse_qsl(orig, keep_blank_values=True)
            self.assertEqual(result, expect, "Error parsing %r" % (orig,))

    def test_roundtrips(self):
        """Round-trip URLs that use schemes other than http/https."""
        testcases = [
            ('file:///tmp/junk.txt',
             ('file', '', '/tmp/junk.txt', '', '', ''),
             ('file', '', '/tmp/junk.txt', '', '')),
            ('imap://mail.python.org/mbox1',
             ('imap', 'mail.python.org', '/mbox1', '', '', ''),
             ('imap', 'mail.python.org', '/mbox1', '', '')),
            ('mms://wms.sys.hinet.net/cts/Drama/09006251100.asf',
             ('mms', 'wms.sys.hinet.net', '/cts/Drama/09006251100.asf',
              '', '', ''),
             ('mms', 'wms.sys.hinet.net', '/cts/Drama/09006251100.asf',
              '', '')),
            ('nfs://server/path/to/file.txt',
             ('nfs', 'server', '/path/to/file.txt', '', '', ''),
             ('nfs', 'server', '/path/to/file.txt', '', '')),
            ('svn+ssh://svn.zope.org/repos/main/ZConfig/trunk/',
             ('svn+ssh', 'svn.zope.org', '/repos/main/ZConfig/trunk/',
              '', '', ''),
             ('svn+ssh', 'svn.zope.org', '/repos/main/ZConfig/trunk/',
              '', '')),
        ]
        for url, parsed, split in testcases:
            self.checkRoundtrips(url, parsed, split)

    def test_http_roundtrips(self):
        # urllib.parse.urlsplit treats 'http:' as an optimized special case,
        # so we test both 'http:' and 'https:' in all the following.
        # Three cheers for white box knowledge!
        testcases = [
            ('://www.python.org',
             ('www.python.org', '', '', '', ''),
             ('www.python.org', '', '', '')),
            ('://www.python.org#abc',
             ('www.python.org', '', '', '', 'abc'),
             ('www.python.org', '', '', 'abc')),
            ('://www.python.org?q=abc',
             ('www.python.org', '', '', 'q=abc', ''),
             ('www.python.org', '', 'q=abc', '')),
            ('://www.python.org/#abc',
             ('www.python.org', '/', '', '', 'abc'),
             ('www.python.org', '/', '', 'abc')),
            ('://a/b/c/d;p?q#f',
             ('a', '/b/c/d', 'p', 'q', 'f'),
             ('a', '/b/c/d;p', 'q', 'f')),
        ]
        for scheme in ('http', 'https'):
            # Each case omits the scheme; prepend it to the URL and to
            # both expected tuples.
            for suffix, parsed, split in testcases:
                self.checkRoundtrips(scheme + suffix,
                                     (scheme,) + parsed,
                                     (scheme,) + split)

    def checkJoin(self, base, relurl, expected):
        """Assert that urljoin(base, relurl) == expected.

        The (base, relurl, expected) triple is used as the failure message.
        """
        self.assertEqual(urllib.parse.urljoin(base, relurl), expected,
                         (base, relurl, expected))

    def test_unparse_parse(self):
        """Unparsing a parsed URL must reproduce the input string."""
        urls = ['Python', './Python', 'x-newscheme://foo.com/stuff',
                'x://y', 'x:/y', 'x:/', '/']
        for u in urls:
            self.assertEqual(
                urllib.parse.urlunsplit(urllib.parse.urlsplit(u)), u)
            self.assertEqual(
                urllib.parse.urlunparse(urllib.parse.urlparse(u)), u)

    def test_RFC1808(self):
        # "normal" cases from RFC 1808:
        self.checkJoin(RFC1808_BASE, 'g:h', 'g:h')
        self.checkJoin(RFC1808_BASE, 'g', 'http://a/b/c/g')
        self.checkJoin(RFC1808_BASE, './g', 'http://a/b/c/g')
        self.checkJoin(RFC1808_BASE, 'g/', 'http://a/b/c/g/')
        self.checkJoin(RFC1808_BASE, '/g', 'http://a/g')
        self.checkJoin(RFC1808_BASE, '//g', 'http://g')
        self.checkJoin(RFC1808_BASE, 'g?y', 'http://a/b/c/g?y')
        self.checkJoin(RFC1808_BASE, 'g?y/./x', 'http://a/b/c/g?y/./x')
        self.checkJoin(RFC1808_BASE, '#s', 'http://a/b/c/d;p?q#s')
        self.checkJoin(RFC1808_BASE, 'g#s', 'http://a/b/c/g#s')
        self.checkJoin(RFC1808_BASE, 'g#s/./x', 'http://a/b/c/g#s/./x')
        self.checkJoin(RFC1808_BASE, 'g?y#s', 'http://a/b/c/g?y#s')
        self.checkJoin(RFC1808_BASE, 'g;x', 'http://a/b/c/g;x')
        self.checkJoin(RFC1808_BASE, 'g;x?y#s', 'http://a/b/c/g;x?y#s')
        self.checkJoin(RFC1808_BASE, '.', 'http://a/b/c/')
        self.checkJoin(RFC1808_BASE, './', 'http://a/b/c/')
        self.checkJoin(RFC1808_BASE, '..', 'http://a/b/')
        self.checkJoin(RFC1808_BASE, '../', 'http://a/b/')
        self.checkJoin(RFC1808_BASE, '../g', 'http://a/b/g')
        self.checkJoin(RFC1808_BASE, '../..', 'http://a/')
        self.checkJoin(RFC1808_BASE, '../../', 'http://a/')
        self.checkJoin(RFC1808_BASE, '../../g', 'http://a/g')

        # "abnormal" cases from RFC 1808:
        self.checkJoin(RFC1808_BASE, '', 'http://a/b/c/d;p?q#f')
        self.checkJoin(RFC1808_BASE, '../../../g', 'http://a/../g')
        self.checkJoin(RFC1808_BASE, '../../../../g', 'http://a/../../g')
        self.checkJoin(RFC1808_BASE, '/./g', 'http://a/./g')
        self.checkJoin(RFC1808_BASE, '/../g', 'http://a/../g')
        self.checkJoin(RFC1808_BASE, 'g.', 'http://a/b/c/g.')
        self.checkJoin(RFC1808_BASE, '.g', 'http://a/b/c/.g')
        self.checkJoin(RFC1808_BASE, 'g..', 'http://a/b/c/g..')
        self.checkJoin(RFC1808_BASE, '..g', 'http://a/b/c/..g')
        self.checkJoin(RFC1808_BASE, './../g', 'http://a/b/g')
        self.checkJoin(RFC1808_BASE, './g/.', 'http://a/b/c/g/')
        self.checkJoin(RFC1808_BASE, 'g/./h', 'http://a/b/c/g/h')
        self.checkJoin(RFC1808_BASE, 'g/../h', 'http://a/b/c/h')

        # RFC 1808 and RFC 1630 disagree on these (according to RFC 1808),
        # so we'll not actually run these tests (which expect 1808 behavior).
        #self.checkJoin(RFC1808_BASE, 'http:g', 'http:g')
        #self.checkJoin(RFC1808_BASE, 'http:', 'http:')

    def test_RFC2396(self):
        # cases from RFC 2396
        self.checkJoin(RFC2396_BASE, 'g:h', 'g:h')
        self.checkJoin(RFC2396_BASE, 'g', 'http://a/b/c/g')
        self.checkJoin(RFC2396_BASE, './g', 'http://a/b/c/g')
        self.checkJoin(RFC2396_BASE, 'g/', 'http://a/b/c/g/')
        self.checkJoin(RFC2396_BASE, '/g', 'http://a/g')
        self.checkJoin(RFC2396_BASE, '//g', 'http://g')
        self.checkJoin(RFC2396_BASE, 'g?y', 'http://a/b/c/g?y')
        self.checkJoin(RFC2396_BASE, '#s', 'http://a/b/c/d;p?q#s')
        self.checkJoin(RFC2396_BASE, 'g#s', 'http://a/b/c/g#s')
        self.checkJoin(RFC2396_BASE, 'g?y#s', 'http://a/b/c/g?y#s')
        self.checkJoin(RFC2396_BASE, 'g;x', 'http://a/b/c/g;x')
        self.checkJoin(RFC2396_BASE, 'g;x?y#s', 'http://a/b/c/g;x?y#s')
        self.checkJoin(RFC2396_BASE, '.', 'http://a/b/c/')
        self.checkJoin(RFC2396_BASE, './', 'http://a/b/c/')
        self.checkJoin(RFC2396_BASE, '..', 'http://a/b/')
        self.checkJoin(RFC2396_BASE, '../', 'http://a/b/')
        self.checkJoin(RFC2396_BASE, '../g', 'http://a/b/g')
        self.checkJoin(RFC2396_BASE, '../..', 'http://a/')
        self.checkJoin(RFC2396_BASE, '../../', 'http://a/')
        self.checkJoin(RFC2396_BASE, '../../g', 'http://a/g')
        self.checkJoin(RFC2396_BASE, '', RFC2396_BASE)
        self.checkJoin(RFC2396_BASE, '../../../g', 'http://a/../g')
        self.checkJoin(RFC2396_BASE, '../../../../g', 'http://a/../../g')
        self.checkJoin(RFC2396_BASE, '/./g', 'http://a/./g')
        self.checkJoin(RFC2396_BASE, '/../g', 'http://a/../g')
        self.checkJoin(RFC2396_BASE, 'g.', 'http://a/b/c/g.')
        self.checkJoin(RFC2396_BASE, '.g', 'http://a/b/c/.g')
        self.checkJoin(RFC2396_BASE, 'g..', 'http://a/b/c/g..')
        self.checkJoin(RFC2396_BASE, '..g', 'http://a/b/c/..g')
        self.checkJoin(RFC2396_BASE, './../g', 'http://a/b/g')
        self.checkJoin(RFC2396_BASE, './g/.', 'http://a/b/c/g/')
        self.checkJoin(RFC2396_BASE, 'g/./h', 'http://a/b/c/g/h')
        self.checkJoin(RFC2396_BASE, 'g/../h', 'http://a/b/c/h')
        self.checkJoin(RFC2396_BASE, 'g;x=1/./y', 'http://a/b/c/g;x=1/y')
        self.checkJoin(RFC2396_BASE, 'g;x=1/../y', 'http://a/b/c/y')
        self.checkJoin(RFC2396_BASE, 'g?y/./x', 'http://a/b/c/g?y/./x')
        self.checkJoin(RFC2396_BASE, 'g?y/../x', 'http://a/b/c/g?y/../x')
        self.checkJoin(RFC2396_BASE, 'g#s/./x', 'http://a/b/c/g#s/./x')
        self.checkJoin(RFC2396_BASE, 'g#s/../x', 'http://a/b/c/g#s/../x')

        # The following scenarios have been updated in RFC3986
        #self.checkJoin(RFC2396_BASE, '?y', 'http://a/b/c/?y')
        #self.checkJoin(RFC2396_BASE, ';x', 'http://a/b/c/;x')

    def test_RFC3986(self):
        """Joins that are left out of test_RFC2396 as updated by RFC 3986."""
        self.checkJoin(RFC3986_BASE, '?y', 'http://a/b/c/d;p?y')
        self.checkJoin(RFC3986_BASE, ';x', 'http://a/b/c/;x')

    def test_RFC2732(self):
        """hostname/port extraction for bracketed IPv6 literals, and
        rejection of netlocs with unbalanced brackets."""
        for url, hostname, port in [
            ('http://Test.python.org:5432/foo/', 'test.python.org', 5432),
            ('http://12.34.56.78:5432/foo/', '12.34.56.78', 5432),
            ('http://[::1]:5432/foo/', '::1', 5432),
            ('http://[dead:beef::1]:5432/foo/', 'dead:beef::1', 5432),
            ('http://[dead:beef::]:5432/foo/', 'dead:beef::', 5432),
            ('http://[dead:beef:cafe:5417:affe:8FA3:deaf:feed]:5432/foo/',
             'dead:beef:cafe:5417:affe:8fa3:deaf:feed', 5432),
            ('http://[::12.34.56.78]:5432/foo/', '::12.34.56.78', 5432),
            ('http://[::ffff:12.34.56.78]:5432/foo/',
             '::ffff:12.34.56.78', 5432),
            ('http://Test.python.org/foo/', 'test.python.org', None),
            ('http://12.34.56.78/foo/', '12.34.56.78', None),
            ('http://[::1]/foo/', '::1', None),
            ('http://[dead:beef::1]/foo/', 'dead:beef::1', None),
            ('http://[dead:beef::]/foo/', 'dead:beef::', None),
            ('http://[dead:beef:cafe:5417:affe:8FA3:deaf:feed]/foo/',
             'dead:beef:cafe:5417:affe:8fa3:deaf:feed', None),
            ('http://[::12.34.56.78]/foo/', '::12.34.56.78', None),
            ('http://[::ffff:12.34.56.78]/foo/',
             '::ffff:12.34.56.78', None),
        ]:
            urlparsed = urllib.parse.urlparse(url)
            # Pass the URL as the message so a failure names the case.
            self.assertEqual((urlparsed.hostname, urlparsed.port),
                             (hostname, port), url)

        for invalid_url in [
            'http://::12.34.56.78]/',
            'http://[::1/foo/',
            'http://[::ffff:12.34.56.78',
        ]:
            # urlparse() itself must raise, so there is no need to also
            # touch .hostname on its (never produced) result.
            self.assertRaises(ValueError, urllib.parse.urlparse, invalid_url)

    def test_urldefrag(self):
        """urldefrag() splits a URL into (URL without fragment, fragment)."""
        for url, defrag, frag in [
            ('http://python.org#frag', 'http://python.org', 'frag'),
            ('http://python.org', 'http://python.org', ''),
            ('http://python.org/#frag', 'http://python.org/', 'frag'),
            ('http://python.org/', 'http://python.org/', ''),
            ('http://python.org/?q#frag', 'http://python.org/?q', 'frag'),
            ('http://python.org/?q', 'http://python.org/?q', ''),
            ('http://python.org/p#frag', 'http://python.org/p', 'frag'),
            ('http://python.org/p?q', 'http://python.org/p?q', ''),
            (RFC1808_BASE, 'http://a/b/c/d;p?q', 'f'),
            (RFC2396_BASE, 'http://a/b/c/d;p?q', ''),
        ]:
            self.assertEqual(urllib.parse.urldefrag(url), (defrag, frag), url)

    def test_urlsplit_attributes(self):
        """Attributes of urlsplit() results, with and without userinfo."""
        url = "HTTP://WWW.PYTHON.ORG/doc/#frag"
        p = urllib.parse.urlsplit(url)
        self.assertEqual(p.scheme, "http")
        self.assertEqual(p.netloc, "WWW.PYTHON.ORG")
        self.assertEqual(p.path, "/doc/")
        self.assertEqual(p.query, "")
        self.assertEqual(p.fragment, "frag")
        self.assertIsNone(p.username)
        self.assertIsNone(p.password)
        self.assertEqual(p.hostname, "www.python.org")
        self.assertIsNone(p.port)
        # geturl() won't return exactly the original URL in this case
        # since the scheme is always case-normalized, so p.geturl() is
        # deliberately not compared against url here.

        url = "http://User:Pass@www.python.org:080/doc/?query=yes#frag"
        p = urllib.parse.urlsplit(url)
        self.assertEqual(p.scheme, "http")
        self.assertEqual(p.netloc, "User:Pass@www.python.org:080")
        self.assertEqual(p.path, "/doc/")
        self.assertEqual(p.query, "query=yes")
        self.assertEqual(p.fragment, "frag")
        self.assertEqual(p.username, "User")
        self.assertEqual(p.password, "Pass")
        self.assertEqual(p.hostname, "www.python.org")
        self.assertEqual(p.port, 80)
        self.assertEqual(p.geturl(), url)

        # Addressing issue1698, which suggests Username can contain
        # "@" characters.  Though not RFC compliant, many ftp sites allow
        # and request email addresses as usernames.
        url = ("http://User@example.com:Pass@www.python.org:080"
               "/doc/?query=yes#frag")
        p = urllib.parse.urlsplit(url)
        self.assertEqual(p.scheme, "http")
        self.assertEqual(p.netloc, "User@example.com:Pass@www.python.org:080")
        self.assertEqual(p.path, "/doc/")
        self.assertEqual(p.query, "query=yes")
        self.assertEqual(p.fragment, "frag")
        self.assertEqual(p.username, "User@example.com")
        self.assertEqual(p.password, "Pass")
        self.assertEqual(p.hostname, "www.python.org")
        self.assertEqual(p.port, 80)
        self.assertEqual(p.geturl(), url)

    def test_attributes_bad_port(self):
        """Check handling of non-integer ports."""
        for parse in (urllib.parse.urlsplit, urllib.parse.urlparse):
            p = parse("http://www.example.net:foo")
            self.assertEqual(p.netloc, "www.example.net:foo")
            # Parsing succeeds; the error only surfaces on .port access.
            with self.assertRaises(ValueError):
                p.port

    def test_attributes_without_netloc(self):
        # This example is straight from RFC 3261.  It looks like it
        # should allow the username, hostname, and port to be filled
        # in, but doesn't.  Since it's a URI and doesn't use the
        # scheme://netloc syntax, the netloc and related attributes
        # should be left empty.
        uri = "sip:alice@atlanta.com;maddr=239.255.255.1;ttl=15"
        for parse in (urllib.parse.urlsplit, urllib.parse.urlparse):
            p = parse(uri)
            self.assertEqual(p.netloc, "")
            self.assertIsNone(p.username)
            self.assertIsNone(p.password)
            self.assertIsNone(p.hostname)
            self.assertIsNone(p.port)
            self.assertEqual(p.geturl(), uri)

    def test_noslash(self):
        # Issue 1637: http://foo.com?query is legal
        self.assertEqual(
            urllib.parse.urlparse("http://example.com?blahblah=/foo"),
            ('http', 'example.com', '', '', 'blahblah=/foo', ''))

    def test_usingsys(self):
        # Issue 3314: sys module is used in the error
        self.assertRaises(TypeError, urllib.parse.urlencode, "foo")

    def test_anyscheme(self):
        # Issue 7904: s3://foo.com/stuff has netloc "foo.com".
        self.assertEqual(urllib.parse.urlparse("s3://foo.com/stuff"),
                         ('s3', 'foo.com', '/stuff', '', '', ''))
        self.assertEqual(
            urllib.parse.urlparse("x-newscheme://foo.com/stuff"),
            ('x-newscheme', 'foo.com', '/stuff', '', '', ''))
Senthil Kumaran6be85c52010-02-19 07:42:50 +0000390
def test_main():
    """Run UrlParseTestCase through test.support.run_unittest()."""
    support.run_unittest(UrlParseTestCase)


# Allow running this file directly as a script.
if __name__ == "__main__":
    test_main()