blob: 00c4a0a893b6b5f5b304cb26479266109aa9f8ac [file] [log] [blame]
Skip Montanaro6ec967d2002-03-23 05:32:10 +00001#! /usr/bin/env python
2
Benjamin Petersonee8712c2008-05-20 21:35:26 +00003from test import support
Skip Montanaro6ec967d2002-03-23 05:32:10 +00004import unittest
Jeremy Hylton1afc1692008-06-18 20:49:58 +00005import urllib.parse
Fred Drakea4d18a02001-01-05 05:57:04 +00006
# Base URLs against which the relative-reference examples from each RFC
# are resolved in the urljoin() tests.
RFC1808_BASE = "http://a/b/c/d;p?q#f"
RFC2396_BASE = "http://a/b/c/d;p?q"
RFC3986_BASE = "http://a/b/c/d;p?q"

# A list of test cases.  Each test case is a two-tuple that contains
# a string with the query and a list of (name, value) pairs with the
# expected result of parse_qsl(query, keep_blank_values=True).

parse_qsl_test_cases = [
    ("", []),
    ("&", []),
    ("&&", []),
    ("=", [('', '')]),
    ("=a", [('', 'a')]),
    ("a", [('a', '')]),
    ("a=", [('a', '')]),
    ("&a=b", [('a', 'b')]),
    ("a=a+b&b=b+c", [('a', 'a b'), ('b', 'b c')]),
    ("a=1&a=2", [('a', '1'), ('a', '2')]),
]
27
class UrlParseTestCase(unittest.TestCase):
    """Tests for urllib.parse: parsing, unparsing, joining and query strings."""

    # Result attributes that are derived from the netloc component.
    _NETLOC_ATTRS = ('username', 'password', 'hostname', 'port')

    def _check_roundtrip(self, parse, unparse, fields, url, expected):
        """Check one parse/unparse pair against *url*.

        parse    -- urlparse or urlsplit
        unparse  -- the matching urlunparse or urlunsplit
        fields   -- names of the tuple fields of the parse result, in order
        url      -- the URL string under test
        expected -- the tuple that parse(url) must compare equal to
        """
        result = parse(url)
        self.assertEqual(result, expected)
        t = tuple(getattr(result, name) for name in fields)
        self.assertEqual(t, expected)
        # put it back together and it should be the same
        result2 = unparse(result)
        self.assertEqual(result2, url)
        self.assertEqual(result2, result.geturl())

        # the result of geturl() is a fixpoint; we can always parse it
        # again to get the same result:
        result3 = parse(result.geturl())
        self.assertEqual(result3.geturl(), result.geturl())
        self.assertEqual(result3, result)
        for name in fields + self._NETLOC_ATTRS:
            self.assertEqual(getattr(result3, name), getattr(result, name),
                             name)

    def checkRoundtrips(self, url, parsed, split):
        """Check that *url* round-trips through urlparse() and urlsplit().

        url    -- the URL string under test
        parsed -- the 6-tuple urlparse(url) is expected to equal
        split  -- the 5-tuple urlsplit(url) is expected to equal
        """
        self._check_roundtrip(
            urllib.parse.urlparse, urllib.parse.urlunparse,
            ('scheme', 'netloc', 'path', 'params', 'query', 'fragment'),
            url, parsed)
        # check the roundtrip using urlsplit() as well
        self._check_roundtrip(
            urllib.parse.urlsplit, urllib.parse.urlunsplit,
            ('scheme', 'netloc', 'path', 'query', 'fragment'),
            url, split)

    def test_qsl(self):
        """parse_qsl() with keep_blank_values=True matches every table entry."""
        for orig, expect in parse_qsl_test_cases:
            result = urllib.parse.parse_qsl(orig, keep_blank_values=True)
            self.assertEqual(result, expect, "Error parsing %r" % (orig,))

    def test_roundtrips(self):
        """Round-trip URLs that use schemes other than http/https."""
        testcases = [
            ('file:///tmp/junk.txt',
             ('file', '', '/tmp/junk.txt', '', '', ''),
             ('file', '', '/tmp/junk.txt', '', '')),
            ('imap://mail.python.org/mbox1',
             ('imap', 'mail.python.org', '/mbox1', '', '', ''),
             ('imap', 'mail.python.org', '/mbox1', '', '')),
            ('mms://wms.sys.hinet.net/cts/Drama/09006251100.asf',
             ('mms', 'wms.sys.hinet.net', '/cts/Drama/09006251100.asf',
              '', '', ''),
             ('mms', 'wms.sys.hinet.net', '/cts/Drama/09006251100.asf',
              '', '')),
            ('nfs://server/path/to/file.txt',
             ('nfs', 'server', '/path/to/file.txt', '', '', ''),
             ('nfs', 'server', '/path/to/file.txt', '', '')),
            ('svn+ssh://svn.zope.org/repos/main/ZConfig/trunk/',
             ('svn+ssh', 'svn.zope.org', '/repos/main/ZConfig/trunk/',
              '', '', ''),
             ('svn+ssh', 'svn.zope.org', '/repos/main/ZConfig/trunk/',
              '', '')),
        ]
        for url, parsed, split in testcases:
            self.checkRoundtrips(url, parsed, split)

    def test_http_roundtrips(self):
        """Round-trip the same URL tails under both 'http' and 'https'."""
        # urllib.parse.urlsplit treats 'http:' as an optimized special case,
        # so we test both 'http:' and 'https:' in all the following.
        # Three cheers for white box knowledge!
        testcases = [
            ('://www.python.org',
             ('www.python.org', '', '', '', ''),
             ('www.python.org', '', '', '')),
            ('://www.python.org#abc',
             ('www.python.org', '', '', '', 'abc'),
             ('www.python.org', '', '', 'abc')),
            ('://www.python.org?q=abc',
             ('www.python.org', '', '', 'q=abc', ''),
             ('www.python.org', '', 'q=abc', '')),
            ('://www.python.org/#abc',
             ('www.python.org', '/', '', '', 'abc'),
             ('www.python.org', '/', '', 'abc')),
            ('://a/b/c/d;p?q#f',
             ('a', '/b/c/d', 'p', 'q', 'f'),
             ('a', '/b/c/d;p', 'q', 'f')),
        ]
        for scheme in ('http', 'https'):
            for tail, parsed, split in testcases:
                # The table omits the scheme; prepend it to the URL and to
                # both expected tuples.
                self.checkRoundtrips(scheme + tail,
                                     (scheme,) + parsed,
                                     (scheme,) + split)

    def checkJoin(self, base, relurl, expected):
        """Assert that urljoin(base, relurl) equals *expected*.

        The failure message is the (base, relurl, expected) triple.
        """
        self.assertEqual(urllib.parse.urljoin(base, relurl), expected,
                         (base, relurl, expected))

    def test_unparse_parse(self):
        """urlunsplit(urlsplit(u)) and urlunparse(urlparse(u)) return u."""
        urls = [
            'Python',
            './Python',
            'x-newscheme://foo.com/stuff',
            'x://y',
            'x:/y',
            'x:/',
            '/',
        ]
        for u in urls:
            self.assertEqual(
                urllib.parse.urlunsplit(urllib.parse.urlsplit(u)), u)
            self.assertEqual(
                urllib.parse.urlunparse(urllib.parse.urlparse(u)), u)

    def test_RFC1808(self):
        """urljoin() against the RFC 1808 example base URL."""
        # "normal" cases from RFC 1808:
        self.checkJoin(RFC1808_BASE, 'g:h', 'g:h')
        self.checkJoin(RFC1808_BASE, 'g', 'http://a/b/c/g')
        self.checkJoin(RFC1808_BASE, './g', 'http://a/b/c/g')
        self.checkJoin(RFC1808_BASE, 'g/', 'http://a/b/c/g/')
        self.checkJoin(RFC1808_BASE, '/g', 'http://a/g')
        self.checkJoin(RFC1808_BASE, '//g', 'http://g')
        self.checkJoin(RFC1808_BASE, 'g?y', 'http://a/b/c/g?y')
        self.checkJoin(RFC1808_BASE, 'g?y/./x', 'http://a/b/c/g?y/./x')
        self.checkJoin(RFC1808_BASE, '#s', 'http://a/b/c/d;p?q#s')
        self.checkJoin(RFC1808_BASE, 'g#s', 'http://a/b/c/g#s')
        self.checkJoin(RFC1808_BASE, 'g#s/./x', 'http://a/b/c/g#s/./x')
        self.checkJoin(RFC1808_BASE, 'g?y#s', 'http://a/b/c/g?y#s')
        self.checkJoin(RFC1808_BASE, 'g;x', 'http://a/b/c/g;x')
        self.checkJoin(RFC1808_BASE, 'g;x?y#s', 'http://a/b/c/g;x?y#s')
        self.checkJoin(RFC1808_BASE, '.', 'http://a/b/c/')
        self.checkJoin(RFC1808_BASE, './', 'http://a/b/c/')
        self.checkJoin(RFC1808_BASE, '..', 'http://a/b/')
        self.checkJoin(RFC1808_BASE, '../', 'http://a/b/')
        self.checkJoin(RFC1808_BASE, '../g', 'http://a/b/g')
        self.checkJoin(RFC1808_BASE, '../..', 'http://a/')
        self.checkJoin(RFC1808_BASE, '../../', 'http://a/')
        self.checkJoin(RFC1808_BASE, '../../g', 'http://a/g')

        # "abnormal" cases from RFC 1808:
        self.checkJoin(RFC1808_BASE, '', 'http://a/b/c/d;p?q#f')
        self.checkJoin(RFC1808_BASE, '../../../g', 'http://a/../g')
        self.checkJoin(RFC1808_BASE, '../../../../g', 'http://a/../../g')
        self.checkJoin(RFC1808_BASE, '/./g', 'http://a/./g')
        self.checkJoin(RFC1808_BASE, '/../g', 'http://a/../g')
        self.checkJoin(RFC1808_BASE, 'g.', 'http://a/b/c/g.')
        self.checkJoin(RFC1808_BASE, '.g', 'http://a/b/c/.g')
        self.checkJoin(RFC1808_BASE, 'g..', 'http://a/b/c/g..')
        self.checkJoin(RFC1808_BASE, '..g', 'http://a/b/c/..g')
        self.checkJoin(RFC1808_BASE, './../g', 'http://a/b/g')
        self.checkJoin(RFC1808_BASE, './g/.', 'http://a/b/c/g/')
        self.checkJoin(RFC1808_BASE, 'g/./h', 'http://a/b/c/g/h')
        self.checkJoin(RFC1808_BASE, 'g/../h', 'http://a/b/c/h')

        # RFC 1808 and RFC 1630 disagree on these (according to RFC 1808),
        # so we'll not actually run these tests (which expect 1808 behavior).
        #self.checkJoin(RFC1808_BASE, 'http:g', 'http:g')
        #self.checkJoin(RFC1808_BASE, 'http:', 'http:')

    def test_RFC2396(self):
        """urljoin() against the RFC 2396 example base URL."""
        # cases from RFC 2396
        self.checkJoin(RFC2396_BASE, 'g:h', 'g:h')
        self.checkJoin(RFC2396_BASE, 'g', 'http://a/b/c/g')
        self.checkJoin(RFC2396_BASE, './g', 'http://a/b/c/g')
        self.checkJoin(RFC2396_BASE, 'g/', 'http://a/b/c/g/')
        self.checkJoin(RFC2396_BASE, '/g', 'http://a/g')
        self.checkJoin(RFC2396_BASE, '//g', 'http://g')
        self.checkJoin(RFC2396_BASE, 'g?y', 'http://a/b/c/g?y')
        self.checkJoin(RFC2396_BASE, '#s', 'http://a/b/c/d;p?q#s')
        self.checkJoin(RFC2396_BASE, 'g#s', 'http://a/b/c/g#s')
        self.checkJoin(RFC2396_BASE, 'g?y#s', 'http://a/b/c/g?y#s')
        self.checkJoin(RFC2396_BASE, 'g;x', 'http://a/b/c/g;x')
        self.checkJoin(RFC2396_BASE, 'g;x?y#s', 'http://a/b/c/g;x?y#s')
        self.checkJoin(RFC2396_BASE, '.', 'http://a/b/c/')
        self.checkJoin(RFC2396_BASE, './', 'http://a/b/c/')
        self.checkJoin(RFC2396_BASE, '..', 'http://a/b/')
        self.checkJoin(RFC2396_BASE, '../', 'http://a/b/')
        self.checkJoin(RFC2396_BASE, '../g', 'http://a/b/g')
        self.checkJoin(RFC2396_BASE, '../..', 'http://a/')
        self.checkJoin(RFC2396_BASE, '../../', 'http://a/')
        self.checkJoin(RFC2396_BASE, '../../g', 'http://a/g')
        self.checkJoin(RFC2396_BASE, '', RFC2396_BASE)
        self.checkJoin(RFC2396_BASE, '../../../g', 'http://a/../g')
        self.checkJoin(RFC2396_BASE, '../../../../g', 'http://a/../../g')
        self.checkJoin(RFC2396_BASE, '/./g', 'http://a/./g')
        self.checkJoin(RFC2396_BASE, '/../g', 'http://a/../g')
        self.checkJoin(RFC2396_BASE, 'g.', 'http://a/b/c/g.')
        self.checkJoin(RFC2396_BASE, '.g', 'http://a/b/c/.g')
        self.checkJoin(RFC2396_BASE, 'g..', 'http://a/b/c/g..')
        self.checkJoin(RFC2396_BASE, '..g', 'http://a/b/c/..g')
        self.checkJoin(RFC2396_BASE, './../g', 'http://a/b/g')
        self.checkJoin(RFC2396_BASE, './g/.', 'http://a/b/c/g/')
        self.checkJoin(RFC2396_BASE, 'g/./h', 'http://a/b/c/g/h')
        self.checkJoin(RFC2396_BASE, 'g/../h', 'http://a/b/c/h')
        self.checkJoin(RFC2396_BASE, 'g;x=1/./y', 'http://a/b/c/g;x=1/y')
        self.checkJoin(RFC2396_BASE, 'g;x=1/../y', 'http://a/b/c/y')
        self.checkJoin(RFC2396_BASE, 'g?y/./x', 'http://a/b/c/g?y/./x')
        self.checkJoin(RFC2396_BASE, 'g?y/../x', 'http://a/b/c/g?y/../x')
        self.checkJoin(RFC2396_BASE, 'g#s/./x', 'http://a/b/c/g#s/./x')
        self.checkJoin(RFC2396_BASE, 'g#s/../x', 'http://a/b/c/g#s/../x')

    def test_RFC3986(self):
        """urljoin() against the RFC 3986 example base URL."""
        # Test cases from RFC3986
        self.checkJoin(RFC3986_BASE, 'g:h', 'g:h')
        self.checkJoin(RFC3986_BASE, 'g', 'http://a/b/c/g')
        self.checkJoin(RFC3986_BASE, './g', 'http://a/b/c/g')
        self.checkJoin(RFC3986_BASE, 'g/', 'http://a/b/c/g/')
        self.checkJoin(RFC3986_BASE, '/g', 'http://a/g')
        self.checkJoin(RFC3986_BASE, '//g', 'http://g')
        self.checkJoin(RFC3986_BASE, '?y', 'http://a/b/c/d;p?y')
        self.checkJoin(RFC3986_BASE, 'g?y', 'http://a/b/c/g?y')
        self.checkJoin(RFC3986_BASE, '#s', 'http://a/b/c/d;p?q#s')
        self.checkJoin(RFC3986_BASE, 'g#s', 'http://a/b/c/g#s')
        self.checkJoin(RFC3986_BASE, 'g?y#s', 'http://a/b/c/g?y#s')
        self.checkJoin(RFC3986_BASE, ';x', 'http://a/b/c/;x')
        self.checkJoin(RFC3986_BASE, 'g;x', 'http://a/b/c/g;x')
        self.checkJoin(RFC3986_BASE, 'g;x?y#s', 'http://a/b/c/g;x?y#s')
        self.checkJoin(RFC3986_BASE, '', 'http://a/b/c/d;p?q')
        self.checkJoin(RFC3986_BASE, '.', 'http://a/b/c/')
        self.checkJoin(RFC3986_BASE, './', 'http://a/b/c/')
        self.checkJoin(RFC3986_BASE, '..', 'http://a/b/')
        self.checkJoin(RFC3986_BASE, '../', 'http://a/b/')
        self.checkJoin(RFC3986_BASE, '../g', 'http://a/b/g')
        self.checkJoin(RFC3986_BASE, '../..', 'http://a/')
        self.checkJoin(RFC3986_BASE, '../../', 'http://a/')
        self.checkJoin(RFC3986_BASE, '../../g', 'http://a/g')

        # Abnormal Examples
        # The 'abnormal scenarios' are incompatible with RFC3986 parsing
        # Tests are here for reference.

        #self.checkJoin(RFC3986_BASE, '../../../g','http://a/g')
        #self.checkJoin(RFC3986_BASE, '../../../../g','http://a/g')
        #self.checkJoin(RFC3986_BASE, '/./g','http://a/g')
        #self.checkJoin(RFC3986_BASE, '/../g','http://a/g')

        self.checkJoin(RFC3986_BASE, 'g.', 'http://a/b/c/g.')
        self.checkJoin(RFC3986_BASE, '.g', 'http://a/b/c/.g')
        self.checkJoin(RFC3986_BASE, 'g..', 'http://a/b/c/g..')
        self.checkJoin(RFC3986_BASE, '..g', 'http://a/b/c/..g')
        self.checkJoin(RFC3986_BASE, './../g', 'http://a/b/g')
        self.checkJoin(RFC3986_BASE, './g/.', 'http://a/b/c/g/')
        self.checkJoin(RFC3986_BASE, 'g/./h', 'http://a/b/c/g/h')
        self.checkJoin(RFC3986_BASE, 'g/../h', 'http://a/b/c/h')
        self.checkJoin(RFC3986_BASE, 'g;x=1/./y', 'http://a/b/c/g;x=1/y')
        self.checkJoin(RFC3986_BASE, 'g;x=1/../y', 'http://a/b/c/y')
        self.checkJoin(RFC3986_BASE, 'g?y/./x', 'http://a/b/c/g?y/./x')
        self.checkJoin(RFC3986_BASE, 'g?y/../x', 'http://a/b/c/g?y/../x')
        self.checkJoin(RFC3986_BASE, 'g#s/./x', 'http://a/b/c/g#s/./x')
        self.checkJoin(RFC3986_BASE, 'g#s/../x', 'http://a/b/c/g#s/../x')
        #self.checkJoin(RFC3986_BASE, 'http:g','http:g') # strict parser
        self.checkJoin(RFC3986_BASE, 'http:g', 'http://a/b/c/g')  # relaxed parser

    def test_urldefrag(self):
        """urldefrag() splits each URL into (url-without-fragment, fragment)."""
        for url, defrag, frag in [
            ('http://python.org#frag', 'http://python.org', 'frag'),
            ('http://python.org', 'http://python.org', ''),
            ('http://python.org/#frag', 'http://python.org/', 'frag'),
            ('http://python.org/', 'http://python.org/', ''),
            ('http://python.org/?q#frag', 'http://python.org/?q', 'frag'),
            ('http://python.org/?q', 'http://python.org/?q', ''),
            ('http://python.org/p#frag', 'http://python.org/p', 'frag'),
            ('http://python.org/p?q', 'http://python.org/p?q', ''),
            (RFC1808_BASE, 'http://a/b/c/d;p?q', 'f'),
            (RFC2396_BASE, 'http://a/b/c/d;p?q', ''),
        ]:
            self.assertEqual(urllib.parse.urldefrag(url), (defrag, frag))

    def test_urlsplit_attributes(self):
        """Check the named attributes of urlsplit() results."""
        url = "HTTP://WWW.PYTHON.ORG/doc/#frag"
        p = urllib.parse.urlsplit(url)
        self.assertEqual(p.scheme, "http")
        self.assertEqual(p.netloc, "WWW.PYTHON.ORG")
        self.assertEqual(p.path, "/doc/")
        self.assertEqual(p.query, "")
        self.assertEqual(p.fragment, "frag")
        self.assertIsNone(p.username)
        self.assertIsNone(p.password)
        self.assertEqual(p.hostname, "www.python.org")
        self.assertIsNone(p.port)
        # geturl() won't return exactly the original URL in this case
        # since the scheme is always case-normalized
        #self.assertEqual(p.geturl(), url)

        url = "http://User:Pass@www.python.org:080/doc/?query=yes#frag"
        p = urllib.parse.urlsplit(url)
        self.assertEqual(p.scheme, "http")
        self.assertEqual(p.netloc, "User:Pass@www.python.org:080")
        self.assertEqual(p.path, "/doc/")
        self.assertEqual(p.query, "query=yes")
        self.assertEqual(p.fragment, "frag")
        self.assertEqual(p.username, "User")
        self.assertEqual(p.password, "Pass")
        self.assertEqual(p.hostname, "www.python.org")
        self.assertEqual(p.port, 80)
        self.assertEqual(p.geturl(), url)

        # Addressing issue1698, which suggests Username can contain
        # "@" characters.  Though not RFC compliant, many ftp sites allow
        # and request email addresses as usernames.

        url = "http://User@example.com:Pass@www.python.org:080/doc/?query=yes#frag"
        p = urllib.parse.urlsplit(url)
        self.assertEqual(p.scheme, "http")
        self.assertEqual(p.netloc, "User@example.com:Pass@www.python.org:080")
        self.assertEqual(p.path, "/doc/")
        self.assertEqual(p.query, "query=yes")
        self.assertEqual(p.fragment, "frag")
        self.assertEqual(p.username, "User@example.com")
        self.assertEqual(p.password, "Pass")
        self.assertEqual(p.hostname, "www.python.org")
        self.assertEqual(p.port, 80)
        self.assertEqual(p.geturl(), url)

    def test_attributes_bad_port(self):
        """Check handling of non-integer ports."""
        for parse in (urllib.parse.urlsplit, urllib.parse.urlparse):
            p = parse("http://www.example.net:foo")
            self.assertEqual(p.netloc, "www.example.net:foo")
            # Reading the attribute is what is expected to raise.
            with self.assertRaises(ValueError):
                p.port

    def test_attributes_without_netloc(self):
        """netloc-derived attributes stay empty for a URI with no '//'."""
        # This example is straight from RFC 3261.  It looks like it
        # should allow the username, hostname, and port to be filled
        # in, but doesn't.  Since it's a URI and doesn't use the
        # scheme://netloc syntax, the netloc and related attributes
        # should be left empty.
        uri = "sip:alice@atlanta.com;maddr=239.255.255.1;ttl=15"
        for parse in (urllib.parse.urlsplit, urllib.parse.urlparse):
            p = parse(uri)
            self.assertEqual(p.netloc, "")
            self.assertIsNone(p.username)
            self.assertIsNone(p.password)
            self.assertIsNone(p.hostname)
            self.assertIsNone(p.port)
            self.assertEqual(p.geturl(), uri)

    def test_noslash(self):
        """A query may directly follow the netloc, with no path."""
        # Issue 1637: http://foo.com?query is legal
        self.assertEqual(
            urllib.parse.urlparse("http://example.com?blahblah=/foo"),
            ('http', 'example.com', '', '', 'blahblah=/foo', ''))

    def test_usingsys(self):
        """urlencode() of a plain string raises TypeError."""
        # Issue 3314: sys module is used in the error
        self.assertRaises(TypeError, urllib.parse.urlencode, "foo")

    def test_anyscheme(self):
        """'//' introduces a netloc for schemes urllib.parse has no entry for."""
        # Issue 7904: s3://foo.com/stuff has netloc "foo.com".
        self.assertEqual(urllib.parse.urlparse("s3://foo.com/stuff"),
                         ('s3', 'foo.com', '/stuff', '', '', ''))
        self.assertEqual(urllib.parse.urlparse("x-newscheme://foo.com/stuff"),
                         ('x-newscheme', 'foo.com', '/stuff', '', '', ''))
Senthil Kumarana8dbb242010-02-19 07:45:03 +0000400
def test_main():
    """Run UrlParseTestCase through test.support.run_unittest()."""
    support.run_unittest(UrlParseTestCase)


if __name__ == "__main__":
    test_main()