#! /usr/bin/env python

import unittest
import urllib.parse
from test import support

# Base URLs that the urljoin() tests resolve relative references against.
RFC1808_BASE = "http://a/b/c/d;p?q#f"
RFC2396_BASE = "http://a/b/c/d;p?q"
RFC3986_BASE = 'http://a/b/c/d;p?q'
# Same path as above, but without params, query or fragment.
SIMPLE_BASE = 'http://a/b/c/d'

# A list of test cases.  Each test case is a two-tuple that contains
# a string with the query and the list of (name, value) pairs expected
# from urllib.parse.parse_qsl(query, keep_blank_values=True).

parse_qsl_test_cases = [
    ("", []),
    ("&", []),
    ("&&", []),
    ("=", [('', '')]),
    ("=a", [('', 'a')]),
    ("a", [('a', '')]),
    ("a=", [('a', '')]),
    ("&a=b", [('a', 'b')]),
    ("a=a+b&b=b+c", [('a', 'a b'), ('b', 'b c')]),
    ("a=1&a=2", [('a', '1'), ('a', '2')]),
]

class UrlParseTestCase(unittest.TestCase):
    """Tests for urllib.parse: parsing, unparsing, joining and query strings."""

    def checkFixpoint(self, reparsed, result, components):
        """Assert that *reparsed* carries the same data as *result*.

        *reparsed* is what parsing ``result.geturl()`` again produced and
        *components* is a tuple naming the fields of the result type.  The
        username, password, hostname and port attributes are compared in
        addition to those fields.
        """
        self.assertEqual(reparsed.geturl(), result.geturl())
        self.assertEqual(reparsed, result)
        derived = ('username', 'password', 'hostname', 'port')
        for name in components + derived:
            self.assertEqual(getattr(reparsed, name), getattr(result, name))

    def checkRoundtrips(self, url, parsed, split):
        """Check that *url* survives urlparse() and urlsplit() round trips.

        *parsed* is the 6-tuple expected from urlparse(url) and *split* the
        5-tuple expected from urlsplit(url).
        """
        result = urllib.parse.urlparse(url)
        self.assertEqual(result, parsed)
        t = (result.scheme, result.netloc, result.path,
             result.params, result.query, result.fragment)
        self.assertEqual(t, parsed)
        # put it back together and it should be the same
        result2 = urllib.parse.urlunparse(result)
        self.assertEqual(result2, url)
        self.assertEqual(result2, result.geturl())

        # the result of geturl() is a fixpoint; we can always parse it
        # again to get the same result:
        result3 = urllib.parse.urlparse(result.geturl())
        self.checkFixpoint(result3, result,
                           ('scheme', 'netloc', 'path',
                            'params', 'query', 'fragment'))

        # check the roundtrip using urlsplit() as well
        result = urllib.parse.urlsplit(url)
        self.assertEqual(result, split)
        t = (result.scheme, result.netloc, result.path,
             result.query, result.fragment)
        self.assertEqual(t, split)
        result2 = urllib.parse.urlunsplit(result)
        self.assertEqual(result2, url)
        self.assertEqual(result2, result.geturl())

        # check the fixpoint property of re-parsing the result of geturl()
        result3 = urllib.parse.urlsplit(result.geturl())
        self.checkFixpoint(result3, result,
                           ('scheme', 'netloc', 'path', 'query', 'fragment'))

    def checkJoin(self, base, relurl, expected):
        """Assert that urljoin(base, relurl) gives *expected*."""
        self.assertEqual(urllib.parse.urljoin(base, relurl), expected,
                         (base, relurl, expected))

    def checkJoinCases(self, base, cases):
        """Run checkJoin() against *base* for each (relurl, expected) pair."""
        for relurl, expected in cases:
            self.checkJoin(base, relurl, expected)

    def test_qsl(self):
        for orig, expect in parse_qsl_test_cases:
            result = urllib.parse.parse_qsl(orig, keep_blank_values=True)
            self.assertEqual(result, expect, "Error parsing %r" % (orig,))

    def test_roundtrips(self):
        # Each entry is (url, expected urlparse() tuple, expected urlsplit()
        # tuple).
        testcases = [
            ('file:///tmp/junk.txt',
             ('file', '', '/tmp/junk.txt', '', '', ''),
             ('file', '', '/tmp/junk.txt', '', '')),
            ('imap://mail.python.org/mbox1',
             ('imap', 'mail.python.org', '/mbox1', '', '', ''),
             ('imap', 'mail.python.org', '/mbox1', '', '')),
            ('mms://wms.sys.hinet.net/cts/Drama/09006251100.asf',
             ('mms', 'wms.sys.hinet.net', '/cts/Drama/09006251100.asf',
              '', '', ''),
             ('mms', 'wms.sys.hinet.net', '/cts/Drama/09006251100.asf',
              '', '')),
            ('nfs://server/path/to/file.txt',
             ('nfs', 'server', '/path/to/file.txt', '', '', ''),
             ('nfs', 'server', '/path/to/file.txt', '', '')),
            ('svn+ssh://svn.zope.org/repos/main/ZConfig/trunk/',
             ('svn+ssh', 'svn.zope.org', '/repos/main/ZConfig/trunk/',
              '', '', ''),
             ('svn+ssh', 'svn.zope.org', '/repos/main/ZConfig/trunk/',
              '', '')),
            ('git+ssh://git@github.com/user/project.git',
             ('git+ssh', 'git@github.com', '/user/project.git',
              '', '', ''),
             ('git+ssh', 'git@github.com', '/user/project.git',
              '', '')),
        ]
        for url, parsed, split in testcases:
            self.checkRoundtrips(url, parsed, split)

    def test_http_roundtrips(self):
        # urllib.parse.urlsplit treats 'http:' as an optimized special case,
        # so we test both 'http:' and 'https:' in all the following.
        # Three cheers for white box knowledge!
        # The scheme is left off every entry below and prepended in the loop.
        testcases = [
            ('://www.python.org',
             ('www.python.org', '', '', '', ''),
             ('www.python.org', '', '', '')),
            ('://www.python.org#abc',
             ('www.python.org', '', '', '', 'abc'),
             ('www.python.org', '', '', 'abc')),
            ('://www.python.org?q=abc',
             ('www.python.org', '', '', 'q=abc', ''),
             ('www.python.org', '', 'q=abc', '')),
            ('://www.python.org/#abc',
             ('www.python.org', '/', '', '', 'abc'),
             ('www.python.org', '/', '', 'abc')),
            ('://a/b/c/d;p?q#f',
             ('a', '/b/c/d', 'p', 'q', 'f'),
             ('a', '/b/c/d;p', 'q', 'f')),
        ]
        for scheme in ('http', 'https'):
            for rest, parsed, split in testcases:
                self.checkRoundtrips(scheme + rest,
                                     (scheme,) + parsed,
                                     (scheme,) + split)

    def test_unparse_parse(self):
        urls = [
            'Python',
            './Python',
            'x-newscheme://foo.com/stuff',
            'x://y',
            'x:/y',
            'x:/',
            '/',
        ]
        for u in urls:
            self.assertEqual(
                urllib.parse.urlunsplit(urllib.parse.urlsplit(u)), u)
            self.assertEqual(
                urllib.parse.urlunparse(urllib.parse.urlparse(u)), u)

    def test_RFC1808(self):
        # "normal" cases from RFC 1808:
        self.checkJoinCases(RFC1808_BASE, [
            ('g:h', 'g:h'),
            ('g', 'http://a/b/c/g'),
            ('./g', 'http://a/b/c/g'),
            ('g/', 'http://a/b/c/g/'),
            ('/g', 'http://a/g'),
            ('//g', 'http://g'),
            ('g?y', 'http://a/b/c/g?y'),
            ('g?y/./x', 'http://a/b/c/g?y/./x'),
            ('#s', 'http://a/b/c/d;p?q#s'),
            ('g#s', 'http://a/b/c/g#s'),
            ('g#s/./x', 'http://a/b/c/g#s/./x'),
            ('g?y#s', 'http://a/b/c/g?y#s'),
            ('g;x', 'http://a/b/c/g;x'),
            ('g;x?y#s', 'http://a/b/c/g;x?y#s'),
            ('.', 'http://a/b/c/'),
            ('./', 'http://a/b/c/'),
            ('..', 'http://a/b/'),
            ('../', 'http://a/b/'),
            ('../g', 'http://a/b/g'),
            ('../..', 'http://a/'),
            ('../../', 'http://a/'),
            ('../../g', 'http://a/g'),
        ])

        # "abnormal" cases from RFC 1808:
        self.checkJoinCases(RFC1808_BASE, [
            ('', 'http://a/b/c/d;p?q#f'),
            ('../../../g', 'http://a/../g'),
            ('../../../../g', 'http://a/../../g'),
            ('/./g', 'http://a/./g'),
            ('/../g', 'http://a/../g'),
            ('g.', 'http://a/b/c/g.'),
            ('.g', 'http://a/b/c/.g'),
            ('g..', 'http://a/b/c/g..'),
            ('..g', 'http://a/b/c/..g'),
            ('./../g', 'http://a/b/g'),
            ('./g/.', 'http://a/b/c/g/'),
            ('g/./h', 'http://a/b/c/g/h'),
            ('g/../h', 'http://a/b/c/h'),
        ])

        # RFC 1808 and RFC 1630 disagree on these (according to RFC 1808),
        # so we'll not actually run these tests (which expect 1808 behavior).
        #self.checkJoin(RFC1808_BASE, 'http:g', 'http:g')
        #self.checkJoin(RFC1808_BASE, 'http:', 'http:')

    def test_RFC2368(self):
        # Issue 11467: path that starts with a number is not parsed correctly
        self.assertEqual(urllib.parse.urlparse('mailto:1337@example.org'),
                         ('mailto', '', '1337@example.org', '', '', ''))

    def test_RFC2396(self):
        # cases from RFC 2396
        self.checkJoinCases(RFC2396_BASE, [
            ('g:h', 'g:h'),
            ('g', 'http://a/b/c/g'),
            ('./g', 'http://a/b/c/g'),
            ('g/', 'http://a/b/c/g/'),
            ('/g', 'http://a/g'),
            ('//g', 'http://g'),
            ('g?y', 'http://a/b/c/g?y'),
            ('#s', 'http://a/b/c/d;p?q#s'),
            ('g#s', 'http://a/b/c/g#s'),
            ('g?y#s', 'http://a/b/c/g?y#s'),
            ('g;x', 'http://a/b/c/g;x'),
            ('g;x?y#s', 'http://a/b/c/g;x?y#s'),
            ('.', 'http://a/b/c/'),
            ('./', 'http://a/b/c/'),
            ('..', 'http://a/b/'),
            ('../', 'http://a/b/'),
            ('../g', 'http://a/b/g'),
            ('../..', 'http://a/'),
            ('../../', 'http://a/'),
            ('../../g', 'http://a/g'),
            ('', RFC2396_BASE),
            ('../../../g', 'http://a/../g'),
            ('../../../../g', 'http://a/../../g'),
            ('/./g', 'http://a/./g'),
            ('/../g', 'http://a/../g'),
            ('g.', 'http://a/b/c/g.'),
            ('.g', 'http://a/b/c/.g'),
            ('g..', 'http://a/b/c/g..'),
            ('..g', 'http://a/b/c/..g'),
            ('./../g', 'http://a/b/g'),
            ('./g/.', 'http://a/b/c/g/'),
            ('g/./h', 'http://a/b/c/g/h'),
            ('g/../h', 'http://a/b/c/h'),
            ('g;x=1/./y', 'http://a/b/c/g;x=1/y'),
            ('g;x=1/../y', 'http://a/b/c/y'),
            ('g?y/./x', 'http://a/b/c/g?y/./x'),
            ('g?y/../x', 'http://a/b/c/g?y/../x'),
            ('g#s/./x', 'http://a/b/c/g#s/./x'),
            ('g#s/../x', 'http://a/b/c/g#s/../x'),
        ])

    def test_RFC3986(self):
        # Test cases from RFC3986
        self.checkJoinCases(RFC3986_BASE, [
            ('g:h', 'g:h'),
            ('g', 'http://a/b/c/g'),
            ('./g', 'http://a/b/c/g'),
            ('g/', 'http://a/b/c/g/'),
            ('/g', 'http://a/g'),
            ('//g', 'http://g'),
            ('?y', 'http://a/b/c/d;p?y'),
            ('g?y', 'http://a/b/c/g?y'),
            ('#s', 'http://a/b/c/d;p?q#s'),
            ('g#s', 'http://a/b/c/g#s'),
            ('g?y#s', 'http://a/b/c/g?y#s'),
            (';x', 'http://a/b/c/;x'),
            ('g;x', 'http://a/b/c/g;x'),
            ('g;x?y#s', 'http://a/b/c/g;x?y#s'),
            ('', 'http://a/b/c/d;p?q'),
            ('.', 'http://a/b/c/'),
            ('./', 'http://a/b/c/'),
            ('..', 'http://a/b/'),
            ('../', 'http://a/b/'),
            ('../g', 'http://a/b/g'),
            ('../..', 'http://a/'),
            ('../../', 'http://a/'),
            ('../../g', 'http://a/g'),
        ])

        # Abnormal Examples
        # For these 'abnormal scenarios' urljoin() does not return what
        # RFC 3986 expects, so they are not run.
        # Tests are here for reference.

        #self.checkJoin(RFC3986_BASE, '../../../g','http://a/g')
        #self.checkJoin(RFC3986_BASE, '../../../../g','http://a/g')
        #self.checkJoin(RFC3986_BASE, '/./g','http://a/g')
        #self.checkJoin(RFC3986_BASE, '/../g','http://a/g')

        self.checkJoinCases(RFC3986_BASE, [
            ('g.', 'http://a/b/c/g.'),
            ('.g', 'http://a/b/c/.g'),
            ('g..', 'http://a/b/c/g..'),
            ('..g', 'http://a/b/c/..g'),
            ('./../g', 'http://a/b/g'),
            ('./g/.', 'http://a/b/c/g/'),
            ('g/./h', 'http://a/b/c/g/h'),
            ('g/../h', 'http://a/b/c/h'),
            ('g;x=1/./y', 'http://a/b/c/g;x=1/y'),
            ('g;x=1/../y', 'http://a/b/c/y'),
            ('g?y/./x', 'http://a/b/c/g?y/./x'),
            ('g?y/../x', 'http://a/b/c/g?y/../x'),
            ('g#s/./x', 'http://a/b/c/g#s/./x'),
            ('g#s/../x', 'http://a/b/c/g#s/../x'),
            # A strict parser would give 'http:g' here; this is the
            # relaxed parser's answer.
            ('http:g', 'http://a/b/c/g'),
        ])

        # Test for issue9721
        self.checkJoin('http://a/b/c/de', ';x', 'http://a/b/c/;x')

    def test_urljoins(self):
        self.checkJoinCases(SIMPLE_BASE, [
            ('g:h', 'g:h'),
            ('http:g', 'http://a/b/c/g'),
            ('http:', 'http://a/b/c/d'),
            ('g', 'http://a/b/c/g'),
            ('./g', 'http://a/b/c/g'),
            ('g/', 'http://a/b/c/g/'),
            ('/g', 'http://a/g'),
            ('//g', 'http://g'),
            ('?y', 'http://a/b/c/d?y'),
            ('g?y', 'http://a/b/c/g?y'),
            ('g?y/./x', 'http://a/b/c/g?y/./x'),
            ('.', 'http://a/b/c/'),
            ('./', 'http://a/b/c/'),
            ('..', 'http://a/b/'),
            ('../', 'http://a/b/'),
            ('../g', 'http://a/b/g'),
            ('../..', 'http://a/'),
            ('../../g', 'http://a/g'),
            ('../../../g', 'http://a/../g'),
            ('./../g', 'http://a/b/g'),
            ('./g/.', 'http://a/b/c/g/'),
            ('/./g', 'http://a/./g'),
            ('g/./h', 'http://a/b/c/g/h'),
            ('g/../h', 'http://a/b/c/h'),
            ('http:?y', 'http://a/b/c/d?y'),
            ('http:g?y', 'http://a/b/c/g?y'),
            ('http:g?y/./x', 'http://a/b/c/g?y/./x'),
        ])

    def test_urldefrag(self):
        # Each entry is (url, url without fragment, fragment).
        testcases = [
            ('http://python.org#frag', 'http://python.org', 'frag'),
            ('http://python.org', 'http://python.org', ''),
            ('http://python.org/#frag', 'http://python.org/', 'frag'),
            ('http://python.org/', 'http://python.org/', ''),
            ('http://python.org/?q#frag', 'http://python.org/?q', 'frag'),
            ('http://python.org/?q', 'http://python.org/?q', ''),
            ('http://python.org/p#frag', 'http://python.org/p', 'frag'),
            ('http://python.org/p?q', 'http://python.org/p?q', ''),
            (RFC1808_BASE, 'http://a/b/c/d;p?q', 'f'),
            (RFC2396_BASE, 'http://a/b/c/d;p?q', ''),
        ]
        for url, defrag, frag in testcases:
            self.assertEqual(urllib.parse.urldefrag(url), (defrag, frag))

    def test_urlsplit_attributes(self):
        url = "HTTP://WWW.PYTHON.ORG/doc/#frag"
        p = urllib.parse.urlsplit(url)
        self.assertEqual(p.scheme, "http")
        self.assertEqual(p.netloc, "WWW.PYTHON.ORG")
        self.assertEqual(p.path, "/doc/")
        self.assertEqual(p.query, "")
        self.assertEqual(p.fragment, "frag")
        self.assertIsNone(p.username)
        self.assertIsNone(p.password)
        self.assertEqual(p.hostname, "www.python.org")
        self.assertIsNone(p.port)
        # geturl() won't return exactly the original URL in this case
        # since the scheme is always case-normalized
        #self.assertEqual(p.geturl(), url)

        url = "http://User:Pass@www.python.org:080/doc/?query=yes#frag"
        p = urllib.parse.urlsplit(url)
        self.assertEqual(p.scheme, "http")
        self.assertEqual(p.netloc, "User:Pass@www.python.org:080")
        self.assertEqual(p.path, "/doc/")
        self.assertEqual(p.query, "query=yes")
        self.assertEqual(p.fragment, "frag")
        self.assertEqual(p.username, "User")
        self.assertEqual(p.password, "Pass")
        self.assertEqual(p.hostname, "www.python.org")
        self.assertEqual(p.port, 80)
        self.assertEqual(p.geturl(), url)

        # Addressing issue1698, which suggests Username can contain
        # "@" characters.  Though not RFC compliant, many ftp sites allow
        # and request email addresses as usernames.

        url = ("http://User@example.com:Pass@www.python.org:080"
               "/doc/?query=yes#frag")
        p = urllib.parse.urlsplit(url)
        self.assertEqual(p.scheme, "http")
        self.assertEqual(p.netloc, "User@example.com:Pass@www.python.org:080")
        self.assertEqual(p.path, "/doc/")
        self.assertEqual(p.query, "query=yes")
        self.assertEqual(p.fragment, "frag")
        self.assertEqual(p.username, "User@example.com")
        self.assertEqual(p.password, "Pass")
        self.assertEqual(p.hostname, "www.python.org")
        self.assertEqual(p.port, 80)
        self.assertEqual(p.geturl(), url)

    def test_attributes_bad_port(self):
        """Check handling of non-integer ports."""
        for parse in (urllib.parse.urlsplit, urllib.parse.urlparse):
            p = parse("http://www.example.net:foo")
            self.assertEqual(p.netloc, "www.example.net:foo")
            # Parsing succeeds; only reading the port attribute fails.
            with self.assertRaises(ValueError):
                p.port

    def test_attributes_without_netloc(self):
        # This example is straight from RFC 3261.  It looks like it
        # should allow the username, hostname, and port to be filled
        # in, but doesn't.  Since it's a URI and doesn't use the
        # scheme://netloc syntax, the netloc and related attributes
        # should be left empty.
        uri = "sip:alice@atlanta.com;maddr=239.255.255.1;ttl=15"
        for parse in (urllib.parse.urlsplit, urllib.parse.urlparse):
            p = parse(uri)
            self.assertEqual(p.netloc, "")
            self.assertIsNone(p.username)
            self.assertIsNone(p.password)
            self.assertIsNone(p.hostname)
            self.assertIsNone(p.port)
            self.assertEqual(p.geturl(), uri)

    def test_noslash(self):
        # Issue 1637: http://foo.com?query is legal
        self.assertEqual(
            urllib.parse.urlparse("http://example.com?blahblah=/foo"),
            ('http', 'example.com', '', '', 'blahblah=/foo', ''))

    def test_withoutscheme(self):
        # Test urlparse without scheme
        # Issue 754016: urlparse goes wrong with IP:port without scheme
        # RFC 1808 specifies that netloc should start with //, urlparse expects
        # the same, otherwise it classifies the portion of url as path.
        self.assertEqual(urllib.parse.urlparse("path"),
                         ('', '', 'path', '', '', ''))
        self.assertEqual(urllib.parse.urlparse("//www.python.org:80"),
                         ('', 'www.python.org:80', '', '', '', ''))
        self.assertEqual(urllib.parse.urlparse("http://www.python.org:80"),
                         ('http', 'www.python.org:80', '', '', '', ''))

    def test_portseparator(self):
        # Issue 754016 makes changes for port separator ':' from scheme
        # separator
        self.assertEqual(urllib.parse.urlparse("path:80"),
                         ('', '', 'path:80', '', '', ''))
        self.assertEqual(urllib.parse.urlparse("http:"),
                         ('http', '', '', '', '', ''))
        self.assertEqual(urllib.parse.urlparse("https:"),
                         ('https', '', '', '', '', ''))
        self.assertEqual(urllib.parse.urlparse("http://www.python.org:80"),
                         ('http', 'www.python.org:80', '', '', '', ''))

    def test_usingsys(self):
        # Issue 3314: sys module is used in the error
        self.assertRaises(TypeError, urllib.parse.urlencode, "foo")

    def test_anyscheme(self):
        # Issue 7904: s3://foo.com/stuff has netloc "foo.com".
        self.assertEqual(urllib.parse.urlparse("s3://foo.com/stuff"),
                         ('s3', 'foo.com', '/stuff', '', '', ''))
        self.assertEqual(urllib.parse.urlparse("x-newscheme://foo.com/stuff"),
                         ('x-newscheme', 'foo.com', '/stuff', '', '', ''))

def test_main():
    """Run every test in UrlParseTestCase through test.support."""
    support.run_unittest(UrlParseTestCase)


if __name__ == "__main__":
    test_main()