blob: 0481f0b416092f1b1bac1df97679ef71005ec840 [file] [log] [blame]
Skip Montanaro6ec967d2002-03-23 05:32:10 +00001import unittest
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002import urllib.parse
Fred Drakea4d18a02001-01-05 05:57:04 +00003
# Base URLs that the urljoin() tests below resolve relative references
# against; each is named after the group of tests that uses it.
RFC1808_BASE = "http://a/b/c/d;p?q#f"
RFC2396_BASE = "http://a/b/c/d;p?q"
RFC3986_BASE = 'http://a/b/c/d;p?q'
SIMPLE_BASE = 'http://a/b/c/d'
Fred Drakea4d18a02001-01-05 05:57:04 +00008
# A list of test cases.  Each test case is a two-tuple that contains
# a query string (str or bytes) and the list of (name, value) pairs that
# parse_qsl() is expected to return for it with keep_blank_values=True.

parse_qsl_test_cases = [
    ("", []),
    ("&", []),
    ("&&", []),
    ("=", [('', '')]),
    ("=a", [('', 'a')]),
    ("a", [('a', '')]),
    ("a=", [('a', '')]),
    ("&a=b", [('a', 'b')]),
    ("a=a+b&b=b+c", [('a', 'a b'), ('b', 'b c')]),
    ("a=1&a=2", [('a', '1'), ('a', '2')]),
    (b"", []),
    (b"&", []),
    (b"&&", []),
    (b"=", [(b'', b'')]),
    (b"=a", [(b'', b'a')]),
    (b"a", [(b'a', b'')]),
    (b"a=", [(b'a', b'')]),
    (b"&a=b", [(b'a', b'b')]),
    (b"a=a+b&b=b+c", [(b'a', b'a b'), (b'b', b'b c')]),
    (b"a=1&a=2", [(b'a', b'1'), (b'a', b'2')]),
]
36
Skip Montanaro6ec967d2002-03-23 05:32:10 +000037class UrlParseTestCase(unittest.TestCase):
Johannes Gijsbers41e4faa2005-01-09 15:29:10 +000038
def checkRoundtrips(self, url, parsed, split):
    """Assert that *url* survives parse/unparse round trips unchanged.

    *parsed* is the 6-tuple expected from urlparse(url) and *split* the
    5-tuple expected from urlsplit(url).  For each of the two parsers the
    same sequence of checks is made: the result equals the expected
    tuple (both positionally and through the named attributes), putting
    it back together yields *url* again, and re-parsing geturl() is a
    fixpoint for every attribute.
    """
    split_fields = ('scheme', 'netloc', 'path', 'query', 'fragment')
    parse_fields = ('scheme', 'netloc', 'path',
                    'params', 'query', 'fragment')
    # Attributes computed from netloc rather than stored in the tuple.
    derived_attrs = ('username', 'password', 'hostname', 'port')
    plans = (
        (urllib.parse.urlparse, urllib.parse.urlunparse,
         parsed, parse_fields),
        (urllib.parse.urlsplit, urllib.parse.urlunsplit,
         split, split_fields),
    )
    for parse, unparse, expected, fields in plans:
        result = parse(url)
        self.assertEqual(result, expected)
        by_name = tuple(getattr(result, field) for field in fields)
        self.assertEqual(by_name, expected)

        # put it back together and it should be the same
        rebuilt = unparse(result)
        self.assertEqual(rebuilt, url)
        self.assertEqual(rebuilt, result.geturl())

        # the result of geturl() is a fixpoint; we can always parse it
        # again to get the same result:
        reparsed = parse(result.geturl())
        self.assertEqual(reparsed.geturl(), result.geturl())
        self.assertEqual(reparsed, result)
        for attr in fields + derived_attrs:
            self.assertEqual(getattr(reparsed, attr),
                             getattr(result, attr), attr)
Johannes Gijsbers41e4faa2005-01-09 15:29:10 +000089
def test_qsl(self):
    # Run parse_qsl() over every case in parse_qsl_test_cases, first
    # keeping blank values and then dropping them.
    for orig, expect in parse_qsl_test_cases:
        msg = "Error parsing %r" % orig
        result = urllib.parse.parse_qsl(orig, keep_blank_values=True)
        self.assertEqual(result, expect, msg)
        # With keep_blank_values=False, pairs whose value is empty are
        # expected to disappear from the result.
        expect_without_blanks = [pair for pair in expect if pair[1]]
        result = urllib.parse.parse_qsl(orig, keep_blank_values=False)
        self.assertEqual(result, expect_without_blanks, msg)
Facundo Batistac469d4c2008-09-03 22:49:01 +000098
def test_roundtrips(self):
    # Each case is (url, expected urlparse() 6-tuple, expected
    # urlsplit() 5-tuple); every case is checked as str and as bytes.
    str_cases = [
        ('file:///tmp/junk.txt',
         ('file', '', '/tmp/junk.txt', '', '', ''),
         ('file', '', '/tmp/junk.txt', '', '')),
        ('imap://mail.python.org/mbox1',
         ('imap', 'mail.python.org', '/mbox1', '', '', ''),
         ('imap', 'mail.python.org', '/mbox1', '', '')),
        ('mms://wms.sys.hinet.net/cts/Drama/09006251100.asf',
         ('mms', 'wms.sys.hinet.net', '/cts/Drama/09006251100.asf',
          '', '', ''),
         ('mms', 'wms.sys.hinet.net', '/cts/Drama/09006251100.asf',
          '', '')),
        ('nfs://server/path/to/file.txt',
         ('nfs', 'server', '/path/to/file.txt', '', '', ''),
         ('nfs', 'server', '/path/to/file.txt', '', '')),
        ('svn+ssh://svn.zope.org/repos/main/ZConfig/trunk/',
         ('svn+ssh', 'svn.zope.org', '/repos/main/ZConfig/trunk/',
          '', '', ''),
         ('svn+ssh', 'svn.zope.org', '/repos/main/ZConfig/trunk/',
          '', '')),
        ('git+ssh://git@github.com/user/project.git',
         ('git+ssh', 'git@github.com', '/user/project.git',
          '', '', ''),
         ('git+ssh', 'git@github.com', '/user/project.git',
          '', '')),
    ]

    def _encode(t):
        # Build the bytes twin of a (url, parsed, split) case.
        return (t[0].encode('ascii'),
                tuple(x.encode('ascii') for x in t[1]),
                tuple(x.encode('ascii') for x in t[2]))

    bytes_cases = [_encode(x) for x in str_cases]
    for url, parsed, split in str_cases + bytes_cases:
        self.checkRoundtrips(url, parsed, split)
Michael W. Hudsonbd3e7712002-03-18 13:06:00 +0000133
def test_http_roundtrips(self):
    # urllib.parse.urlsplit treats 'http:' as an optimized special case,
    # so we test both 'http:' and 'https:' in all the following.
    # Three cheers for white box knowledge!
    #
    # Each case omits the scheme: (rest of url, expected urlparse()
    # fields after the scheme, expected urlsplit() fields after the
    # scheme).  The scheme is prepended to all three in the loop below.
    str_cases = [
        ('://www.python.org',
         ('www.python.org', '', '', '', ''),
         ('www.python.org', '', '', '')),
        ('://www.python.org#abc',
         ('www.python.org', '', '', '', 'abc'),
         ('www.python.org', '', '', 'abc')),
        ('://www.python.org?q=abc',
         ('www.python.org', '', '', 'q=abc', ''),
         ('www.python.org', '', 'q=abc', '')),
        ('://www.python.org/#abc',
         ('www.python.org', '/', '', '', 'abc'),
         ('www.python.org', '/', '', 'abc')),
        ('://a/b/c/d;p?q#f',
         ('a', '/b/c/d', 'p', 'q', 'f'),
         ('a', '/b/c/d;p', 'q', 'f')),
    ]

    def _encode(t):
        # Build the bytes twin of a (url, parsed, split) case.
        return (t[0].encode('ascii'),
                tuple(x.encode('ascii') for x in t[1]),
                tuple(x.encode('ascii') for x in t[2]))

    bytes_cases = [_encode(x) for x in str_cases]
    str_schemes = ('http', 'https')
    bytes_schemes = (b'http', b'https')
    str_tests = str_schemes, str_cases
    bytes_tests = bytes_schemes, bytes_cases
    for schemes, test_cases in (str_tests, bytes_tests):
        for scheme in schemes:
            for rest, parsed_tail, split_tail in test_cases:
                # Distinct names: the loop variables are not rebound.
                self.checkRoundtrips(scheme + rest,
                                     (scheme,) + parsed_tail,
                                     (scheme,) + split_tail)
Fred Drake70705652002-10-16 21:02:36 +0000171
def checkJoin(self, base, relurl, expected):
    """Assert that urljoin(base, relurl) gives *expected*, as str and bytes.

    All three arguments are str.  The check is made once on them as given
    and once more on their ASCII-encoded bytes equivalents, so a non-ASCII
    argument makes the encoding step raise UnicodeEncodeError.
    """
    context = "urljoin(%r, %r)" % (base, relurl)
    self.assertEqual(urllib.parse.urljoin(base, relurl), expected, context)
    baseb, relurlb, expectedb = [
        x.encode('ascii') for x in (base, relurl, expected)]
    self.assertEqual(urllib.parse.urljoin(baseb, relurlb), expectedb,
                     context)
Guido van Rossumbbc05682002-10-14 19:59:54 +0000178
179 def test_unparse_parse(self):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000180 str_cases = ['Python', './Python','x-newscheme://foo.com/stuff','x://y','x:/y','x:/','/',]
181 bytes_cases = [x.encode('ascii') for x in str_cases]
182 for u in str_cases + bytes_cases:
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000183 self.assertEqual(urllib.parse.urlunsplit(urllib.parse.urlsplit(u)), u)
184 self.assertEqual(urllib.parse.urlunparse(urllib.parse.urlparse(u)), u)
Fred Drakea4d18a02001-01-05 05:57:04 +0000185
def test_RFC1808(self):
    # "normal" cases from RFC 1808:
    self.checkJoin(RFC1808_BASE, 'g:h', 'g:h')
    self.checkJoin(RFC1808_BASE, 'g', 'http://a/b/c/g')
    self.checkJoin(RFC1808_BASE, './g', 'http://a/b/c/g')
    self.checkJoin(RFC1808_BASE, 'g/', 'http://a/b/c/g/')
    self.checkJoin(RFC1808_BASE, '/g', 'http://a/g')
    self.checkJoin(RFC1808_BASE, '//g', 'http://g')
    self.checkJoin(RFC1808_BASE, 'g?y', 'http://a/b/c/g?y')
    self.checkJoin(RFC1808_BASE, 'g?y/./x', 'http://a/b/c/g?y/./x')
    self.checkJoin(RFC1808_BASE, '#s', 'http://a/b/c/d;p?q#s')
    self.checkJoin(RFC1808_BASE, 'g#s', 'http://a/b/c/g#s')
    self.checkJoin(RFC1808_BASE, 'g#s/./x', 'http://a/b/c/g#s/./x')
    self.checkJoin(RFC1808_BASE, 'g?y#s', 'http://a/b/c/g?y#s')
    self.checkJoin(RFC1808_BASE, 'g;x', 'http://a/b/c/g;x')
    self.checkJoin(RFC1808_BASE, 'g;x?y#s', 'http://a/b/c/g;x?y#s')
    self.checkJoin(RFC1808_BASE, '.', 'http://a/b/c/')
    self.checkJoin(RFC1808_BASE, './', 'http://a/b/c/')
    self.checkJoin(RFC1808_BASE, '..', 'http://a/b/')
    self.checkJoin(RFC1808_BASE, '../', 'http://a/b/')
    self.checkJoin(RFC1808_BASE, '../g', 'http://a/b/g')
    self.checkJoin(RFC1808_BASE, '../..', 'http://a/')
    self.checkJoin(RFC1808_BASE, '../../', 'http://a/')
    self.checkJoin(RFC1808_BASE, '../../g', 'http://a/g')

    # "abnormal" cases from RFC 1808:
    self.checkJoin(RFC1808_BASE, '', 'http://a/b/c/d;p?q#f')
    self.checkJoin(RFC1808_BASE, 'g.', 'http://a/b/c/g.')
    self.checkJoin(RFC1808_BASE, '.g', 'http://a/b/c/.g')
    self.checkJoin(RFC1808_BASE, 'g..', 'http://a/b/c/g..')
    self.checkJoin(RFC1808_BASE, '..g', 'http://a/b/c/..g')
    self.checkJoin(RFC1808_BASE, './../g', 'http://a/b/g')
    self.checkJoin(RFC1808_BASE, './g/.', 'http://a/b/c/g/')
    self.checkJoin(RFC1808_BASE, 'g/./h', 'http://a/b/c/g/h')
    self.checkJoin(RFC1808_BASE, 'g/../h', 'http://a/b/c/h')

    # RFC 1808 and RFC 1630 disagree on these (according to RFC 1808),
    # so we'll not actually run these tests (which expect 1808 behavior).
    #self.checkJoin(RFC1808_BASE, 'http:g', 'http:g')
    #self.checkJoin(RFC1808_BASE, 'http:', 'http:')

    # XXX: The following tests are no longer compatible with RFC3986
    # self.checkJoin(RFC1808_BASE, '../../../g', 'http://a/../g')
    # self.checkJoin(RFC1808_BASE, '../../../../g', 'http://a/../../g')
    # self.checkJoin(RFC1808_BASE, '/./g', 'http://a/./g')
    # self.checkJoin(RFC1808_BASE, '/../g', 'http://a/../g')
232
233
Senthil Kumaran397eb442011-04-15 18:20:24 +0800234 def test_RFC2368(self):
235 # Issue 11467: path that starts with a number is not parsed correctly
236 self.assertEqual(urllib.parse.urlparse('mailto:1337@example.org'),
237 ('mailto', '', '1337@example.org', '', '', ''))
238
def test_RFC2396(self):
    # cases from RFC 2396
    self.checkJoin(RFC2396_BASE, 'g:h', 'g:h')
    self.checkJoin(RFC2396_BASE, 'g', 'http://a/b/c/g')
    self.checkJoin(RFC2396_BASE, './g', 'http://a/b/c/g')
    self.checkJoin(RFC2396_BASE, 'g/', 'http://a/b/c/g/')
    self.checkJoin(RFC2396_BASE, '/g', 'http://a/g')
    self.checkJoin(RFC2396_BASE, '//g', 'http://g')
    self.checkJoin(RFC2396_BASE, 'g?y', 'http://a/b/c/g?y')
    self.checkJoin(RFC2396_BASE, '#s', 'http://a/b/c/d;p?q#s')
    self.checkJoin(RFC2396_BASE, 'g#s', 'http://a/b/c/g#s')
    self.checkJoin(RFC2396_BASE, 'g?y#s', 'http://a/b/c/g?y#s')
    self.checkJoin(RFC2396_BASE, 'g;x', 'http://a/b/c/g;x')
    self.checkJoin(RFC2396_BASE, 'g;x?y#s', 'http://a/b/c/g;x?y#s')
    self.checkJoin(RFC2396_BASE, '.', 'http://a/b/c/')
    self.checkJoin(RFC2396_BASE, './', 'http://a/b/c/')
    self.checkJoin(RFC2396_BASE, '..', 'http://a/b/')
    self.checkJoin(RFC2396_BASE, '../', 'http://a/b/')
    self.checkJoin(RFC2396_BASE, '../g', 'http://a/b/g')
    self.checkJoin(RFC2396_BASE, '../..', 'http://a/')
    self.checkJoin(RFC2396_BASE, '../../', 'http://a/')
    self.checkJoin(RFC2396_BASE, '../../g', 'http://a/g')
    self.checkJoin(RFC2396_BASE, '', RFC2396_BASE)
    self.checkJoin(RFC2396_BASE, 'g.', 'http://a/b/c/g.')
    self.checkJoin(RFC2396_BASE, '.g', 'http://a/b/c/.g')
    self.checkJoin(RFC2396_BASE, 'g..', 'http://a/b/c/g..')
    self.checkJoin(RFC2396_BASE, '..g', 'http://a/b/c/..g')
    self.checkJoin(RFC2396_BASE, './../g', 'http://a/b/g')
    self.checkJoin(RFC2396_BASE, './g/.', 'http://a/b/c/g/')
    self.checkJoin(RFC2396_BASE, 'g/./h', 'http://a/b/c/g/h')
    self.checkJoin(RFC2396_BASE, 'g/../h', 'http://a/b/c/h')
    self.checkJoin(RFC2396_BASE, 'g;x=1/./y', 'http://a/b/c/g;x=1/y')
    self.checkJoin(RFC2396_BASE, 'g;x=1/../y', 'http://a/b/c/y')
    self.checkJoin(RFC2396_BASE, 'g?y/./x', 'http://a/b/c/g?y/./x')
    self.checkJoin(RFC2396_BASE, 'g?y/../x', 'http://a/b/c/g?y/../x')
    self.checkJoin(RFC2396_BASE, 'g#s/./x', 'http://a/b/c/g#s/./x')
    self.checkJoin(RFC2396_BASE, 'g#s/../x', 'http://a/b/c/g#s/../x')

    # XXX: The following tests are no longer compatible with RFC3986
    # self.checkJoin(RFC2396_BASE, '../../../g', 'http://a/../g')
    # self.checkJoin(RFC2396_BASE, '../../../../g', 'http://a/../../g')
    # self.checkJoin(RFC2396_BASE, '/./g', 'http://a/./g')
    # self.checkJoin(RFC2396_BASE, '/../g', 'http://a/../g')
284
285
def test_RFC3986(self):
    # Test cases from RFC3986
    self.checkJoin(RFC3986_BASE, '?y', 'http://a/b/c/d;p?y')
    self.checkJoin(RFC3986_BASE, ';x', 'http://a/b/c/;x')
    self.checkJoin(RFC3986_BASE, 'g:h', 'g:h')
    self.checkJoin(RFC3986_BASE, 'g', 'http://a/b/c/g')
    self.checkJoin(RFC3986_BASE, './g', 'http://a/b/c/g')
    self.checkJoin(RFC3986_BASE, 'g/', 'http://a/b/c/g/')
    self.checkJoin(RFC3986_BASE, '/g', 'http://a/g')
    self.checkJoin(RFC3986_BASE, '//g', 'http://g')
    self.checkJoin(RFC3986_BASE, 'g?y', 'http://a/b/c/g?y')
    self.checkJoin(RFC3986_BASE, '#s', 'http://a/b/c/d;p?q#s')
    self.checkJoin(RFC3986_BASE, 'g#s', 'http://a/b/c/g#s')
    self.checkJoin(RFC3986_BASE, 'g?y#s', 'http://a/b/c/g?y#s')
    self.checkJoin(RFC3986_BASE, 'g;x', 'http://a/b/c/g;x')
    self.checkJoin(RFC3986_BASE, 'g;x?y#s', 'http://a/b/c/g;x?y#s')
    self.checkJoin(RFC3986_BASE, '', 'http://a/b/c/d;p?q')
    self.checkJoin(RFC3986_BASE, '.', 'http://a/b/c/')
    self.checkJoin(RFC3986_BASE, './', 'http://a/b/c/')
    self.checkJoin(RFC3986_BASE, '..', 'http://a/b/')
    self.checkJoin(RFC3986_BASE, '../', 'http://a/b/')
    self.checkJoin(RFC3986_BASE, '../g', 'http://a/b/g')
    self.checkJoin(RFC3986_BASE, '../..', 'http://a/')
    self.checkJoin(RFC3986_BASE, '../../', 'http://a/')
    self.checkJoin(RFC3986_BASE, '../../g', 'http://a/g')

    # Abnormal examples.  These are executed, not just kept for
    # reference: the expected values are what urljoin() must return.
    self.checkJoin(RFC3986_BASE, '../../../g', 'http://a/g')
    self.checkJoin(RFC3986_BASE, '../../../../g', 'http://a/g')
    self.checkJoin(RFC3986_BASE, '/./g', 'http://a/g')
    self.checkJoin(RFC3986_BASE, '/../g', 'http://a/g')
    self.checkJoin(RFC3986_BASE, 'g.', 'http://a/b/c/g.')
    self.checkJoin(RFC3986_BASE, '.g', 'http://a/b/c/.g')
    self.checkJoin(RFC3986_BASE, 'g..', 'http://a/b/c/g..')
    self.checkJoin(RFC3986_BASE, '..g', 'http://a/b/c/..g')
    self.checkJoin(RFC3986_BASE, './../g', 'http://a/b/g')
    self.checkJoin(RFC3986_BASE, './g/.', 'http://a/b/c/g/')
    self.checkJoin(RFC3986_BASE, 'g/./h', 'http://a/b/c/g/h')
    self.checkJoin(RFC3986_BASE, 'g/../h', 'http://a/b/c/h')
    self.checkJoin(RFC3986_BASE, 'g;x=1/./y', 'http://a/b/c/g;x=1/y')
    self.checkJoin(RFC3986_BASE, 'g;x=1/../y', 'http://a/b/c/y')
    self.checkJoin(RFC3986_BASE, 'g?y/./x', 'http://a/b/c/g?y/./x')
    self.checkJoin(RFC3986_BASE, 'g?y/../x', 'http://a/b/c/g?y/../x')
    self.checkJoin(RFC3986_BASE, 'g#s/./x', 'http://a/b/c/g#s/./x')
    self.checkJoin(RFC3986_BASE, 'g#s/../x', 'http://a/b/c/g#s/../x')
    #self.checkJoin(RFC3986_BASE, 'http:g', 'http:g') # strict parser
    self.checkJoin(RFC3986_BASE, 'http:g', 'http://a/b/c/g') # relaxed parser

    # Test for issue9721
    self.checkJoin('http://a/b/c/de', ';x', 'http://a/b/c/;x')
343
def test_urljoins(self):
    # Joins against a base URL that has no params, query or fragment.
    self.checkJoin(SIMPLE_BASE, 'g:h', 'g:h')
    self.checkJoin(SIMPLE_BASE, 'http:g', 'http://a/b/c/g')
    self.checkJoin(SIMPLE_BASE, 'http:', 'http://a/b/c/d')
    self.checkJoin(SIMPLE_BASE, 'g', 'http://a/b/c/g')
    self.checkJoin(SIMPLE_BASE, './g', 'http://a/b/c/g')
    self.checkJoin(SIMPLE_BASE, 'g/', 'http://a/b/c/g/')
    self.checkJoin(SIMPLE_BASE, '/g', 'http://a/g')
    self.checkJoin(SIMPLE_BASE, '//g', 'http://g')
    self.checkJoin(SIMPLE_BASE, '?y', 'http://a/b/c/d?y')
    self.checkJoin(SIMPLE_BASE, 'g?y', 'http://a/b/c/g?y')
    self.checkJoin(SIMPLE_BASE, 'g?y/./x', 'http://a/b/c/g?y/./x')
    self.checkJoin(SIMPLE_BASE, '.', 'http://a/b/c/')
    self.checkJoin(SIMPLE_BASE, './', 'http://a/b/c/')
    self.checkJoin(SIMPLE_BASE, '..', 'http://a/b/')
    self.checkJoin(SIMPLE_BASE, '../', 'http://a/b/')
    self.checkJoin(SIMPLE_BASE, '../g', 'http://a/b/g')
    self.checkJoin(SIMPLE_BASE, '../..', 'http://a/')
    self.checkJoin(SIMPLE_BASE, '../../g', 'http://a/g')
    self.checkJoin(SIMPLE_BASE, './../g', 'http://a/b/g')
    self.checkJoin(SIMPLE_BASE, './g/.', 'http://a/b/c/g/')
    self.checkJoin(SIMPLE_BASE, 'g/./h', 'http://a/b/c/g/h')
    self.checkJoin(SIMPLE_BASE, 'g/../h', 'http://a/b/c/h')
    self.checkJoin(SIMPLE_BASE, 'http:?y', 'http://a/b/c/d?y')
    self.checkJoin(SIMPLE_BASE, 'http:g?y', 'http://a/b/c/g?y')
    self.checkJoin(SIMPLE_BASE, 'http:g?y/./x', 'http://a/b/c/g?y/./x')
    self.checkJoin('http:///', '..', 'http:///')
    self.checkJoin('', 'http://a/b/c/g?y/./x', 'http://a/b/c/g?y/./x')
    self.checkJoin('', 'http://a/./g', 'http://a/./g')
    self.checkJoin('svn://pathtorepo/dir1', 'dir2', 'svn://pathtorepo/dir2')
    self.checkJoin('svn+ssh://pathtorepo/dir1', 'dir2',
                   'svn+ssh://pathtorepo/dir2')

    # XXX: The following tests are no longer compatible with RFC3986
    # self.checkJoin(SIMPLE_BASE, '../../../g', 'http://a/../g')
    # self.checkJoin(SIMPLE_BASE, '/./g', 'http://a/./g')

    # test for issue22118 duplicate slashes
    self.checkJoin(SIMPLE_BASE + '/', 'foo', SIMPLE_BASE + '/foo')

    # Non-RFC-defined tests, covering variations of base and trailing
    # slashes
    self.checkJoin('http://a/b/c/d/e/', '../../f/g/', 'http://a/b/c/f/g/')
    self.checkJoin('http://a/b/c/d/e', '../../f/g/', 'http://a/b/f/g/')
    self.checkJoin('http://a/b/c/d/e/', '/../../f/g/', 'http://a/f/g/')
    self.checkJoin('http://a/b/c/d/e', '/../../f/g/', 'http://a/f/g/')
    self.checkJoin('http://a/b/c/d/e/', '../../f/g', 'http://a/b/c/f/g')
    self.checkJoin('http://a/b/', '../../f/g/', 'http://a/f/g/')
393
Senthil Kumaranad02d232010-04-16 03:02:13 +0000394 def test_RFC2732(self):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000395 str_cases = [
Senthil Kumaranad02d232010-04-16 03:02:13 +0000396 ('http://Test.python.org:5432/foo/', 'test.python.org', 5432),
397 ('http://12.34.56.78:5432/foo/', '12.34.56.78', 5432),
398 ('http://[::1]:5432/foo/', '::1', 5432),
399 ('http://[dead:beef::1]:5432/foo/', 'dead:beef::1', 5432),
400 ('http://[dead:beef::]:5432/foo/', 'dead:beef::', 5432),
401 ('http://[dead:beef:cafe:5417:affe:8FA3:deaf:feed]:5432/foo/',
402 'dead:beef:cafe:5417:affe:8fa3:deaf:feed', 5432),
403 ('http://[::12.34.56.78]:5432/foo/', '::12.34.56.78', 5432),
404 ('http://[::ffff:12.34.56.78]:5432/foo/',
405 '::ffff:12.34.56.78', 5432),
406 ('http://Test.python.org/foo/', 'test.python.org', None),
407 ('http://12.34.56.78/foo/', '12.34.56.78', None),
408 ('http://[::1]/foo/', '::1', None),
409 ('http://[dead:beef::1]/foo/', 'dead:beef::1', None),
410 ('http://[dead:beef::]/foo/', 'dead:beef::', None),
411 ('http://[dead:beef:cafe:5417:affe:8FA3:deaf:feed]/foo/',
412 'dead:beef:cafe:5417:affe:8fa3:deaf:feed', None),
413 ('http://[::12.34.56.78]/foo/', '::12.34.56.78', None),
414 ('http://[::ffff:12.34.56.78]/foo/',
415 '::ffff:12.34.56.78', None),
Serhiy Storchakaff97b082014-01-18 18:30:33 +0200416 ('http://Test.python.org:/foo/', 'test.python.org', None),
417 ('http://12.34.56.78:/foo/', '12.34.56.78', None),
418 ('http://[::1]:/foo/', '::1', None),
419 ('http://[dead:beef::1]:/foo/', 'dead:beef::1', None),
420 ('http://[dead:beef::]:/foo/', 'dead:beef::', None),
421 ('http://[dead:beef:cafe:5417:affe:8FA3:deaf:feed]:/foo/',
422 'dead:beef:cafe:5417:affe:8fa3:deaf:feed', None),
423 ('http://[::12.34.56.78]:/foo/', '::12.34.56.78', None),
424 ('http://[::ffff:12.34.56.78]:/foo/',
425 '::ffff:12.34.56.78', None),
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000426 ]
427 def _encode(t):
428 return t[0].encode('ascii'), t[1].encode('ascii'), t[2]
429 bytes_cases = [_encode(x) for x in str_cases]
430 for url, hostname, port in str_cases + bytes_cases:
Senthil Kumaranad02d232010-04-16 03:02:13 +0000431 urlparsed = urllib.parse.urlparse(url)
432 self.assertEqual((urlparsed.hostname, urlparsed.port) , (hostname, port))
433
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000434 str_cases = [
Senthil Kumaranad02d232010-04-16 03:02:13 +0000435 'http://::12.34.56.78]/',
436 'http://[::1/foo/',
Senthil Kumaran7a1e09f2010-04-22 12:19:46 +0000437 'ftp://[::1/foo/bad]/bad',
Senthil Kumaran2eaef052010-04-20 20:42:50 +0000438 'http://[::1/foo/bad]/bad',
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000439 'http://[::ffff:12.34.56.78']
440 bytes_cases = [x.encode('ascii') for x in str_cases]
441 for invalid_url in str_cases + bytes_cases:
Senthil Kumaran7a1e09f2010-04-22 12:19:46 +0000442 self.assertRaises(ValueError, urllib.parse.urlparse, invalid_url)
Senthil Kumaranad02d232010-04-16 03:02:13 +0000443
def test_urldefrag(self):
    # Each case is (url, expected url without the fragment, expected
    # fragment); every case is checked as str and as bytes.
    str_cases = [
        ('http://python.org#frag', 'http://python.org', 'frag'),
        ('http://python.org', 'http://python.org', ''),
        ('http://python.org/#frag', 'http://python.org/', 'frag'),
        ('http://python.org/', 'http://python.org/', ''),
        ('http://python.org/?q#frag', 'http://python.org/?q', 'frag'),
        ('http://python.org/?q', 'http://python.org/?q', ''),
        ('http://python.org/p#frag', 'http://python.org/p', 'frag'),
        ('http://python.org/p?q', 'http://python.org/p?q', ''),
        (RFC1808_BASE, 'http://a/b/c/d;p?q', 'f'),
        (RFC2396_BASE, 'http://a/b/c/d;p?q', ''),
    ]

    def _encode(t):
        # Same container type, every element ASCII-encoded.
        return type(t)(x.encode('ascii') for x in t)

    bytes_cases = [_encode(x) for x in str_cases]
    for url, defrag, frag in str_cases + bytes_cases:
        result = urllib.parse.urldefrag(url)
        self.assertEqual(result.geturl(), url)
        self.assertEqual(result, (defrag, frag))
        self.assertEqual(result.url, defrag)
        self.assertEqual(result.fragment, frag)
Fred Drake70705652002-10-16 21:02:36 +0000466
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000467 def test_urlsplit_attributes(self):
468 url = "HTTP://WWW.PYTHON.ORG/doc/#frag"
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000469 p = urllib.parse.urlsplit(url)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000470 self.assertEqual(p.scheme, "http")
471 self.assertEqual(p.netloc, "WWW.PYTHON.ORG")
472 self.assertEqual(p.path, "/doc/")
473 self.assertEqual(p.query, "")
474 self.assertEqual(p.fragment, "frag")
475 self.assertEqual(p.username, None)
476 self.assertEqual(p.password, None)
477 self.assertEqual(p.hostname, "www.python.org")
478 self.assertEqual(p.port, None)
479 # geturl() won't return exactly the original URL in this case
480 # since the scheme is always case-normalized
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000481 # We handle this by ignoring the first 4 characters of the URL
482 self.assertEqual(p.geturl()[4:], url[4:])
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000483
484 url = "http://User:Pass@www.python.org:080/doc/?query=yes#frag"
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000485 p = urllib.parse.urlsplit(url)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000486 self.assertEqual(p.scheme, "http")
487 self.assertEqual(p.netloc, "User:Pass@www.python.org:080")
488 self.assertEqual(p.path, "/doc/")
489 self.assertEqual(p.query, "query=yes")
490 self.assertEqual(p.fragment, "frag")
491 self.assertEqual(p.username, "User")
492 self.assertEqual(p.password, "Pass")
493 self.assertEqual(p.hostname, "www.python.org")
494 self.assertEqual(p.port, 80)
495 self.assertEqual(p.geturl(), url)
496
Christian Heimesfaf2f632008-01-06 16:59:19 +0000497 # Addressing issue1698, which suggests Username can contain
498 # "@" characters. Though not RFC compliant, many ftp sites allow
499 # and request email addresses as usernames.
500
501 url = "http://User@example.com:Pass@www.python.org:080/doc/?query=yes#frag"
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000502 p = urllib.parse.urlsplit(url)
Christian Heimesfaf2f632008-01-06 16:59:19 +0000503 self.assertEqual(p.scheme, "http")
504 self.assertEqual(p.netloc, "User@example.com:Pass@www.python.org:080")
505 self.assertEqual(p.path, "/doc/")
506 self.assertEqual(p.query, "query=yes")
507 self.assertEqual(p.fragment, "frag")
508 self.assertEqual(p.username, "User@example.com")
509 self.assertEqual(p.password, "Pass")
510 self.assertEqual(p.hostname, "www.python.org")
511 self.assertEqual(p.port, 80)
512 self.assertEqual(p.geturl(), url)
513
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000514 # And check them all again, only with bytes this time
515 url = b"HTTP://WWW.PYTHON.ORG/doc/#frag"
516 p = urllib.parse.urlsplit(url)
517 self.assertEqual(p.scheme, b"http")
518 self.assertEqual(p.netloc, b"WWW.PYTHON.ORG")
519 self.assertEqual(p.path, b"/doc/")
520 self.assertEqual(p.query, b"")
521 self.assertEqual(p.fragment, b"frag")
522 self.assertEqual(p.username, None)
523 self.assertEqual(p.password, None)
524 self.assertEqual(p.hostname, b"www.python.org")
525 self.assertEqual(p.port, None)
526 self.assertEqual(p.geturl()[4:], url[4:])
527
528 url = b"http://User:Pass@www.python.org:080/doc/?query=yes#frag"
529 p = urllib.parse.urlsplit(url)
530 self.assertEqual(p.scheme, b"http")
531 self.assertEqual(p.netloc, b"User:Pass@www.python.org:080")
532 self.assertEqual(p.path, b"/doc/")
533 self.assertEqual(p.query, b"query=yes")
534 self.assertEqual(p.fragment, b"frag")
535 self.assertEqual(p.username, b"User")
536 self.assertEqual(p.password, b"Pass")
537 self.assertEqual(p.hostname, b"www.python.org")
538 self.assertEqual(p.port, 80)
539 self.assertEqual(p.geturl(), url)
540
541 url = b"http://User@example.com:Pass@www.python.org:080/doc/?query=yes#frag"
542 p = urllib.parse.urlsplit(url)
543 self.assertEqual(p.scheme, b"http")
544 self.assertEqual(p.netloc, b"User@example.com:Pass@www.python.org:080")
545 self.assertEqual(p.path, b"/doc/")
546 self.assertEqual(p.query, b"query=yes")
547 self.assertEqual(p.fragment, b"frag")
548 self.assertEqual(p.username, b"User@example.com")
549 self.assertEqual(p.password, b"Pass")
550 self.assertEqual(p.hostname, b"www.python.org")
551 self.assertEqual(p.port, 80)
552 self.assertEqual(p.geturl(), url)
Christian Heimesfaf2f632008-01-06 16:59:19 +0000553
Senthil Kumaran2fc5a502012-05-24 21:56:17 +0800554 # Verify an illegal port is returned as None
555 url = b"HTTP://WWW.PYTHON.ORG:65536/doc/#frag"
556 p = urllib.parse.urlsplit(url)
557 self.assertEqual(p.port, None)
558
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000559 def test_attributes_bad_port(self):
560 """Check handling of non-integer ports."""
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000561 p = urllib.parse.urlsplit("http://www.example.net:foo")
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000562 self.assertEqual(p.netloc, "www.example.net:foo")
563 self.assertRaises(ValueError, lambda: p.port)
564
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000565 p = urllib.parse.urlparse("http://www.example.net:foo")
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000566 self.assertEqual(p.netloc, "www.example.net:foo")
567 self.assertRaises(ValueError, lambda: p.port)
568
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000569 # Once again, repeat ourselves to test bytes
570 p = urllib.parse.urlsplit(b"http://www.example.net:foo")
571 self.assertEqual(p.netloc, b"www.example.net:foo")
572 self.assertRaises(ValueError, lambda: p.port)
573
574 p = urllib.parse.urlparse(b"http://www.example.net:foo")
575 self.assertEqual(p.netloc, b"www.example.net:foo")
576 self.assertRaises(ValueError, lambda: p.port)
577
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000578 def test_attributes_without_netloc(self):
579 # This example is straight from RFC 3261. It looks like it
580 # should allow the username, hostname, and port to be filled
581 # in, but doesn't. Since it's a URI and doesn't use the
582 # scheme://netloc syntax, the netloc and related attributes
583 # should be left empty.
584 uri = "sip:alice@atlanta.com;maddr=239.255.255.1;ttl=15"
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000585 p = urllib.parse.urlsplit(uri)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000586 self.assertEqual(p.netloc, "")
587 self.assertEqual(p.username, None)
588 self.assertEqual(p.password, None)
589 self.assertEqual(p.hostname, None)
590 self.assertEqual(p.port, None)
591 self.assertEqual(p.geturl(), uri)
592
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000593 p = urllib.parse.urlparse(uri)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000594 self.assertEqual(p.netloc, "")
595 self.assertEqual(p.username, None)
596 self.assertEqual(p.password, None)
597 self.assertEqual(p.hostname, None)
598 self.assertEqual(p.port, None)
599 self.assertEqual(p.geturl(), uri)
600
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000601 # You guessed it, repeating the test with bytes input
602 uri = b"sip:alice@atlanta.com;maddr=239.255.255.1;ttl=15"
603 p = urllib.parse.urlsplit(uri)
604 self.assertEqual(p.netloc, b"")
605 self.assertEqual(p.username, None)
606 self.assertEqual(p.password, None)
607 self.assertEqual(p.hostname, None)
608 self.assertEqual(p.port, None)
609 self.assertEqual(p.geturl(), uri)
610
611 p = urllib.parse.urlparse(uri)
612 self.assertEqual(p.netloc, b"")
613 self.assertEqual(p.username, None)
614 self.assertEqual(p.password, None)
615 self.assertEqual(p.hostname, None)
616 self.assertEqual(p.port, None)
617 self.assertEqual(p.geturl(), uri)
618
Christian Heimesfaf2f632008-01-06 16:59:19 +0000619 def test_noslash(self):
620 # Issue 1637: http://foo.com?query is legal
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000621 self.assertEqual(urllib.parse.urlparse("http://example.com?blahblah=/foo"),
Christian Heimesfaf2f632008-01-06 16:59:19 +0000622 ('http', 'example.com', '', '', 'blahblah=/foo', ''))
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000623 self.assertEqual(urllib.parse.urlparse(b"http://example.com?blahblah=/foo"),
624 (b'http', b'example.com', b'', b'', b'blahblah=/foo', b''))
Christian Heimesfaf2f632008-01-06 16:59:19 +0000625
Senthil Kumaran84c7d9f2010-08-04 04:50:44 +0000626 def test_withoutscheme(self):
627 # Test urlparse without scheme
628 # Issue 754016: urlparse goes wrong with IP:port without scheme
629 # RFC 1808 specifies that netloc should start with //, urlparse expects
630 # the same, otherwise it classifies the portion of url as path.
631 self.assertEqual(urllib.parse.urlparse("path"),
632 ('','','path','','',''))
633 self.assertEqual(urllib.parse.urlparse("//www.python.org:80"),
634 ('','www.python.org:80','','','',''))
635 self.assertEqual(urllib.parse.urlparse("http://www.python.org:80"),
636 ('http','www.python.org:80','','','',''))
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000637 # Repeat for bytes input
638 self.assertEqual(urllib.parse.urlparse(b"path"),
639 (b'',b'',b'path',b'',b'',b''))
640 self.assertEqual(urllib.parse.urlparse(b"//www.python.org:80"),
641 (b'',b'www.python.org:80',b'',b'',b'',b''))
642 self.assertEqual(urllib.parse.urlparse(b"http://www.python.org:80"),
643 (b'http',b'www.python.org:80',b'',b'',b'',b''))
Senthil Kumaran84c7d9f2010-08-04 04:50:44 +0000644
645 def test_portseparator(self):
646 # Issue 754016 makes changes for port separator ':' from scheme separator
647 self.assertEqual(urllib.parse.urlparse("path:80"),
648 ('','','path:80','','',''))
649 self.assertEqual(urllib.parse.urlparse("http:"),('http','','','','',''))
650 self.assertEqual(urllib.parse.urlparse("https:"),('https','','','','',''))
651 self.assertEqual(urllib.parse.urlparse("http://www.python.org:80"),
652 ('http','www.python.org:80','','','',''))
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000653 # As usual, need to check bytes input as well
654 self.assertEqual(urllib.parse.urlparse(b"path:80"),
655 (b'',b'',b'path:80',b'',b'',b''))
656 self.assertEqual(urllib.parse.urlparse(b"http:"),(b'http',b'',b'',b'',b'',b''))
657 self.assertEqual(urllib.parse.urlparse(b"https:"),(b'https',b'',b'',b'',b'',b''))
658 self.assertEqual(urllib.parse.urlparse(b"http://www.python.org:80"),
659 (b'http',b'www.python.org:80',b'',b'',b'',b''))
Senthil Kumaran84c7d9f2010-08-04 04:50:44 +0000660
Facundo Batista2ac5de22008-07-07 18:24:11 +0000661 def test_usingsys(self):
662 # Issue 3314: sys module is used in the error
663 self.assertRaises(TypeError, urllib.parse.urlencode, "foo")
664
Senthil Kumaran6be85c52010-02-19 07:42:50 +0000665 def test_anyscheme(self):
666 # Issue 7904: s3://foo.com/stuff has netloc "foo.com".
Ezio Melotti5e15efa2010-02-19 14:49:02 +0000667 self.assertEqual(urllib.parse.urlparse("s3://foo.com/stuff"),
668 ('s3', 'foo.com', '/stuff', '', '', ''))
669 self.assertEqual(urllib.parse.urlparse("x-newscheme://foo.com/stuff"),
670 ('x-newscheme', 'foo.com', '/stuff', '', '', ''))
Senthil Kumaran1be320e2012-05-19 08:12:00 +0800671 self.assertEqual(urllib.parse.urlparse("x-newscheme://foo.com/stuff?query#fragment"),
672 ('x-newscheme', 'foo.com', '/stuff', '', 'query', 'fragment'))
673 self.assertEqual(urllib.parse.urlparse("x-newscheme://foo.com/stuff?query"),
674 ('x-newscheme', 'foo.com', '/stuff', '', 'query', ''))
675
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000676 # And for bytes...
677 self.assertEqual(urllib.parse.urlparse(b"s3://foo.com/stuff"),
678 (b's3', b'foo.com', b'/stuff', b'', b'', b''))
679 self.assertEqual(urllib.parse.urlparse(b"x-newscheme://foo.com/stuff"),
680 (b'x-newscheme', b'foo.com', b'/stuff', b'', b'', b''))
Senthil Kumaran1be320e2012-05-19 08:12:00 +0800681 self.assertEqual(urllib.parse.urlparse(b"x-newscheme://foo.com/stuff?query#fragment"),
682 (b'x-newscheme', b'foo.com', b'/stuff', b'', b'query', b'fragment'))
683 self.assertEqual(urllib.parse.urlparse(b"x-newscheme://foo.com/stuff?query"),
684 (b'x-newscheme', b'foo.com', b'/stuff', b'', b'query', b''))
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000685
686 def test_mixed_types_rejected(self):
687 # Several functions that process either strings or ASCII encoded bytes
688 # accept multiple arguments. Check they reject mixed type input
Ezio Melottied3a7d22010-12-01 02:32:32 +0000689 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000690 urllib.parse.urlparse("www.python.org", b"http")
Ezio Melottied3a7d22010-12-01 02:32:32 +0000691 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000692 urllib.parse.urlparse(b"www.python.org", "http")
Ezio Melottied3a7d22010-12-01 02:32:32 +0000693 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000694 urllib.parse.urlsplit("www.python.org", b"http")
Ezio Melottied3a7d22010-12-01 02:32:32 +0000695 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000696 urllib.parse.urlsplit(b"www.python.org", "http")
Ezio Melottied3a7d22010-12-01 02:32:32 +0000697 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000698 urllib.parse.urlunparse(( b"http", "www.python.org","","","",""))
Ezio Melottied3a7d22010-12-01 02:32:32 +0000699 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000700 urllib.parse.urlunparse(("http", b"www.python.org","","","",""))
Ezio Melottied3a7d22010-12-01 02:32:32 +0000701 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000702 urllib.parse.urlunsplit((b"http", "www.python.org","","",""))
Ezio Melottied3a7d22010-12-01 02:32:32 +0000703 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000704 urllib.parse.urlunsplit(("http", b"www.python.org","","",""))
Ezio Melottied3a7d22010-12-01 02:32:32 +0000705 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000706 urllib.parse.urljoin("http://python.org", b"http://python.org")
Ezio Melottied3a7d22010-12-01 02:32:32 +0000707 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000708 urllib.parse.urljoin(b"http://python.org", "http://python.org")
709
def _check_result_type(self, str_type):
    """Check that *str_type* and its bytes counterpart convert both ways.

    *str_type* is a str-based result class; its bytes twin is taken from
    ``_encoded_counterpart``.  All-empty instances of both are built and
    compared after decode()/encode() called with no arguments, with the
    encoding only, and with encoding plus error handler.
    """
    width = len(str_type._fields)
    bytes_type = str_type._encoded_counterpart
    # The two classes must point at each other.
    self.assertIs(bytes_type._decoded_counterpart, str_type)
    text_fields = ('',) * width
    raw_fields = (b'',) * width
    text_result = str_type(*text_fields)
    raw_result = bytes_type(*raw_fields)
    # Argument lists tried for both decode() and encode().
    codec_variants = ((), ('ascii',), ('ascii', 'strict'))

    self.assertEqual(text_result, text_fields)
    for codec_args in codec_variants:
        self.assertEqual(raw_result.decode(*codec_args), text_fields)
        self.assertEqual(raw_result.decode(*codec_args), text_result)

    self.assertEqual(raw_result, raw_fields)
    for codec_args in codec_variants:
        self.assertEqual(text_result.encode(*codec_args), raw_fields)
        self.assertEqual(text_result.encode(*codec_args), raw_result)
734
735 def test_result_pairs(self):
736 # Check encoding and decoding between result pairs
737 result_types = [
738 urllib.parse.DefragResult,
739 urllib.parse.SplitResult,
740 urllib.parse.ParseResult,
741 ]
742 for result_type in result_types:
743 self._check_result_type(result_type)
744
Victor Stinner1d87deb2011-01-14 13:05:19 +0000745 def test_parse_qs_encoding(self):
746 result = urllib.parse.parse_qs("key=\u0141%E9", encoding="latin-1")
747 self.assertEqual(result, {'key': ['\u0141\xE9']})
748 result = urllib.parse.parse_qs("key=\u0141%C3%A9", encoding="utf-8")
749 self.assertEqual(result, {'key': ['\u0141\xE9']})
750 result = urllib.parse.parse_qs("key=\u0141%C3%A9", encoding="ascii")
751 self.assertEqual(result, {'key': ['\u0141\ufffd\ufffd']})
752 result = urllib.parse.parse_qs("key=\u0141%E9-", encoding="ascii")
753 self.assertEqual(result, {'key': ['\u0141\ufffd-']})
754 result = urllib.parse.parse_qs("key=\u0141%E9-", encoding="ascii",
755 errors="ignore")
756 self.assertEqual(result, {'key': ['\u0141-']})
757
758 def test_parse_qsl_encoding(self):
759 result = urllib.parse.parse_qsl("key=\u0141%E9", encoding="latin-1")
760 self.assertEqual(result, [('key', '\u0141\xE9')])
761 result = urllib.parse.parse_qsl("key=\u0141%C3%A9", encoding="utf-8")
762 self.assertEqual(result, [('key', '\u0141\xE9')])
763 result = urllib.parse.parse_qsl("key=\u0141%C3%A9", encoding="ascii")
764 self.assertEqual(result, [('key', '\u0141\ufffd\ufffd')])
765 result = urllib.parse.parse_qsl("key=\u0141%E9-", encoding="ascii")
766 self.assertEqual(result, [('key', '\u0141\ufffd-')])
767 result = urllib.parse.parse_qsl("key=\u0141%E9-", encoding="ascii",
768 errors="ignore")
769 self.assertEqual(result, [('key', '\u0141-')])
770
Senthil Kumarande02a712011-07-23 18:27:45 +0800771 def test_urlencode_sequences(self):
772 # Other tests incidentally urlencode things; test non-covered cases:
773 # Sequence and object values.
774 result = urllib.parse.urlencode({'a': [1, 2], 'b': (3, 4, 5)}, True)
Georg Brandl09a7c722012-02-20 21:31:46 +0100775 # we cannot rely on ordering here
776 assert set(result.split('&')) == {'a=1', 'a=2', 'b=3', 'b=4', 'b=5'}
Senthil Kumarande02a712011-07-23 18:27:45 +0800777
778 class Trivial:
779 def __str__(self):
780 return 'trivial'
781
782 result = urllib.parse.urlencode({'a': Trivial()}, True)
783 self.assertEqual(result, 'a=trivial')
784
785 def test_quote_from_bytes(self):
786 self.assertRaises(TypeError, urllib.parse.quote_from_bytes, 'foo')
787 result = urllib.parse.quote_from_bytes(b'archaeological arcana')
788 self.assertEqual(result, 'archaeological%20arcana')
789 result = urllib.parse.quote_from_bytes(b'')
790 self.assertEqual(result, '')
791
792 def test_unquote_to_bytes(self):
793 result = urllib.parse.unquote_to_bytes('abc%20def')
794 self.assertEqual(result, b'abc def')
795 result = urllib.parse.unquote_to_bytes('')
796 self.assertEqual(result, b'')
797
798 def test_quote_errors(self):
799 self.assertRaises(TypeError, urllib.parse.quote, b'foo',
800 encoding='utf-8')
801 self.assertRaises(TypeError, urllib.parse.quote, b'foo', errors='strict')
Victor Stinner1d87deb2011-01-14 13:05:19 +0000802
Ezio Melotti6709b7d2012-05-19 17:15:19 +0300803 def test_issue14072(self):
804 p1 = urllib.parse.urlsplit('tel:+31-641044153')
805 self.assertEqual(p1.scheme, 'tel')
806 self.assertEqual(p1.path, '+31-641044153')
807 p2 = urllib.parse.urlsplit('tel:+31641044153')
808 self.assertEqual(p2.scheme, 'tel')
809 self.assertEqual(p2.path, '+31641044153')
Senthil Kumaraned301992012-12-24 14:00:20 -0800810 # assert the behavior for urlparse
811 p1 = urllib.parse.urlparse('tel:+31-641044153')
812 self.assertEqual(p1.scheme, 'tel')
813 self.assertEqual(p1.path, '+31-641044153')
814 p2 = urllib.parse.urlparse('tel:+31641044153')
815 self.assertEqual(p2.scheme, 'tel')
816 self.assertEqual(p2.path, '+31641044153')
817
818 def test_telurl_params(self):
819 p1 = urllib.parse.urlparse('tel:123-4;phone-context=+1-650-516')
820 self.assertEqual(p1.scheme, 'tel')
821 self.assertEqual(p1.path, '123-4')
822 self.assertEqual(p1.params, 'phone-context=+1-650-516')
823
824 p1 = urllib.parse.urlparse('tel:+1-201-555-0123')
825 self.assertEqual(p1.scheme, 'tel')
826 self.assertEqual(p1.path, '+1-201-555-0123')
827 self.assertEqual(p1.params, '')
828
829 p1 = urllib.parse.urlparse('tel:7042;phone-context=example.com')
830 self.assertEqual(p1.scheme, 'tel')
831 self.assertEqual(p1.path, '7042')
832 self.assertEqual(p1.params, 'phone-context=example.com')
833
834 p1 = urllib.parse.urlparse('tel:863-1234;phone-context=+1-914-555')
835 self.assertEqual(p1.scheme, 'tel')
836 self.assertEqual(p1.path, '863-1234')
837 self.assertEqual(p1.params, 'phone-context=+1-914-555')
838
R David Murrayf5163882013-03-21 20:56:51 -0400839 def test_Quoter_repr(self):
840 quoter = urllib.parse.Quoter(urllib.parse._ALWAYS_SAFE)
841 self.assertIn('Quoter', repr(quoter))
842
Senthil Kumaran6be85c52010-02-19 07:42:50 +0000843
class Utility_Tests(unittest.TestCase):
    """Testcase for the small utility functions in urllib.parse."""
    # In Python 2 this test class was in test_urllib.

    def _check_table(self, func, table):
        # Call the one-argument *func* on every input of *table*, in order,
        # and compare the result with the expected value next to it.
        for value, expected in table:
            self.assertEqual(func(value), expected)

    def test_splittype(self):
        self._check_table(urllib.parse.splittype, (
            ('type:opaquestring', ('type', 'opaquestring')),
            ('opaquestring', (None, 'opaquestring')),
            (':opaquestring', (None, ':opaquestring')),
            ('type:', ('type', '')),
            ('type:opaque:string', ('type', 'opaque:string')),
        ))

    def test_splithost(self):
        self._check_table(urllib.parse.splithost, (
            ('//www.example.org:80/foo/bar/baz.html',
             ('www.example.org:80', '/foo/bar/baz.html')),
            ('//www.example.org:80', ('www.example.org:80', '')),
            ('/foo/bar/baz.html', (None, '/foo/bar/baz.html')),
        ))

    def test_splituser(self):
        self._check_table(urllib.parse.splituser, (
            ('User:Pass@www.python.org:080',
             ('User:Pass', 'www.python.org:080')),
            ('@www.python.org:080', ('', 'www.python.org:080')),
            ('www.python.org:080', (None, 'www.python.org:080')),
            ('User:Pass@', ('User:Pass', '')),
            ('User@example.com:Pass@www.python.org:080',
             ('User@example.com:Pass', 'www.python.org:080')),
        ))

    def test_splitpasswd(self):
        # Some of the password examples are not sensible, but they are
        # included to conform to RFC2617 and to address issue4675.
        self._check_table(urllib.parse.splitpasswd, (
            ('user:ab', ('user', 'ab')),
            ('user:a\nb', ('user', 'a\nb')),
            ('user:a\tb', ('user', 'a\tb')),
            ('user:a\rb', ('user', 'a\rb')),
            ('user:a\fb', ('user', 'a\fb')),
            ('user:a\vb', ('user', 'a\vb')),
            ('user:a:b', ('user', 'a:b')),
            ('user:a b', ('user', 'a b')),
            ('user 2:ab', ('user 2', 'ab')),
            ('user+1:a+b', ('user+1', 'a+b')),
            ('user:', ('user', '')),
            ('user', ('user', None)),
            (':ab', ('', 'ab')),
        ))

    def test_splitport(self):
        self._check_table(urllib.parse.splitport, (
            ('parrot:88', ('parrot', '88')),
            ('parrot', ('parrot', None)),
            ('parrot:', ('parrot', None)),
            ('127.0.0.1', ('127.0.0.1', None)),
            ('parrot:cheese', ('parrot:cheese', None)),
            ('[::1]:88', ('[::1]', '88')),
            ('[::1]', ('[::1]', None)),
            (':88', ('', '88')),
        ))

    def test_splitnport(self):
        # Each row holds the full positional argument list, because
        # splitnport() takes an optional default port as second argument.
        splitnport = urllib.parse.splitnport
        table = (
            (('parrot:88',), ('parrot', 88)),
            (('parrot',), ('parrot', -1)),
            (('parrot', 55), ('parrot', 55)),
            (('parrot:',), ('parrot', -1)),
            (('parrot:', 55), ('parrot', 55)),
            (('127.0.0.1',), ('127.0.0.1', -1)),
            (('127.0.0.1', 55), ('127.0.0.1', 55)),
            (('parrot:cheese',), ('parrot', None)),
            (('parrot:cheese', 55), ('parrot', None)),
        )
        for args, expected in table:
            self.assertEqual(splitnport(*args), expected)

    def test_splitquery(self):
        # Normal cases are exercised by other tests; also cover a URL with
        # more than one '?', one without any query, and one that is only a
        # query (testcase ensuring coverage).
        self._check_table(urllib.parse.splitquery, (
            ('http://python.org/fake?foo=bar',
             ('http://python.org/fake', 'foo=bar')),
            ('http://python.org/fake?foo=bar?',
             ('http://python.org/fake?foo=bar', '')),
            ('http://python.org/fake', ('http://python.org/fake', None)),
            ('?foo=bar', ('', 'foo=bar')),
        ))

    def test_splittag(self):
        self._check_table(urllib.parse.splittag, (
            ('http://example.com?foo=bar#baz',
             ('http://example.com?foo=bar', 'baz')),
            ('http://example.com?foo=bar#',
             ('http://example.com?foo=bar', '')),
            ('#baz', ('', 'baz')),
            ('http://example.com?foo=bar',
             ('http://example.com?foo=bar', None)),
            ('http://example.com?foo=bar#baz#boo',
             ('http://example.com?foo=bar#baz', 'boo')),
        ))

    def test_splitattr(self):
        self._check_table(urllib.parse.splitattr, (
            ('/path;attr1=value1;attr2=value2',
             ('/path', ['attr1=value1', 'attr2=value2'])),
            ('/path;', ('/path', [''])),
            (';attr1=value1;attr2=value2',
             ('', ['attr1=value1', 'attr2=value2'])),
            ('/path', ('/path', [])),
        ))

    def test_splitvalue(self):
        # Normal cases are exercised by other tests; test pathological cases
        # such as a missing key, a missing value or no '=' at all
        # (testcase ensuring coverage).
        self._check_table(urllib.parse.splitvalue, (
            ('foo=bar', ('foo', 'bar')),
            ('foo=', ('foo', '')),
            ('=bar', ('', 'bar')),
            ('foobar', ('foobar', None)),
            ('foo=bar=baz', ('foo', 'bar=baz')),
        ))

    def test_to_bytes(self):
        # An ASCII-only URL must compare equal to the input string, while a
        # URL with a non-ASCII character must raise UnicodeError.
        url = 'http://www.python.org'
        self.assertEqual(urllib.parse.to_bytes(url), 'http://www.python.org')
        with self.assertRaises(UnicodeError):
            urllib.parse.to_bytes('http://www.python.org/medi\u00e6val')

    def test_unwrap(self):
        # The surrounding '<URL:' ... '>' wrapper must be stripped.
        unwrapped = urllib.parse.unwrap('<URL:type://host/path>')
        self.assertEqual(unwrapped, 'type://host/path')
971
Skip Montanaro6ec967d2002-03-23 05:32:10 +0000972
973if __name__ == "__main__":
Serhiy Storchaka9270be72015-03-02 16:32:29 +0200974 unittest.main()