blob: cb323d3423bea699a53b9a26724e1ed8681a6ac1 [file] [log] [blame]
Benjamin Petersonee8712c2008-05-20 21:35:26 +00001from test import support
Skip Montanaro6ec967d2002-03-23 05:32:10 +00002import unittest
Jeremy Hylton1afc1692008-06-18 20:49:58 +00003import urllib.parse
Fred Drakea4d18a02001-01-05 05:57:04 +00004
# Base URLs that the urljoin() tests below resolve relative references
# against.  The RFC 1808 base carries a fragment; the RFC 2396 and
# RFC 3986 bases are the same URL without it; SIMPLE_BASE has neither
# params nor query.
RFC1808_BASE = "http://a/b/c/d;p?q#f"
RFC2396_BASE = "http://a/b/c/d;p?q"
RFC3986_BASE = 'http://a/b/c/d;p?q'
SIMPLE_BASE = 'http://a/b/c/d'
Fred Drakea4d18a02001-01-05 05:57:04 +00009
# A list of test cases.  Each test case is a two-tuple that contains
# a string with the query and a list of (name, value) pairs with the
# expected result.
12
# Query strings paired with the (name, value) list that parse_qsl() is
# expected to return when blank values are kept.  The second half repeats
# the first half with bytes input and bytes results.
parse_qsl_test_cases = [
    ("", []),
    ("&", []),
    ("&&", []),
    ("=", [('', '')]),
    ("=a", [('', 'a')]),
    ("a", [('a', '')]),
    ("a=", [('a', '')]),
    ("&a=b", [('a', 'b')]),
    ("a=a+b&b=b+c", [('a', 'a b'), ('b', 'b c')]),
    ("a=1&a=2", [('a', '1'), ('a', '2')]),
    (b"", []),
    (b"&", []),
    (b"&&", []),
    (b"=", [(b'', b'')]),
    (b"=a", [(b'', b'a')]),
    (b"a", [(b'a', b'')]),
    (b"a=", [(b'a', b'')]),
    (b"&a=b", [(b'a', b'b')]),
    (b"a=a+b&b=b+c", [(b'a', b'a b'), (b'b', b'b c')]),
    (b"a=1&a=2", [(b'a', b'1'), (b'a', b'2')]),
]
37
Skip Montanaro6ec967d2002-03-23 05:32:10 +000038class UrlParseTestCase(unittest.TestCase):
Johannes Gijsbers41e4faa2005-01-09 15:29:10 +000039
def checkRoundtrips(self, url, parsed, split):
    """Check that *url* parses as expected and survives a round trip.

    url    -- the URL under test (str or bytes)
    parsed -- the 6-tuple urlparse(url) is expected to equal
    split  -- the 5-tuple urlsplit(url) is expected to equal

    The same sequence of checks is run first for urlparse()/urlunparse()
    and then for urlsplit()/urlunsplit().  Any mismatch fails the test
    through self.assertEqual().
    """
    # Attributes derived from netloc; present on both result types.
    derived = ('username', 'password', 'hostname', 'port')
    variants = (
        (urllib.parse.urlparse, urllib.parse.urlunparse, parsed,
         ('scheme', 'netloc', 'path', 'params', 'query', 'fragment')),
        (urllib.parse.urlsplit, urllib.parse.urlunsplit, split,
         ('scheme', 'netloc', 'path', 'query', 'fragment')),
    )
    for parse, unparse, expected, fields in variants:
        result = parse(url)
        self.assertEqual(result, expected)
        # The named attributes must agree with the tuple contents.
        by_name = tuple(getattr(result, field) for field in fields)
        self.assertEqual(by_name, expected)

        # put it back together and it should be the same
        rebuilt = unparse(result)
        self.assertEqual(rebuilt, url)
        self.assertEqual(rebuilt, result.geturl())

        # the result of geturl() is a fixpoint; we can always parse it
        # again to get the same result:
        reparsed = parse(result.geturl())
        self.assertEqual(reparsed.geturl(), result.geturl())
        self.assertEqual(reparsed, result)
        for name in fields + derived:
            # The attribute name is passed as msg so that a failure says
            # which attribute differed.
            self.assertEqual(getattr(reparsed, name),
                             getattr(result, name), name)
Johannes Gijsbers41e4faa2005-01-09 15:29:10 +000090
def test_qsl(self):
    # Run parse_qsl() over every entry of parse_qsl_test_cases, once
    # keeping blank values and once dropping them.
    for orig, expect in parse_qsl_test_cases:
        message = "Error parsing %r" % orig
        result = urllib.parse.parse_qsl(orig, keep_blank_values=True)
        self.assertEqual(result, expect, message)
        # With keep_blank_values=False, pairs whose value is empty are
        # expected to be absent from the result.
        expect_without_blanks = [pair for pair in expect if pair[1]]
        result = urllib.parse.parse_qsl(orig, keep_blank_values=False)
        self.assertEqual(result, expect_without_blanks, message)
Facundo Batistac469d4c2008-09-03 22:49:01 +000099
def test_roundtrips(self):
    # Each case is (url, expected urlparse() 6-tuple, expected urlsplit()
    # 5-tuple).  Every case is checked as str and again as ASCII bytes.
    str_cases = [
        ('file:///tmp/junk.txt',
         ('file', '', '/tmp/junk.txt', '', '', ''),
         ('file', '', '/tmp/junk.txt', '', '')),
        ('imap://mail.python.org/mbox1',
         ('imap', 'mail.python.org', '/mbox1', '', '', ''),
         ('imap', 'mail.python.org', '/mbox1', '', '')),
        ('mms://wms.sys.hinet.net/cts/Drama/09006251100.asf',
         ('mms', 'wms.sys.hinet.net', '/cts/Drama/09006251100.asf',
          '', '', ''),
         ('mms', 'wms.sys.hinet.net', '/cts/Drama/09006251100.asf',
          '', '')),
        ('nfs://server/path/to/file.txt',
         ('nfs', 'server', '/path/to/file.txt', '', '', ''),
         ('nfs', 'server', '/path/to/file.txt', '', '')),
        ('svn+ssh://svn.zope.org/repos/main/ZConfig/trunk/',
         ('svn+ssh', 'svn.zope.org', '/repos/main/ZConfig/trunk/',
          '', '', ''),
         ('svn+ssh', 'svn.zope.org', '/repos/main/ZConfig/trunk/',
          '', '')),
        ('git+ssh://git@github.com/user/project.git',
         ('git+ssh', 'git@github.com', '/user/project.git',
          '', '', ''),
         ('git+ssh', 'git@github.com', '/user/project.git',
          '', '')),
    ]

    def _encode(case):
        # Turn a str case into the equivalent bytes case.
        url, parsed, split = case
        return (url.encode('ascii'),
                tuple(part.encode('ascii') for part in parsed),
                tuple(part.encode('ascii') for part in split))

    bytes_cases = [_encode(case) for case in str_cases]
    for url, parsed, split in str_cases + bytes_cases:
        self.checkRoundtrips(url, parsed, split)
Michael W. Hudsonbd3e7712002-03-18 13:06:00 +0000134
def test_http_roundtrips(self):
    # urllib.parse.urlsplit treats 'http:' as an optimized special case,
    # so we test both 'http:' and 'https:' in all the following.
    # Three cheers for white box knowledge!
    #
    # Each case is written without its scheme: (url suffix, expected
    # urlparse() fields after the scheme, expected urlsplit() fields
    # after the scheme).  The scheme is prepended in the loop below.
    str_cases = [
        ('://www.python.org',
         ('www.python.org', '', '', '', ''),
         ('www.python.org', '', '', '')),
        ('://www.python.org#abc',
         ('www.python.org', '', '', '', 'abc'),
         ('www.python.org', '', '', 'abc')),
        ('://www.python.org?q=abc',
         ('www.python.org', '', '', 'q=abc', ''),
         ('www.python.org', '', 'q=abc', '')),
        ('://www.python.org/#abc',
         ('www.python.org', '/', '', '', 'abc'),
         ('www.python.org', '/', '', 'abc')),
        ('://a/b/c/d;p?q#f',
         ('a', '/b/c/d', 'p', 'q', 'f'),
         ('a', '/b/c/d;p', 'q', 'f')),
    ]

    def _encode(case):
        # Turn a str case into the equivalent bytes case.
        suffix, parsed, split = case
        return (suffix.encode('ascii'),
                tuple(part.encode('ascii') for part in parsed),
                tuple(part.encode('ascii') for part in split))

    bytes_cases = [_encode(case) for case in str_cases]
    str_tests = ('http', 'https'), str_cases
    bytes_tests = (b'http', b'https'), bytes_cases
    for schemes, test_cases in (str_tests, bytes_tests):
        for scheme in schemes:
            for suffix, parsed_tail, split_tail in test_cases:
                self.checkRoundtrips(scheme + suffix,
                                     (scheme,) + parsed_tail,
                                     (scheme,) + split_tail)
Fred Drake70705652002-10-16 21:02:36 +0000172
def checkJoin(self, base, relurl, expected):
    """Assert that urljoin(base, relurl) equals *expected*.

    All three arguments are str.  The join is checked once with the str
    values and once more with their ASCII-encoded bytes equivalents, so
    they must be ASCII-only.
    """
    self.assertEqual(urllib.parse.urljoin(base, relurl), expected)
    baseb, relurlb, expectedb = (
        text.encode('ascii') for text in (base, relurl, expected))
    self.assertEqual(urllib.parse.urljoin(baseb, relurlb), expectedb)
Guido van Rossumbbc05682002-10-14 19:59:54 +0000179
180 def test_unparse_parse(self):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000181 str_cases = ['Python', './Python','x-newscheme://foo.com/stuff','x://y','x:/y','x:/','/',]
182 bytes_cases = [x.encode('ascii') for x in str_cases]
183 for u in str_cases + bytes_cases:
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000184 self.assertEqual(urllib.parse.urlunsplit(urllib.parse.urlsplit(u)), u)
185 self.assertEqual(urllib.parse.urlunparse(urllib.parse.urlparse(u)), u)
Fred Drakea4d18a02001-01-05 05:57:04 +0000186
def test_RFC1808(self):
    # Every (relative URL, expected result) pair below is joined against
    # RFC1808_BASE through self.checkJoin().
    cases = [
        # "normal" cases from RFC 1808:
        ('g:h', 'g:h'),
        ('g', 'http://a/b/c/g'),
        ('./g', 'http://a/b/c/g'),
        ('g/', 'http://a/b/c/g/'),
        ('/g', 'http://a/g'),
        ('//g', 'http://g'),
        ('g?y', 'http://a/b/c/g?y'),
        ('g?y/./x', 'http://a/b/c/g?y/./x'),
        ('#s', 'http://a/b/c/d;p?q#s'),
        ('g#s', 'http://a/b/c/g#s'),
        ('g#s/./x', 'http://a/b/c/g#s/./x'),
        ('g?y#s', 'http://a/b/c/g?y#s'),
        ('g;x', 'http://a/b/c/g;x'),
        ('g;x?y#s', 'http://a/b/c/g;x?y#s'),
        ('.', 'http://a/b/c/'),
        ('./', 'http://a/b/c/'),
        ('..', 'http://a/b/'),
        ('../', 'http://a/b/'),
        ('../g', 'http://a/b/g'),
        ('../..', 'http://a/'),
        ('../../', 'http://a/'),
        ('../../g', 'http://a/g'),

        # "abnormal" cases from RFC 1808:
        ('', 'http://a/b/c/d;p?q#f'),
        ('g.', 'http://a/b/c/g.'),
        ('.g', 'http://a/b/c/.g'),
        ('g..', 'http://a/b/c/g..'),
        ('..g', 'http://a/b/c/..g'),
        ('./../g', 'http://a/b/g'),
        ('./g/.', 'http://a/b/c/g/'),
        ('g/./h', 'http://a/b/c/g/h'),
        ('g/../h', 'http://a/b/c/h'),
    ]
    for relurl, expected in cases:
        self.checkJoin(RFC1808_BASE, relurl, expected)

    # RFC 1808 and RFC 1630 disagree on these (according to RFC 1808),
    # so we'll not actually run these tests (which expect 1808 behavior).
    #self.checkJoin(RFC1808_BASE, 'http:g', 'http:g')
    #self.checkJoin(RFC1808_BASE, 'http:', 'http:')

    # XXX: The following tests are no longer compatible with RFC3986
    # self.checkJoin(RFC1808_BASE, '../../../g', 'http://a/../g')
    # self.checkJoin(RFC1808_BASE, '../../../../g', 'http://a/../../g')
    # self.checkJoin(RFC1808_BASE, '/./g', 'http://a/./g')
    # self.checkJoin(RFC1808_BASE, '/../g', 'http://a/../g')
233
234
Senthil Kumaran397eb442011-04-15 18:20:24 +0800235 def test_RFC2368(self):
236 # Issue 11467: path that starts with a number is not parsed correctly
237 self.assertEqual(urllib.parse.urlparse('mailto:1337@example.org'),
238 ('mailto', '', '1337@example.org', '', '', ''))
239
def test_RFC2396(self):
    # cases from RFC 2396
    #
    # Every (relative URL, expected result) pair below is joined against
    # RFC2396_BASE through self.checkJoin().
    cases = [
        ('g:h', 'g:h'),
        ('g', 'http://a/b/c/g'),
        ('./g', 'http://a/b/c/g'),
        ('g/', 'http://a/b/c/g/'),
        ('/g', 'http://a/g'),
        ('//g', 'http://g'),
        ('g?y', 'http://a/b/c/g?y'),
        ('#s', 'http://a/b/c/d;p?q#s'),
        ('g#s', 'http://a/b/c/g#s'),
        ('g?y#s', 'http://a/b/c/g?y#s'),
        ('g;x', 'http://a/b/c/g;x'),
        ('g;x?y#s', 'http://a/b/c/g;x?y#s'),
        ('.', 'http://a/b/c/'),
        ('./', 'http://a/b/c/'),
        ('..', 'http://a/b/'),
        ('../', 'http://a/b/'),
        ('../g', 'http://a/b/g'),
        ('../..', 'http://a/'),
        ('../../', 'http://a/'),
        ('../../g', 'http://a/g'),
        ('', RFC2396_BASE),
        ('g.', 'http://a/b/c/g.'),
        ('.g', 'http://a/b/c/.g'),
        ('g..', 'http://a/b/c/g..'),
        ('..g', 'http://a/b/c/..g'),
        ('./../g', 'http://a/b/g'),
        ('./g/.', 'http://a/b/c/g/'),
        ('g/./h', 'http://a/b/c/g/h'),
        ('g/../h', 'http://a/b/c/h'),
        ('g;x=1/./y', 'http://a/b/c/g;x=1/y'),
        ('g;x=1/../y', 'http://a/b/c/y'),
        ('g?y/./x', 'http://a/b/c/g?y/./x'),
        ('g?y/../x', 'http://a/b/c/g?y/../x'),
        ('g#s/./x', 'http://a/b/c/g#s/./x'),
        ('g#s/../x', 'http://a/b/c/g#s/../x'),
    ]
    for relurl, expected in cases:
        self.checkJoin(RFC2396_BASE, relurl, expected)

    # XXX: The following tests are no longer compatible with RFC3986
    # self.checkJoin(RFC2396_BASE, '../../../g', 'http://a/../g')
    # self.checkJoin(RFC2396_BASE, '../../../../g', 'http://a/../../g')
    # self.checkJoin(RFC2396_BASE, '/./g', 'http://a/./g')
    # self.checkJoin(RFC2396_BASE, '/../g', 'http://a/../g')
285
286
def test_RFC3986(self):
    # Test cases from RFC3986
    #
    # Every (relative URL, expected result) pair below is joined against
    # RFC3986_BASE through self.checkJoin().  Each case is listed once.
    cases = [
        # Normal examples
        ('g:h', 'g:h'),
        ('g', 'http://a/b/c/g'),
        ('./g', 'http://a/b/c/g'),
        ('g/', 'http://a/b/c/g/'),
        ('/g', 'http://a/g'),
        ('//g', 'http://g'),
        ('?y', 'http://a/b/c/d;p?y'),
        ('g?y', 'http://a/b/c/g?y'),
        ('#s', 'http://a/b/c/d;p?q#s'),
        ('g#s', 'http://a/b/c/g#s'),
        ('g?y#s', 'http://a/b/c/g?y#s'),
        (';x', 'http://a/b/c/;x'),
        ('g;x', 'http://a/b/c/g;x'),
        ('g;x?y#s', 'http://a/b/c/g;x?y#s'),
        ('', 'http://a/b/c/d;p?q'),
        ('.', 'http://a/b/c/'),
        ('./', 'http://a/b/c/'),
        ('..', 'http://a/b/'),
        ('../', 'http://a/b/'),
        ('../g', 'http://a/b/g'),
        ('../..', 'http://a/'),
        ('../../', 'http://a/'),
        ('../../g', 'http://a/g'),

        # Abnormal examples.  These are live assertions: excess '..'
        # segments and '.'/'..' right after the root are expected to be
        # dropped from the joined path.
        ('../../../g', 'http://a/g'),
        ('../../../../g', 'http://a/g'),
        ('/./g', 'http://a/g'),
        ('/../g', 'http://a/g'),
        ('g.', 'http://a/b/c/g.'),
        ('.g', 'http://a/b/c/.g'),
        ('g..', 'http://a/b/c/g..'),
        ('..g', 'http://a/b/c/..g'),
        ('./../g', 'http://a/b/g'),
        ('./g/.', 'http://a/b/c/g/'),
        ('g/./h', 'http://a/b/c/g/h'),
        ('g/../h', 'http://a/b/c/h'),
        ('g;x=1/./y', 'http://a/b/c/g;x=1/y'),
        ('g;x=1/../y', 'http://a/b/c/y'),
        ('g?y/./x', 'http://a/b/c/g?y/./x'),
        ('g?y/../x', 'http://a/b/c/g?y/../x'),
        ('g#s/./x', 'http://a/b/c/g#s/./x'),
        ('g#s/../x', 'http://a/b/c/g#s/../x'),
        # A strict parser would give 'http:g' here; this is the result
        # of the relaxed parser.
        ('http:g', 'http://a/b/c/g'),
    ]
    for relurl, expected in cases:
        self.checkJoin(RFC3986_BASE, relurl, expected)

    # Test for issue9721
    self.checkJoin('http://a/b/c/de', ';x', 'http://a/b/c/;x')
344
def test_urljoins(self):
    # (relative URL, expected result) pairs joined against SIMPLE_BASE.
    # Each case is listed once.
    simple_cases = [
        ('g:h', 'g:h'),
        ('http:g', 'http://a/b/c/g'),
        ('http:', 'http://a/b/c/d'),
        ('g', 'http://a/b/c/g'),
        ('./g', 'http://a/b/c/g'),
        ('g/', 'http://a/b/c/g/'),
        ('/g', 'http://a/g'),
        ('//g', 'http://g'),
        ('?y', 'http://a/b/c/d?y'),
        ('g?y', 'http://a/b/c/g?y'),
        ('g?y/./x', 'http://a/b/c/g?y/./x'),
        ('.', 'http://a/b/c/'),
        ('./', 'http://a/b/c/'),
        ('..', 'http://a/b/'),
        ('../', 'http://a/b/'),
        ('../g', 'http://a/b/g'),
        ('../..', 'http://a/'),
        ('../../g', 'http://a/g'),
        ('./../g', 'http://a/b/g'),
        ('./g/.', 'http://a/b/c/g/'),
        ('g/./h', 'http://a/b/c/g/h'),
        ('g/../h', 'http://a/b/c/h'),
        ('http:?y', 'http://a/b/c/d?y'),
        ('http:g?y', 'http://a/b/c/g?y'),
        ('http:g?y/./x', 'http://a/b/c/g?y/./x'),
    ]
    for relurl, expected in simple_cases:
        self.checkJoin(SIMPLE_BASE, relurl, expected)

    # XXX: The following tests are no longer compatible with RFC3986
    # self.checkJoin(SIMPLE_BASE, '../../../g','http://a/../g')
    # self.checkJoin(SIMPLE_BASE, '/./g','http://a/./g')

    # (base, relative URL, expected result) triples with other bases.
    other_cases = [
        ('http:///', '..', 'http:///'),
        ('', 'http://a/b/c/g?y/./x', 'http://a/b/c/g?y/./x'),
        ('', 'http://a/./g', 'http://a/./g'),
        ('svn://pathtorepo/dir1', 'dir2', 'svn://pathtorepo/dir2'),
        ('svn+ssh://pathtorepo/dir1', 'dir2', 'svn+ssh://pathtorepo/dir2'),

        # test for issue22118 duplicate slashes
        (SIMPLE_BASE + '/', 'foo', SIMPLE_BASE + '/foo'),

        # Non-RFC-defined tests, covering variations of base and trailing
        # slashes
        ('http://a/b/c/d/e/', '../../f/g/', 'http://a/b/c/f/g/'),
        ('http://a/b/c/d/e', '../../f/g/', 'http://a/b/f/g/'),
        ('http://a/b/c/d/e/', '/../../f/g/', 'http://a/f/g/'),
        ('http://a/b/c/d/e', '/../../f/g/', 'http://a/f/g/'),
        ('http://a/b/c/d/e/', '../../f/g', 'http://a/b/c/f/g'),
        ('http://a/b/', '../../f/g/', 'http://a/f/g/'),
    ]
    for base, relurl, expected in other_cases:
        self.checkJoin(base, relurl, expected)
394
Senthil Kumaranad02d232010-04-16 03:02:13 +0000395 def test_RFC2732(self):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000396 str_cases = [
Senthil Kumaranad02d232010-04-16 03:02:13 +0000397 ('http://Test.python.org:5432/foo/', 'test.python.org', 5432),
398 ('http://12.34.56.78:5432/foo/', '12.34.56.78', 5432),
399 ('http://[::1]:5432/foo/', '::1', 5432),
400 ('http://[dead:beef::1]:5432/foo/', 'dead:beef::1', 5432),
401 ('http://[dead:beef::]:5432/foo/', 'dead:beef::', 5432),
402 ('http://[dead:beef:cafe:5417:affe:8FA3:deaf:feed]:5432/foo/',
403 'dead:beef:cafe:5417:affe:8fa3:deaf:feed', 5432),
404 ('http://[::12.34.56.78]:5432/foo/', '::12.34.56.78', 5432),
405 ('http://[::ffff:12.34.56.78]:5432/foo/',
406 '::ffff:12.34.56.78', 5432),
407 ('http://Test.python.org/foo/', 'test.python.org', None),
408 ('http://12.34.56.78/foo/', '12.34.56.78', None),
409 ('http://[::1]/foo/', '::1', None),
410 ('http://[dead:beef::1]/foo/', 'dead:beef::1', None),
411 ('http://[dead:beef::]/foo/', 'dead:beef::', None),
412 ('http://[dead:beef:cafe:5417:affe:8FA3:deaf:feed]/foo/',
413 'dead:beef:cafe:5417:affe:8fa3:deaf:feed', None),
414 ('http://[::12.34.56.78]/foo/', '::12.34.56.78', None),
415 ('http://[::ffff:12.34.56.78]/foo/',
416 '::ffff:12.34.56.78', None),
Serhiy Storchakaff97b082014-01-18 18:30:33 +0200417 ('http://Test.python.org:/foo/', 'test.python.org', None),
418 ('http://12.34.56.78:/foo/', '12.34.56.78', None),
419 ('http://[::1]:/foo/', '::1', None),
420 ('http://[dead:beef::1]:/foo/', 'dead:beef::1', None),
421 ('http://[dead:beef::]:/foo/', 'dead:beef::', None),
422 ('http://[dead:beef:cafe:5417:affe:8FA3:deaf:feed]:/foo/',
423 'dead:beef:cafe:5417:affe:8fa3:deaf:feed', None),
424 ('http://[::12.34.56.78]:/foo/', '::12.34.56.78', None),
425 ('http://[::ffff:12.34.56.78]:/foo/',
426 '::ffff:12.34.56.78', None),
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000427 ]
428 def _encode(t):
429 return t[0].encode('ascii'), t[1].encode('ascii'), t[2]
430 bytes_cases = [_encode(x) for x in str_cases]
431 for url, hostname, port in str_cases + bytes_cases:
Senthil Kumaranad02d232010-04-16 03:02:13 +0000432 urlparsed = urllib.parse.urlparse(url)
433 self.assertEqual((urlparsed.hostname, urlparsed.port) , (hostname, port))
434
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000435 str_cases = [
Senthil Kumaranad02d232010-04-16 03:02:13 +0000436 'http://::12.34.56.78]/',
437 'http://[::1/foo/',
Senthil Kumaran7a1e09f2010-04-22 12:19:46 +0000438 'ftp://[::1/foo/bad]/bad',
Senthil Kumaran2eaef052010-04-20 20:42:50 +0000439 'http://[::1/foo/bad]/bad',
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000440 'http://[::ffff:12.34.56.78']
441 bytes_cases = [x.encode('ascii') for x in str_cases]
442 for invalid_url in str_cases + bytes_cases:
Senthil Kumaran7a1e09f2010-04-22 12:19:46 +0000443 self.assertRaises(ValueError, urllib.parse.urlparse, invalid_url)
Senthil Kumaranad02d232010-04-16 03:02:13 +0000444
def test_urldefrag(self):
    # (url, expected url without fragment, expected fragment)
    str_cases = [
        ('http://python.org#frag', 'http://python.org', 'frag'),
        ('http://python.org', 'http://python.org', ''),
        ('http://python.org/#frag', 'http://python.org/', 'frag'),
        ('http://python.org/', 'http://python.org/', ''),
        ('http://python.org/?q#frag', 'http://python.org/?q', 'frag'),
        ('http://python.org/?q', 'http://python.org/?q', ''),
        ('http://python.org/p#frag', 'http://python.org/p', 'frag'),
        ('http://python.org/p?q', 'http://python.org/p?q', ''),
        (RFC1808_BASE, 'http://a/b/c/d;p?q', 'f'),
        (RFC2396_BASE, 'http://a/b/c/d;p?q', ''),
    ]

    def _encode(case):
        # Same container type as the input, with every item as bytes.
        return type(case)(text.encode('ascii') for text in case)

    bytes_cases = [_encode(case) for case in str_cases]
    for url, defrag, frag in str_cases + bytes_cases:
        result = urllib.parse.urldefrag(url)
        self.assertEqual(result.geturl(), url)
        self.assertEqual(result, (defrag, frag))
        self.assertEqual(result.url, defrag)
        self.assertEqual(result.fragment, frag)
Fred Drake70705652002-10-16 21:02:36 +0000467
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000468 def test_urlsplit_attributes(self):
469 url = "HTTP://WWW.PYTHON.ORG/doc/#frag"
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000470 p = urllib.parse.urlsplit(url)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000471 self.assertEqual(p.scheme, "http")
472 self.assertEqual(p.netloc, "WWW.PYTHON.ORG")
473 self.assertEqual(p.path, "/doc/")
474 self.assertEqual(p.query, "")
475 self.assertEqual(p.fragment, "frag")
476 self.assertEqual(p.username, None)
477 self.assertEqual(p.password, None)
478 self.assertEqual(p.hostname, "www.python.org")
479 self.assertEqual(p.port, None)
480 # geturl() won't return exactly the original URL in this case
481 # since the scheme is always case-normalized
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000482 # We handle this by ignoring the first 4 characters of the URL
483 self.assertEqual(p.geturl()[4:], url[4:])
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000484
485 url = "http://User:Pass@www.python.org:080/doc/?query=yes#frag"
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000486 p = urllib.parse.urlsplit(url)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000487 self.assertEqual(p.scheme, "http")
488 self.assertEqual(p.netloc, "User:Pass@www.python.org:080")
489 self.assertEqual(p.path, "/doc/")
490 self.assertEqual(p.query, "query=yes")
491 self.assertEqual(p.fragment, "frag")
492 self.assertEqual(p.username, "User")
493 self.assertEqual(p.password, "Pass")
494 self.assertEqual(p.hostname, "www.python.org")
495 self.assertEqual(p.port, 80)
496 self.assertEqual(p.geturl(), url)
497
Christian Heimesfaf2f632008-01-06 16:59:19 +0000498 # Addressing issue1698, which suggests Username can contain
499 # "@" characters. Though not RFC compliant, many ftp sites allow
500 # and request email addresses as usernames.
501
502 url = "http://User@example.com:Pass@www.python.org:080/doc/?query=yes#frag"
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000503 p = urllib.parse.urlsplit(url)
Christian Heimesfaf2f632008-01-06 16:59:19 +0000504 self.assertEqual(p.scheme, "http")
505 self.assertEqual(p.netloc, "User@example.com:Pass@www.python.org:080")
506 self.assertEqual(p.path, "/doc/")
507 self.assertEqual(p.query, "query=yes")
508 self.assertEqual(p.fragment, "frag")
509 self.assertEqual(p.username, "User@example.com")
510 self.assertEqual(p.password, "Pass")
511 self.assertEqual(p.hostname, "www.python.org")
512 self.assertEqual(p.port, 80)
513 self.assertEqual(p.geturl(), url)
514
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000515 # And check them all again, only with bytes this time
516 url = b"HTTP://WWW.PYTHON.ORG/doc/#frag"
517 p = urllib.parse.urlsplit(url)
518 self.assertEqual(p.scheme, b"http")
519 self.assertEqual(p.netloc, b"WWW.PYTHON.ORG")
520 self.assertEqual(p.path, b"/doc/")
521 self.assertEqual(p.query, b"")
522 self.assertEqual(p.fragment, b"frag")
523 self.assertEqual(p.username, None)
524 self.assertEqual(p.password, None)
525 self.assertEqual(p.hostname, b"www.python.org")
526 self.assertEqual(p.port, None)
527 self.assertEqual(p.geturl()[4:], url[4:])
528
529 url = b"http://User:Pass@www.python.org:080/doc/?query=yes#frag"
530 p = urllib.parse.urlsplit(url)
531 self.assertEqual(p.scheme, b"http")
532 self.assertEqual(p.netloc, b"User:Pass@www.python.org:080")
533 self.assertEqual(p.path, b"/doc/")
534 self.assertEqual(p.query, b"query=yes")
535 self.assertEqual(p.fragment, b"frag")
536 self.assertEqual(p.username, b"User")
537 self.assertEqual(p.password, b"Pass")
538 self.assertEqual(p.hostname, b"www.python.org")
539 self.assertEqual(p.port, 80)
540 self.assertEqual(p.geturl(), url)
541
542 url = b"http://User@example.com:Pass@www.python.org:080/doc/?query=yes#frag"
543 p = urllib.parse.urlsplit(url)
544 self.assertEqual(p.scheme, b"http")
545 self.assertEqual(p.netloc, b"User@example.com:Pass@www.python.org:080")
546 self.assertEqual(p.path, b"/doc/")
547 self.assertEqual(p.query, b"query=yes")
548 self.assertEqual(p.fragment, b"frag")
549 self.assertEqual(p.username, b"User@example.com")
550 self.assertEqual(p.password, b"Pass")
551 self.assertEqual(p.hostname, b"www.python.org")
552 self.assertEqual(p.port, 80)
553 self.assertEqual(p.geturl(), url)
Christian Heimesfaf2f632008-01-06 16:59:19 +0000554
Senthil Kumaran2fc5a502012-05-24 21:56:17 +0800555 # Verify an illegal port is returned as None
556 url = b"HTTP://WWW.PYTHON.ORG:65536/doc/#frag"
557 p = urllib.parse.urlsplit(url)
558 self.assertEqual(p.port, None)
559
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000560 def test_attributes_bad_port(self):
561 """Check handling of non-integer ports."""
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000562 p = urllib.parse.urlsplit("http://www.example.net:foo")
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000563 self.assertEqual(p.netloc, "www.example.net:foo")
564 self.assertRaises(ValueError, lambda: p.port)
565
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000566 p = urllib.parse.urlparse("http://www.example.net:foo")
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000567 self.assertEqual(p.netloc, "www.example.net:foo")
568 self.assertRaises(ValueError, lambda: p.port)
569
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000570 # Once again, repeat ourselves to test bytes
571 p = urllib.parse.urlsplit(b"http://www.example.net:foo")
572 self.assertEqual(p.netloc, b"www.example.net:foo")
573 self.assertRaises(ValueError, lambda: p.port)
574
575 p = urllib.parse.urlparse(b"http://www.example.net:foo")
576 self.assertEqual(p.netloc, b"www.example.net:foo")
577 self.assertRaises(ValueError, lambda: p.port)
578
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000579 def test_attributes_without_netloc(self):
580 # This example is straight from RFC 3261. It looks like it
581 # should allow the username, hostname, and port to be filled
582 # in, but doesn't. Since it's a URI and doesn't use the
583 # scheme://netloc syntax, the netloc and related attributes
584 # should be left empty.
585 uri = "sip:alice@atlanta.com;maddr=239.255.255.1;ttl=15"
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000586 p = urllib.parse.urlsplit(uri)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000587 self.assertEqual(p.netloc, "")
588 self.assertEqual(p.username, None)
589 self.assertEqual(p.password, None)
590 self.assertEqual(p.hostname, None)
591 self.assertEqual(p.port, None)
592 self.assertEqual(p.geturl(), uri)
593
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000594 p = urllib.parse.urlparse(uri)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000595 self.assertEqual(p.netloc, "")
596 self.assertEqual(p.username, None)
597 self.assertEqual(p.password, None)
598 self.assertEqual(p.hostname, None)
599 self.assertEqual(p.port, None)
600 self.assertEqual(p.geturl(), uri)
601
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000602 # You guessed it, repeating the test with bytes input
603 uri = b"sip:alice@atlanta.com;maddr=239.255.255.1;ttl=15"
604 p = urllib.parse.urlsplit(uri)
605 self.assertEqual(p.netloc, b"")
606 self.assertEqual(p.username, None)
607 self.assertEqual(p.password, None)
608 self.assertEqual(p.hostname, None)
609 self.assertEqual(p.port, None)
610 self.assertEqual(p.geturl(), uri)
611
612 p = urllib.parse.urlparse(uri)
613 self.assertEqual(p.netloc, b"")
614 self.assertEqual(p.username, None)
615 self.assertEqual(p.password, None)
616 self.assertEqual(p.hostname, None)
617 self.assertEqual(p.port, None)
618 self.assertEqual(p.geturl(), uri)
619
Christian Heimesfaf2f632008-01-06 16:59:19 +0000620 def test_noslash(self):
621 # Issue 1637: http://foo.com?query is legal
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000622 self.assertEqual(urllib.parse.urlparse("http://example.com?blahblah=/foo"),
Christian Heimesfaf2f632008-01-06 16:59:19 +0000623 ('http', 'example.com', '', '', 'blahblah=/foo', ''))
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000624 self.assertEqual(urllib.parse.urlparse(b"http://example.com?blahblah=/foo"),
625 (b'http', b'example.com', b'', b'', b'blahblah=/foo', b''))
Christian Heimesfaf2f632008-01-06 16:59:19 +0000626
Senthil Kumaran84c7d9f2010-08-04 04:50:44 +0000627 def test_withoutscheme(self):
628 # Test urlparse without scheme
629 # Issue 754016: urlparse goes wrong with IP:port without scheme
630 # RFC 1808 specifies that netloc should start with //, urlparse expects
631 # the same, otherwise it classifies the portion of url as path.
632 self.assertEqual(urllib.parse.urlparse("path"),
633 ('','','path','','',''))
634 self.assertEqual(urllib.parse.urlparse("//www.python.org:80"),
635 ('','www.python.org:80','','','',''))
636 self.assertEqual(urllib.parse.urlparse("http://www.python.org:80"),
637 ('http','www.python.org:80','','','',''))
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000638 # Repeat for bytes input
639 self.assertEqual(urllib.parse.urlparse(b"path"),
640 (b'',b'',b'path',b'',b'',b''))
641 self.assertEqual(urllib.parse.urlparse(b"//www.python.org:80"),
642 (b'',b'www.python.org:80',b'',b'',b'',b''))
643 self.assertEqual(urllib.parse.urlparse(b"http://www.python.org:80"),
644 (b'http',b'www.python.org:80',b'',b'',b'',b''))
Senthil Kumaran84c7d9f2010-08-04 04:50:44 +0000645
646 def test_portseparator(self):
647 # Issue 754016 makes changes for port separator ':' from scheme separator
648 self.assertEqual(urllib.parse.urlparse("path:80"),
649 ('','','path:80','','',''))
650 self.assertEqual(urllib.parse.urlparse("http:"),('http','','','','',''))
651 self.assertEqual(urllib.parse.urlparse("https:"),('https','','','','',''))
652 self.assertEqual(urllib.parse.urlparse("http://www.python.org:80"),
653 ('http','www.python.org:80','','','',''))
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000654 # As usual, need to check bytes input as well
655 self.assertEqual(urllib.parse.urlparse(b"path:80"),
656 (b'',b'',b'path:80',b'',b'',b''))
657 self.assertEqual(urllib.parse.urlparse(b"http:"),(b'http',b'',b'',b'',b'',b''))
658 self.assertEqual(urllib.parse.urlparse(b"https:"),(b'https',b'',b'',b'',b'',b''))
659 self.assertEqual(urllib.parse.urlparse(b"http://www.python.org:80"),
660 (b'http',b'www.python.org:80',b'',b'',b'',b''))
Senthil Kumaran84c7d9f2010-08-04 04:50:44 +0000661
Facundo Batista2ac5de22008-07-07 18:24:11 +0000662 def test_usingsys(self):
663 # Issue 3314: sys module is used in the error
664 self.assertRaises(TypeError, urllib.parse.urlencode, "foo")
665
Senthil Kumaran6be85c52010-02-19 07:42:50 +0000666 def test_anyscheme(self):
667 # Issue 7904: s3://foo.com/stuff has netloc "foo.com".
Ezio Melotti5e15efa2010-02-19 14:49:02 +0000668 self.assertEqual(urllib.parse.urlparse("s3://foo.com/stuff"),
669 ('s3', 'foo.com', '/stuff', '', '', ''))
670 self.assertEqual(urllib.parse.urlparse("x-newscheme://foo.com/stuff"),
671 ('x-newscheme', 'foo.com', '/stuff', '', '', ''))
Senthil Kumaran1be320e2012-05-19 08:12:00 +0800672 self.assertEqual(urllib.parse.urlparse("x-newscheme://foo.com/stuff?query#fragment"),
673 ('x-newscheme', 'foo.com', '/stuff', '', 'query', 'fragment'))
674 self.assertEqual(urllib.parse.urlparse("x-newscheme://foo.com/stuff?query"),
675 ('x-newscheme', 'foo.com', '/stuff', '', 'query', ''))
676
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000677 # And for bytes...
678 self.assertEqual(urllib.parse.urlparse(b"s3://foo.com/stuff"),
679 (b's3', b'foo.com', b'/stuff', b'', b'', b''))
680 self.assertEqual(urllib.parse.urlparse(b"x-newscheme://foo.com/stuff"),
681 (b'x-newscheme', b'foo.com', b'/stuff', b'', b'', b''))
Senthil Kumaran1be320e2012-05-19 08:12:00 +0800682 self.assertEqual(urllib.parse.urlparse(b"x-newscheme://foo.com/stuff?query#fragment"),
683 (b'x-newscheme', b'foo.com', b'/stuff', b'', b'query', b'fragment'))
684 self.assertEqual(urllib.parse.urlparse(b"x-newscheme://foo.com/stuff?query"),
685 (b'x-newscheme', b'foo.com', b'/stuff', b'', b'query', b''))
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000686
687 def test_mixed_types_rejected(self):
688 # Several functions that process either strings or ASCII encoded bytes
689 # accept multiple arguments. Check they reject mixed type input
Ezio Melottied3a7d22010-12-01 02:32:32 +0000690 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000691 urllib.parse.urlparse("www.python.org", b"http")
Ezio Melottied3a7d22010-12-01 02:32:32 +0000692 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000693 urllib.parse.urlparse(b"www.python.org", "http")
Ezio Melottied3a7d22010-12-01 02:32:32 +0000694 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000695 urllib.parse.urlsplit("www.python.org", b"http")
Ezio Melottied3a7d22010-12-01 02:32:32 +0000696 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000697 urllib.parse.urlsplit(b"www.python.org", "http")
Ezio Melottied3a7d22010-12-01 02:32:32 +0000698 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000699 urllib.parse.urlunparse(( b"http", "www.python.org","","","",""))
Ezio Melottied3a7d22010-12-01 02:32:32 +0000700 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000701 urllib.parse.urlunparse(("http", b"www.python.org","","","",""))
Ezio Melottied3a7d22010-12-01 02:32:32 +0000702 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000703 urllib.parse.urlunsplit((b"http", "www.python.org","","",""))
Ezio Melottied3a7d22010-12-01 02:32:32 +0000704 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000705 urllib.parse.urlunsplit(("http", b"www.python.org","","",""))
Ezio Melottied3a7d22010-12-01 02:32:32 +0000706 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000707 urllib.parse.urljoin("http://python.org", b"http://python.org")
Ezio Melottied3a7d22010-12-01 02:32:32 +0000708 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000709 urllib.parse.urljoin(b"http://python.org", "http://python.org")
710
def _check_result_type(self, str_type):
    """Check that *str_type* and its bytes counterpart convert into each other.

    Both result types are instantiated with empty fields, then
    ``decode()``/``encode()`` are exercised with no arguments, with an
    explicit encoding, and with an explicit encoding plus error handler.
    """
    field_count = len(str_type._fields)
    bytes_type = str_type._encoded_counterpart
    # The two result classes must point at each other.
    self.assertIs(bytes_type._decoded_counterpart, str_type)
    empty_text = ('',) * field_count
    empty_raw = (b'',) * field_count
    text_result = str_type(*empty_text)
    raw_result = bytes_type(*empty_raw)
    codec_variants = ((), ('ascii',), ('ascii', 'strict'))

    self.assertEqual(text_result, empty_text)
    for codec_args in codec_variants:
        self.assertEqual(raw_result.decode(*codec_args), empty_text)
        self.assertEqual(raw_result.decode(*codec_args), text_result)

    self.assertEqual(raw_result, empty_raw)
    for codec_args in codec_variants:
        self.assertEqual(text_result.encode(*codec_args), empty_raw)
        self.assertEqual(text_result.encode(*codec_args), raw_result)
735
736 def test_result_pairs(self):
737 # Check encoding and decoding between result pairs
738 result_types = [
739 urllib.parse.DefragResult,
740 urllib.parse.SplitResult,
741 urllib.parse.ParseResult,
742 ]
743 for result_type in result_types:
744 self._check_result_type(result_type)
745
Victor Stinner1d87deb2011-01-14 13:05:19 +0000746 def test_parse_qs_encoding(self):
747 result = urllib.parse.parse_qs("key=\u0141%E9", encoding="latin-1")
748 self.assertEqual(result, {'key': ['\u0141\xE9']})
749 result = urllib.parse.parse_qs("key=\u0141%C3%A9", encoding="utf-8")
750 self.assertEqual(result, {'key': ['\u0141\xE9']})
751 result = urllib.parse.parse_qs("key=\u0141%C3%A9", encoding="ascii")
752 self.assertEqual(result, {'key': ['\u0141\ufffd\ufffd']})
753 result = urllib.parse.parse_qs("key=\u0141%E9-", encoding="ascii")
754 self.assertEqual(result, {'key': ['\u0141\ufffd-']})
755 result = urllib.parse.parse_qs("key=\u0141%E9-", encoding="ascii",
756 errors="ignore")
757 self.assertEqual(result, {'key': ['\u0141-']})
758
759 def test_parse_qsl_encoding(self):
760 result = urllib.parse.parse_qsl("key=\u0141%E9", encoding="latin-1")
761 self.assertEqual(result, [('key', '\u0141\xE9')])
762 result = urllib.parse.parse_qsl("key=\u0141%C3%A9", encoding="utf-8")
763 self.assertEqual(result, [('key', '\u0141\xE9')])
764 result = urllib.parse.parse_qsl("key=\u0141%C3%A9", encoding="ascii")
765 self.assertEqual(result, [('key', '\u0141\ufffd\ufffd')])
766 result = urllib.parse.parse_qsl("key=\u0141%E9-", encoding="ascii")
767 self.assertEqual(result, [('key', '\u0141\ufffd-')])
768 result = urllib.parse.parse_qsl("key=\u0141%E9-", encoding="ascii",
769 errors="ignore")
770 self.assertEqual(result, [('key', '\u0141-')])
771
Serhiy Storchakaff97b082014-01-18 18:30:33 +0200772 def test_splitport(self):
773 splitport = urllib.parse.splitport
774 self.assertEqual(splitport('parrot:88'), ('parrot', '88'))
775 self.assertEqual(splitport('parrot'), ('parrot', None))
776 self.assertEqual(splitport('parrot:'), ('parrot', None))
777 self.assertEqual(splitport('127.0.0.1'), ('127.0.0.1', None))
778 self.assertEqual(splitport('parrot:cheese'), ('parrot:cheese', None))
779
Senthil Kumarande02a712011-07-23 18:27:45 +0800780 def test_splitnport(self):
Serhiy Storchakaff97b082014-01-18 18:30:33 +0200781 splitnport = urllib.parse.splitnport
782 self.assertEqual(splitnport('parrot:88'), ('parrot', 88))
783 self.assertEqual(splitnport('parrot'), ('parrot', -1))
784 self.assertEqual(splitnport('parrot', 55), ('parrot', 55))
785 self.assertEqual(splitnport('parrot:'), ('parrot', -1))
786 self.assertEqual(splitnport('parrot:', 55), ('parrot', 55))
787 self.assertEqual(splitnport('127.0.0.1'), ('127.0.0.1', -1))
788 self.assertEqual(splitnport('127.0.0.1', 55), ('127.0.0.1', 55))
789 self.assertEqual(splitnport('parrot:cheese'), ('parrot', None))
790 self.assertEqual(splitnport('parrot:cheese', 55), ('parrot', None))
Senthil Kumarande02a712011-07-23 18:27:45 +0800791
792 def test_splitquery(self):
793 # Normal cases are exercised by other tests; ensure that we also
794 # catch cases with no port specified (testcase ensuring coverage)
795 result = urllib.parse.splitquery('http://python.org/fake?foo=bar')
796 self.assertEqual(result, ('http://python.org/fake', 'foo=bar'))
797 result = urllib.parse.splitquery('http://python.org/fake?foo=bar?')
798 self.assertEqual(result, ('http://python.org/fake?foo=bar', ''))
799 result = urllib.parse.splitquery('http://python.org/fake')
800 self.assertEqual(result, ('http://python.org/fake', None))
801
802 def test_splitvalue(self):
803 # Normal cases are exercised by other tests; test pathological cases
804 # with no key/value pairs. (testcase ensuring coverage)
805 result = urllib.parse.splitvalue('foo=bar')
806 self.assertEqual(result, ('foo', 'bar'))
807 result = urllib.parse.splitvalue('foo=')
808 self.assertEqual(result, ('foo', ''))
809 result = urllib.parse.splitvalue('foobar')
810 self.assertEqual(result, ('foobar', None))
811
812 def test_to_bytes(self):
813 result = urllib.parse.to_bytes('http://www.python.org')
814 self.assertEqual(result, 'http://www.python.org')
815 self.assertRaises(UnicodeError, urllib.parse.to_bytes,
816 'http://www.python.org/medi\u00e6val')
817
818 def test_urlencode_sequences(self):
819 # Other tests incidentally urlencode things; test non-covered cases:
820 # Sequence and object values.
821 result = urllib.parse.urlencode({'a': [1, 2], 'b': (3, 4, 5)}, True)
Georg Brandl09a7c722012-02-20 21:31:46 +0100822 # we cannot rely on ordering here
823 assert set(result.split('&')) == {'a=1', 'a=2', 'b=3', 'b=4', 'b=5'}
Senthil Kumarande02a712011-07-23 18:27:45 +0800824
825 class Trivial:
826 def __str__(self):
827 return 'trivial'
828
829 result = urllib.parse.urlencode({'a': Trivial()}, True)
830 self.assertEqual(result, 'a=trivial')
831
832 def test_quote_from_bytes(self):
833 self.assertRaises(TypeError, urllib.parse.quote_from_bytes, 'foo')
834 result = urllib.parse.quote_from_bytes(b'archaeological arcana')
835 self.assertEqual(result, 'archaeological%20arcana')
836 result = urllib.parse.quote_from_bytes(b'')
837 self.assertEqual(result, '')
838
839 def test_unquote_to_bytes(self):
840 result = urllib.parse.unquote_to_bytes('abc%20def')
841 self.assertEqual(result, b'abc def')
842 result = urllib.parse.unquote_to_bytes('')
843 self.assertEqual(result, b'')
844
845 def test_quote_errors(self):
846 self.assertRaises(TypeError, urllib.parse.quote, b'foo',
847 encoding='utf-8')
848 self.assertRaises(TypeError, urllib.parse.quote, b'foo', errors='strict')
Victor Stinner1d87deb2011-01-14 13:05:19 +0000849
Ezio Melotti6709b7d2012-05-19 17:15:19 +0300850 def test_issue14072(self):
851 p1 = urllib.parse.urlsplit('tel:+31-641044153')
852 self.assertEqual(p1.scheme, 'tel')
853 self.assertEqual(p1.path, '+31-641044153')
854 p2 = urllib.parse.urlsplit('tel:+31641044153')
855 self.assertEqual(p2.scheme, 'tel')
856 self.assertEqual(p2.path, '+31641044153')
Senthil Kumaraned301992012-12-24 14:00:20 -0800857 # assert the behavior for urlparse
858 p1 = urllib.parse.urlparse('tel:+31-641044153')
859 self.assertEqual(p1.scheme, 'tel')
860 self.assertEqual(p1.path, '+31-641044153')
861 p2 = urllib.parse.urlparse('tel:+31641044153')
862 self.assertEqual(p2.scheme, 'tel')
863 self.assertEqual(p2.path, '+31641044153')
864
865 def test_telurl_params(self):
866 p1 = urllib.parse.urlparse('tel:123-4;phone-context=+1-650-516')
867 self.assertEqual(p1.scheme, 'tel')
868 self.assertEqual(p1.path, '123-4')
869 self.assertEqual(p1.params, 'phone-context=+1-650-516')
870
871 p1 = urllib.parse.urlparse('tel:+1-201-555-0123')
872 self.assertEqual(p1.scheme, 'tel')
873 self.assertEqual(p1.path, '+1-201-555-0123')
874 self.assertEqual(p1.params, '')
875
876 p1 = urllib.parse.urlparse('tel:7042;phone-context=example.com')
877 self.assertEqual(p1.scheme, 'tel')
878 self.assertEqual(p1.path, '7042')
879 self.assertEqual(p1.params, 'phone-context=example.com')
880
881 p1 = urllib.parse.urlparse('tel:863-1234;phone-context=+1-914-555')
882 self.assertEqual(p1.scheme, 'tel')
883 self.assertEqual(p1.path, '863-1234')
884 self.assertEqual(p1.params, 'phone-context=+1-914-555')
885
R David Murrayf5163882013-03-21 20:56:51 -0400886 def test_unwrap(self):
887 url = urllib.parse.unwrap('<URL:type://host/path>')
888 self.assertEqual(url, 'type://host/path')
889
890 def test_Quoter_repr(self):
891 quoter = urllib.parse.Quoter(urllib.parse._ALWAYS_SAFE)
892 self.assertIn('Quoter', repr(quoter))
893
Senthil Kumaran6be85c52010-02-19 07:42:50 +0000894
def test_main():
    """Run the UrlParseTestCase suite through ``support.run_unittest``."""
    test_classes = (UrlParseTestCase,)
    support.run_unittest(*test_classes)
Skip Montanaro6ec967d2002-03-23 05:32:10 +0000897
# Run the suite when this file is executed directly as a script.
if __name__ == '__main__':
    test_main()