blob: 378a427bc56194f34b1d7bc76044f31a7d9cb651 [file] [log] [blame]
Benjamin Peterson90f5ba52010-03-11 22:53:45 +00001#! /usr/bin/env python3
Skip Montanaro6ec967d2002-03-23 05:32:10 +00002
Benjamin Petersonee8712c2008-05-20 21:35:26 +00003from test import support
Skip Montanaro6ec967d2002-03-23 05:32:10 +00004import unittest
Jeremy Hylton1afc1692008-06-18 20:49:58 +00005import urllib.parse
Fred Drakea4d18a02001-01-05 05:57:04 +00006
# Base URLs for the urljoin() tests.  Each constant is named after the
# RFC whose relative-reference examples are resolved against it.
RFC1808_BASE = "http://a/b/c/d;p?q#f"
RFC2396_BASE = "http://a/b/c/d;p?q"
RFC3986_BASE = 'http://a/b/c/d;p?q'
SIMPLE_BASE = 'http://a/b/c/d'

# A list of test cases for parse_qsl().  Each test case is a two-tuple
# that contains the query (str or bytes) and the expected result: a list
# of (name, value) pairs, with blank values included.
parse_qsl_test_cases = [
    ("", []),
    ("&", []),
    ("&&", []),
    ("=", [('', '')]),
    ("=a", [('', 'a')]),
    ("a", [('a', '')]),
    ("a=", [('a', '')]),
    ("&a=b", [('a', 'b')]),
    ("a=a+b&b=b+c", [('a', 'a b'), ('b', 'b c')]),
    ("a=1&a=2", [('a', '1'), ('a', '2')]),
    (b"", []),
    (b"&", []),
    (b"&&", []),
    (b"=", [(b'', b'')]),
    (b"=a", [(b'', b'a')]),
    (b"a", [(b'a', b'')]),
    (b"a=", [(b'a', b'')]),
    (b"&a=b", [(b'a', b'b')]),
    (b"a=a+b&b=b+c", [(b'a', b'a b'), (b'b', b'b c')]),
    (b"a=1&a=2", [(b'a', b'1'), (b'a', b'2')]),
]
39
Skip Montanaro6ec967d2002-03-23 05:32:10 +000040class UrlParseTestCase(unittest.TestCase):
Johannes Gijsbers41e4faa2005-01-09 15:29:10 +000041
def checkRoundtrips(self, url, parsed, split):
    """Assert that *url* parses as expected and round-trips unchanged.

    *parsed* is the 6-tuple expected from urlparse(url) and *split* is
    the 5-tuple expected from urlsplit(url).  For each of the two
    parsers the URL is parsed, compared with the expectation, put back
    together, and the result of geturl() is parsed once more to check
    that it is a fixpoint.
    """
    parse_fields = ('scheme', 'netloc', 'path',
                    'params', 'query', 'fragment')
    split_fields = ('scheme', 'netloc', 'path', 'query', 'fragment')
    # Attributes derived from netloc; compared after the tuple fields.
    derived = ('username', 'password', 'hostname', 'port')
    plans = (
        (urllib.parse.urlparse, urllib.parse.urlunparse,
         parse_fields, parsed),
        # check the roundtrip using urlsplit() as well
        (urllib.parse.urlsplit, urllib.parse.urlunsplit,
         split_fields, split),
    )
    for parse, unparse, fields, expected in plans:
        first = parse(url)
        self.assertEqual(first, expected)
        by_name = tuple(getattr(first, name) for name in fields)
        self.assertEqual(by_name, expected)

        # put it back together and it should be the same
        rebuilt = unparse(first)
        self.assertEqual(rebuilt, url)
        self.assertEqual(rebuilt, first.geturl())

        # the result of geturl() is a fixpoint; we can always parse it
        # again to get the same result:
        again = parse(first.geturl())
        self.assertEqual(again.geturl(), first.geturl())
        self.assertEqual(again, first)
        for name in fields + derived:
            self.assertEqual(getattr(again, name), getattr(first, name))
Johannes Gijsbers41e4faa2005-01-09 15:29:10 +000092
def test_qsl(self):
    # Run every parse_qsl() case twice: once keeping blank values and
    # once dropping them.
    for query, pairs in parse_qsl_test_cases:
        failure_note = "Error parsing %r" % query
        kept = urllib.parse.parse_qsl(query, keep_blank_values=True)
        self.assertEqual(kept, pairs, failure_note)
        # Without blank values, only pairs with a non-empty value remain.
        nonblank_pairs = [pair for pair in pairs if len(pair[1])]
        dropped = urllib.parse.parse_qsl(query, keep_blank_values=False)
        self.assertEqual(dropped, nonblank_pairs, failure_note)
Facundo Batistac469d4c2008-09-03 22:49:01 +0000101
def test_roundtrips(self):
    # Each case is (url, expected urlparse() tuple, expected urlsplit()
    # tuple); every case is checked as str and again as ASCII bytes.
    str_cases = [
        ('file:///tmp/junk.txt',
         ('file', '', '/tmp/junk.txt', '', '', ''),
         ('file', '', '/tmp/junk.txt', '', '')),
        ('imap://mail.python.org/mbox1',
         ('imap', 'mail.python.org', '/mbox1', '', '', ''),
         ('imap', 'mail.python.org', '/mbox1', '', '')),
        ('mms://wms.sys.hinet.net/cts/Drama/09006251100.asf',
         ('mms', 'wms.sys.hinet.net', '/cts/Drama/09006251100.asf',
          '', '', ''),
         ('mms', 'wms.sys.hinet.net', '/cts/Drama/09006251100.asf',
          '', '')),
        ('nfs://server/path/to/file.txt',
         ('nfs', 'server', '/path/to/file.txt', '', '', ''),
         ('nfs', 'server', '/path/to/file.txt', '', '')),
        ('svn+ssh://svn.zope.org/repos/main/ZConfig/trunk/',
         ('svn+ssh', 'svn.zope.org', '/repos/main/ZConfig/trunk/',
          '', '', ''),
         ('svn+ssh', 'svn.zope.org', '/repos/main/ZConfig/trunk/',
          '', '')),
        ('git+ssh://git@github.com/user/project.git',
         ('git+ssh', 'git@github.com', '/user/project.git',
          '', '', ''),
         ('git+ssh', 'git@github.com', '/user/project.git',
          '', '')),
    ]

    def as_bytes(fields):
        return tuple(field.encode('ascii') for field in fields)

    bytes_cases = [
        (url.encode('ascii'), as_bytes(parsed), as_bytes(split))
        for url, parsed, split in str_cases
    ]
    for case in str_cases + bytes_cases:
        self.checkRoundtrips(*case)
Michael W. Hudsonbd3e7712002-03-18 13:06:00 +0000136
def test_http_roundtrips(self):
    # urllib.parse.urlsplit treats 'http:' as an optimized special case,
    # so we test both 'http:' and 'https:' in all the following.
    # Three cheers for white box knowledge!
    #
    # Each case holds the URL and the expected tuples *without* the
    # scheme; the scheme is prepended to all three before checking.
    str_cases = [
        ('://www.python.org',
         ('www.python.org', '', '', '', ''),
         ('www.python.org', '', '', '')),
        ('://www.python.org#abc',
         ('www.python.org', '', '', '', 'abc'),
         ('www.python.org', '', '', 'abc')),
        ('://www.python.org?q=abc',
         ('www.python.org', '', '', 'q=abc', ''),
         ('www.python.org', '', 'q=abc', '')),
        ('://www.python.org/#abc',
         ('www.python.org', '/', '', '', 'abc'),
         ('www.python.org', '/', '', 'abc')),
        ('://a/b/c/d;p?q#f',
         ('a', '/b/c/d', 'p', 'q', 'f'),
         ('a', '/b/c/d;p', 'q', 'f')),
    ]

    def as_bytes(fields):
        return tuple(field.encode('ascii') for field in fields)

    bytes_cases = [
        (tail.encode('ascii'), as_bytes(parsed), as_bytes(split))
        for tail, parsed, split in str_cases
    ]
    variants = (
        (('http', 'https'), str_cases),
        ((b'http', b'https'), bytes_cases),
    )
    for schemes, cases in variants:
        for scheme in schemes:
            for tail, parsed_rest, split_rest in cases:
                self.checkRoundtrips(scheme + tail,
                                     (scheme,) + parsed_rest,
                                     (scheme,) + split_rest)
Fred Drake70705652002-10-16 21:02:36 +0000174
def checkJoin(self, base, relurl, expected):
    """Assert that urljoin(base, relurl) gives *expected*, as str and bytes.

    *base*, *relurl* and *expected* are ASCII-only str objects.  The join
    is checked once with the arguments as given and once more with all
    three encoded to bytes.  On failure the assertion message names the
    base and relative URL that were being joined.
    """
    self.assertEqual(urllib.parse.urljoin(base, relurl), expected,
                     "urljoin(%r, %r)" % (base, relurl))
    baseb, relurlb, expectedb = (
        x.encode('ascii') for x in (base, relurl, expected))
    self.assertEqual(urllib.parse.urljoin(baseb, relurlb), expectedb,
                     "urljoin(%r, %r)" % (baseb, relurlb))
Guido van Rossumbbc05682002-10-14 19:59:54 +0000181
182 def test_unparse_parse(self):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000183 str_cases = ['Python', './Python','x-newscheme://foo.com/stuff','x://y','x:/y','x:/','/',]
184 bytes_cases = [x.encode('ascii') for x in str_cases]
185 for u in str_cases + bytes_cases:
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000186 self.assertEqual(urllib.parse.urlunsplit(urllib.parse.urlsplit(u)), u)
187 self.assertEqual(urllib.parse.urlunparse(urllib.parse.urlparse(u)), u)
Fred Drakea4d18a02001-01-05 05:57:04 +0000188
def test_RFC1808(self):
    # Each entry is (relative URL, expected result of joining it with
    # RFC1808_BASE).

    # "normal" cases from RFC 1808:
    normal_cases = [
        ('g:h', 'g:h'),
        ('g', 'http://a/b/c/g'),
        ('./g', 'http://a/b/c/g'),
        ('g/', 'http://a/b/c/g/'),
        ('/g', 'http://a/g'),
        ('//g', 'http://g'),
        ('g?y', 'http://a/b/c/g?y'),
        ('g?y/./x', 'http://a/b/c/g?y/./x'),
        ('#s', 'http://a/b/c/d;p?q#s'),
        ('g#s', 'http://a/b/c/g#s'),
        ('g#s/./x', 'http://a/b/c/g#s/./x'),
        ('g?y#s', 'http://a/b/c/g?y#s'),
        ('g;x', 'http://a/b/c/g;x'),
        ('g;x?y#s', 'http://a/b/c/g;x?y#s'),
        ('.', 'http://a/b/c/'),
        ('./', 'http://a/b/c/'),
        ('..', 'http://a/b/'),
        ('../', 'http://a/b/'),
        ('../g', 'http://a/b/g'),
        ('../..', 'http://a/'),
        ('../../', 'http://a/'),
        ('../../g', 'http://a/g'),
    ]

    # "abnormal" cases from RFC 1808:
    abnormal_cases = [
        ('', 'http://a/b/c/d;p?q#f'),
        ('../../../g', 'http://a/../g'),
        ('../../../../g', 'http://a/../../g'),
        ('/./g', 'http://a/./g'),
        ('/../g', 'http://a/../g'),
        ('g.', 'http://a/b/c/g.'),
        ('.g', 'http://a/b/c/.g'),
        ('g..', 'http://a/b/c/g..'),
        ('..g', 'http://a/b/c/..g'),
        ('./../g', 'http://a/b/g'),
        ('./g/.', 'http://a/b/c/g/'),
        ('g/./h', 'http://a/b/c/g/h'),
        ('g/../h', 'http://a/b/c/h'),
    ]

    for relurl, expected in normal_cases + abnormal_cases:
        self.checkJoin(RFC1808_BASE, relurl, expected)

    # RFC 1808 and RFC 1630 disagree on these (according to RFC 1808),
    # so we'll not actually run these tests (which expect 1808 behavior):
    #     'http:g' -> 'http:g'
    #     'http:'  -> 'http:'
Fred Drakea4d18a02001-01-05 05:57:04 +0000233
Senthil Kumaran397eb442011-04-15 18:20:24 +0800234 def test_RFC2368(self):
235 # Issue 11467: path that starts with a number is not parsed correctly
236 self.assertEqual(urllib.parse.urlparse('mailto:1337@example.org'),
237 ('mailto', '', '1337@example.org', '', '', ''))
238
def test_RFC2396(self):
    # cases from RFC 2396
    # Each entry is (relative URL, expected result of joining it with
    # RFC2396_BASE).
    cases = [
        ('g:h', 'g:h'),
        ('g', 'http://a/b/c/g'),
        ('./g', 'http://a/b/c/g'),
        ('g/', 'http://a/b/c/g/'),
        ('/g', 'http://a/g'),
        ('//g', 'http://g'),
        ('g?y', 'http://a/b/c/g?y'),
        ('#s', 'http://a/b/c/d;p?q#s'),
        ('g#s', 'http://a/b/c/g#s'),
        ('g?y#s', 'http://a/b/c/g?y#s'),
        ('g;x', 'http://a/b/c/g;x'),
        ('g;x?y#s', 'http://a/b/c/g;x?y#s'),
        ('.', 'http://a/b/c/'),
        ('./', 'http://a/b/c/'),
        ('..', 'http://a/b/'),
        ('../', 'http://a/b/'),
        ('../g', 'http://a/b/g'),
        ('../..', 'http://a/'),
        ('../../', 'http://a/'),
        ('../../g', 'http://a/g'),
        ('', RFC2396_BASE),
        ('../../../g', 'http://a/../g'),
        ('../../../../g', 'http://a/../../g'),
        ('/./g', 'http://a/./g'),
        ('/../g', 'http://a/../g'),
        ('g.', 'http://a/b/c/g.'),
        ('.g', 'http://a/b/c/.g'),
        ('g..', 'http://a/b/c/g..'),
        ('..g', 'http://a/b/c/..g'),
        ('./../g', 'http://a/b/g'),
        ('./g/.', 'http://a/b/c/g/'),
        ('g/./h', 'http://a/b/c/g/h'),
        ('g/../h', 'http://a/b/c/h'),
        ('g;x=1/./y', 'http://a/b/c/g;x=1/y'),
        ('g;x=1/../y', 'http://a/b/c/y'),
        ('g?y/./x', 'http://a/b/c/g?y/./x'),
        ('g?y/../x', 'http://a/b/c/g?y/../x'),
        ('g#s/./x', 'http://a/b/c/g#s/./x'),
        ('g#s/../x', 'http://a/b/c/g#s/../x'),
    ]
    for relurl, expected in cases:
        self.checkJoin(RFC2396_BASE, relurl, expected)
282
def test_RFC3986(self):
    # Test cases from RFC3986
    # Each entry is (relative URL, expected result of joining it with
    # RFC3986_BASE).  Every case is listed once and uses RFC3986_BASE.
    normal_cases = [
        ('g:h', 'g:h'),
        ('g', 'http://a/b/c/g'),
        ('./g', 'http://a/b/c/g'),
        ('g/', 'http://a/b/c/g/'),
        ('/g', 'http://a/g'),
        ('//g', 'http://g'),
        ('?y', 'http://a/b/c/d;p?y'),
        ('g?y', 'http://a/b/c/g?y'),
        ('#s', 'http://a/b/c/d;p?q#s'),
        ('g#s', 'http://a/b/c/g#s'),
        ('g?y#s', 'http://a/b/c/g?y#s'),
        (';x', 'http://a/b/c/;x'),
        ('g;x', 'http://a/b/c/g;x'),
        ('g;x?y#s', 'http://a/b/c/g;x?y#s'),
        ('', 'http://a/b/c/d;p?q'),
        ('.', 'http://a/b/c/'),
        ('./', 'http://a/b/c/'),
        ('..', 'http://a/b/'),
        ('../', 'http://a/b/'),
        ('../g', 'http://a/b/g'),
        ('../..', 'http://a/'),
        ('../../', 'http://a/'),
        ('../../g', 'http://a/g'),
    ]

    # Abnormal examples
    #
    # The first four 'abnormal' examples are not run; they are kept
    # here for reference only, with the results RFC 3986 lists:
    #     '../../../g'    -> 'http://a/g'
    #     '../../../../g' -> 'http://a/g'
    #     '/./g'          -> 'http://a/g'
    #     '/../g'         -> 'http://a/g'
    abnormal_cases = [
        ('g.', 'http://a/b/c/g.'),
        ('.g', 'http://a/b/c/.g'),
        ('g..', 'http://a/b/c/g..'),
        ('..g', 'http://a/b/c/..g'),
        ('./../g', 'http://a/b/g'),
        ('./g/.', 'http://a/b/c/g/'),
        ('g/./h', 'http://a/b/c/g/h'),
        ('g/../h', 'http://a/b/c/h'),
        ('g;x=1/./y', 'http://a/b/c/g;x=1/y'),
        ('g;x=1/../y', 'http://a/b/c/y'),
        ('g?y/./x', 'http://a/b/c/g?y/./x'),
        ('g?y/../x', 'http://a/b/c/g?y/../x'),
        ('g#s/./x', 'http://a/b/c/g#s/./x'),
        ('g#s/../x', 'http://a/b/c/g#s/../x'),
        # A strict parser would give 'http:g' here; the relaxed result
        # is the one that is checked.
        ('http:g', 'http://a/b/c/g'),
    ]

    for relurl, expected in normal_cases + abnormal_cases:
        self.checkJoin(RFC3986_BASE, relurl, expected)

    # Test for issue9721
    self.checkJoin('http://a/b/c/de', ';x', 'http://a/b/c/;x')
340
def test_urljoins(self):
    # (relative URL, expected result) pairs joined with SIMPLE_BASE.
    simple_cases = [
        ('g:h', 'g:h'),
        ('http:g', 'http://a/b/c/g'),
        ('http:', 'http://a/b/c/d'),
        ('g', 'http://a/b/c/g'),
        ('./g', 'http://a/b/c/g'),
        ('g/', 'http://a/b/c/g/'),
        ('/g', 'http://a/g'),
        ('//g', 'http://g'),
        ('?y', 'http://a/b/c/d?y'),
        ('g?y', 'http://a/b/c/g?y'),
        ('g?y/./x', 'http://a/b/c/g?y/./x'),
        ('.', 'http://a/b/c/'),
        ('./', 'http://a/b/c/'),
        ('..', 'http://a/b/'),
        ('../', 'http://a/b/'),
        ('../g', 'http://a/b/g'),
        ('../..', 'http://a/'),
        ('../../g', 'http://a/g'),
        ('../../../g', 'http://a/../g'),
        ('./../g', 'http://a/b/g'),
        ('./g/.', 'http://a/b/c/g/'),
        ('/./g', 'http://a/./g'),
        ('g/./h', 'http://a/b/c/g/h'),
        ('g/../h', 'http://a/b/c/h'),
        ('http:g', 'http://a/b/c/g'),
        ('http:', 'http://a/b/c/d'),
        ('http:?y', 'http://a/b/c/d?y'),
        ('http:g?y', 'http://a/b/c/g?y'),
        ('http:g?y/./x', 'http://a/b/c/g?y/./x'),
    ]
    for relurl, expected in simple_cases:
        self.checkJoin(SIMPLE_BASE, relurl, expected)

    # (base, relative URL, expected result) triples with their own base.
    other_cases = [
        ('http:///', '..', 'http:///'),
        ('', 'http://a/b/c/g?y/./x', 'http://a/b/c/g?y/./x'),
        ('', 'http://a/./g', 'http://a/./g'),
        ('svn://pathtorepo/dir1', 'dir2', 'svn://pathtorepo/dir2'),
        ('svn+ssh://pathtorepo/dir1', 'dir2',
         'svn+ssh://pathtorepo/dir2'),
    ]
    for base, relurl, expected in other_cases:
        self.checkJoin(base, relurl, expected)
Senthil Kumaranaa69d4d2010-07-14 10:21:22 +0000376
Senthil Kumaranad02d232010-04-16 03:02:13 +0000377 def test_RFC2732(self):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000378 str_cases = [
Senthil Kumaranad02d232010-04-16 03:02:13 +0000379 ('http://Test.python.org:5432/foo/', 'test.python.org', 5432),
380 ('http://12.34.56.78:5432/foo/', '12.34.56.78', 5432),
381 ('http://[::1]:5432/foo/', '::1', 5432),
382 ('http://[dead:beef::1]:5432/foo/', 'dead:beef::1', 5432),
383 ('http://[dead:beef::]:5432/foo/', 'dead:beef::', 5432),
384 ('http://[dead:beef:cafe:5417:affe:8FA3:deaf:feed]:5432/foo/',
385 'dead:beef:cafe:5417:affe:8fa3:deaf:feed', 5432),
386 ('http://[::12.34.56.78]:5432/foo/', '::12.34.56.78', 5432),
387 ('http://[::ffff:12.34.56.78]:5432/foo/',
388 '::ffff:12.34.56.78', 5432),
389 ('http://Test.python.org/foo/', 'test.python.org', None),
390 ('http://12.34.56.78/foo/', '12.34.56.78', None),
391 ('http://[::1]/foo/', '::1', None),
392 ('http://[dead:beef::1]/foo/', 'dead:beef::1', None),
393 ('http://[dead:beef::]/foo/', 'dead:beef::', None),
394 ('http://[dead:beef:cafe:5417:affe:8FA3:deaf:feed]/foo/',
395 'dead:beef:cafe:5417:affe:8fa3:deaf:feed', None),
396 ('http://[::12.34.56.78]/foo/', '::12.34.56.78', None),
397 ('http://[::ffff:12.34.56.78]/foo/',
398 '::ffff:12.34.56.78', None),
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000399 ]
400 def _encode(t):
401 return t[0].encode('ascii'), t[1].encode('ascii'), t[2]
402 bytes_cases = [_encode(x) for x in str_cases]
403 for url, hostname, port in str_cases + bytes_cases:
Senthil Kumaranad02d232010-04-16 03:02:13 +0000404 urlparsed = urllib.parse.urlparse(url)
405 self.assertEqual((urlparsed.hostname, urlparsed.port) , (hostname, port))
406
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000407 str_cases = [
Senthil Kumaranad02d232010-04-16 03:02:13 +0000408 'http://::12.34.56.78]/',
409 'http://[::1/foo/',
Senthil Kumaran7a1e09f2010-04-22 12:19:46 +0000410 'ftp://[::1/foo/bad]/bad',
Senthil Kumaran2eaef052010-04-20 20:42:50 +0000411 'http://[::1/foo/bad]/bad',
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000412 'http://[::ffff:12.34.56.78']
413 bytes_cases = [x.encode('ascii') for x in str_cases]
414 for invalid_url in str_cases + bytes_cases:
Senthil Kumaran7a1e09f2010-04-22 12:19:46 +0000415 self.assertRaises(ValueError, urllib.parse.urlparse, invalid_url)
Senthil Kumaranad02d232010-04-16 03:02:13 +0000416
def test_urldefrag(self):
    # Each case is (url, url without fragment, fragment); every case is
    # checked as str and again as ASCII bytes.
    str_cases = [
        ('http://python.org#frag', 'http://python.org', 'frag'),
        ('http://python.org', 'http://python.org', ''),
        ('http://python.org/#frag', 'http://python.org/', 'frag'),
        ('http://python.org/', 'http://python.org/', ''),
        ('http://python.org/?q#frag', 'http://python.org/?q', 'frag'),
        ('http://python.org/?q', 'http://python.org/?q', ''),
        ('http://python.org/p#frag', 'http://python.org/p', 'frag'),
        ('http://python.org/p?q', 'http://python.org/p?q', ''),
        (RFC1808_BASE, 'http://a/b/c/d;p?q', 'f'),
        (RFC2396_BASE, 'http://a/b/c/d;p?q', ''),
    ]
    bytes_cases = [
        tuple(part.encode('ascii') for part in case)
        for case in str_cases
    ]
    for url, defrag, frag in str_cases + bytes_cases:
        outcome = urllib.parse.urldefrag(url)
        self.assertEqual(outcome.geturl(), url)
        self.assertEqual(outcome, (defrag, frag))
        self.assertEqual(outcome.url, defrag)
        self.assertEqual(outcome.fragment, frag)
Fred Drake70705652002-10-16 21:02:36 +0000439
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000440 def test_urlsplit_attributes(self):
441 url = "HTTP://WWW.PYTHON.ORG/doc/#frag"
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000442 p = urllib.parse.urlsplit(url)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000443 self.assertEqual(p.scheme, "http")
444 self.assertEqual(p.netloc, "WWW.PYTHON.ORG")
445 self.assertEqual(p.path, "/doc/")
446 self.assertEqual(p.query, "")
447 self.assertEqual(p.fragment, "frag")
448 self.assertEqual(p.username, None)
449 self.assertEqual(p.password, None)
450 self.assertEqual(p.hostname, "www.python.org")
451 self.assertEqual(p.port, None)
452 # geturl() won't return exactly the original URL in this case
453 # since the scheme is always case-normalized
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000454 # We handle this by ignoring the first 4 characters of the URL
455 self.assertEqual(p.geturl()[4:], url[4:])
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000456
457 url = "http://User:Pass@www.python.org:080/doc/?query=yes#frag"
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000458 p = urllib.parse.urlsplit(url)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000459 self.assertEqual(p.scheme, "http")
460 self.assertEqual(p.netloc, "User:Pass@www.python.org:080")
461 self.assertEqual(p.path, "/doc/")
462 self.assertEqual(p.query, "query=yes")
463 self.assertEqual(p.fragment, "frag")
464 self.assertEqual(p.username, "User")
465 self.assertEqual(p.password, "Pass")
466 self.assertEqual(p.hostname, "www.python.org")
467 self.assertEqual(p.port, 80)
468 self.assertEqual(p.geturl(), url)
469
Christian Heimesfaf2f632008-01-06 16:59:19 +0000470 # Addressing issue1698, which suggests Username can contain
471 # "@" characters. Though not RFC compliant, many ftp sites allow
472 # and request email addresses as usernames.
473
474 url = "http://User@example.com:Pass@www.python.org:080/doc/?query=yes#frag"
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000475 p = urllib.parse.urlsplit(url)
Christian Heimesfaf2f632008-01-06 16:59:19 +0000476 self.assertEqual(p.scheme, "http")
477 self.assertEqual(p.netloc, "User@example.com:Pass@www.python.org:080")
478 self.assertEqual(p.path, "/doc/")
479 self.assertEqual(p.query, "query=yes")
480 self.assertEqual(p.fragment, "frag")
481 self.assertEqual(p.username, "User@example.com")
482 self.assertEqual(p.password, "Pass")
483 self.assertEqual(p.hostname, "www.python.org")
484 self.assertEqual(p.port, 80)
485 self.assertEqual(p.geturl(), url)
486
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000487 # And check them all again, only with bytes this time
488 url = b"HTTP://WWW.PYTHON.ORG/doc/#frag"
489 p = urllib.parse.urlsplit(url)
490 self.assertEqual(p.scheme, b"http")
491 self.assertEqual(p.netloc, b"WWW.PYTHON.ORG")
492 self.assertEqual(p.path, b"/doc/")
493 self.assertEqual(p.query, b"")
494 self.assertEqual(p.fragment, b"frag")
495 self.assertEqual(p.username, None)
496 self.assertEqual(p.password, None)
497 self.assertEqual(p.hostname, b"www.python.org")
498 self.assertEqual(p.port, None)
499 self.assertEqual(p.geturl()[4:], url[4:])
500
501 url = b"http://User:Pass@www.python.org:080/doc/?query=yes#frag"
502 p = urllib.parse.urlsplit(url)
503 self.assertEqual(p.scheme, b"http")
504 self.assertEqual(p.netloc, b"User:Pass@www.python.org:080")
505 self.assertEqual(p.path, b"/doc/")
506 self.assertEqual(p.query, b"query=yes")
507 self.assertEqual(p.fragment, b"frag")
508 self.assertEqual(p.username, b"User")
509 self.assertEqual(p.password, b"Pass")
510 self.assertEqual(p.hostname, b"www.python.org")
511 self.assertEqual(p.port, 80)
512 self.assertEqual(p.geturl(), url)
513
514 url = b"http://User@example.com:Pass@www.python.org:080/doc/?query=yes#frag"
515 p = urllib.parse.urlsplit(url)
516 self.assertEqual(p.scheme, b"http")
517 self.assertEqual(p.netloc, b"User@example.com:Pass@www.python.org:080")
518 self.assertEqual(p.path, b"/doc/")
519 self.assertEqual(p.query, b"query=yes")
520 self.assertEqual(p.fragment, b"frag")
521 self.assertEqual(p.username, b"User@example.com")
522 self.assertEqual(p.password, b"Pass")
523 self.assertEqual(p.hostname, b"www.python.org")
524 self.assertEqual(p.port, 80)
525 self.assertEqual(p.geturl(), url)
Christian Heimesfaf2f632008-01-06 16:59:19 +0000526
Senthil Kumaran2fc5a502012-05-24 21:56:17 +0800527 # Verify an illegal port is returned as None
528 url = b"HTTP://WWW.PYTHON.ORG:65536/doc/#frag"
529 p = urllib.parse.urlsplit(url)
530 self.assertEqual(p.port, None)
531
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000532 def test_attributes_bad_port(self):
533 """Check handling of non-integer ports."""
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000534 p = urllib.parse.urlsplit("http://www.example.net:foo")
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000535 self.assertEqual(p.netloc, "www.example.net:foo")
536 self.assertRaises(ValueError, lambda: p.port)
537
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000538 p = urllib.parse.urlparse("http://www.example.net:foo")
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000539 self.assertEqual(p.netloc, "www.example.net:foo")
540 self.assertRaises(ValueError, lambda: p.port)
541
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000542 # Once again, repeat ourselves to test bytes
543 p = urllib.parse.urlsplit(b"http://www.example.net:foo")
544 self.assertEqual(p.netloc, b"www.example.net:foo")
545 self.assertRaises(ValueError, lambda: p.port)
546
547 p = urllib.parse.urlparse(b"http://www.example.net:foo")
548 self.assertEqual(p.netloc, b"www.example.net:foo")
549 self.assertRaises(ValueError, lambda: p.port)
550
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000551 def test_attributes_without_netloc(self):
552 # This example is straight from RFC 3261. It looks like it
553 # should allow the username, hostname, and port to be filled
554 # in, but doesn't. Since it's a URI and doesn't use the
555 # scheme://netloc syntax, the netloc and related attributes
556 # should be left empty.
557 uri = "sip:alice@atlanta.com;maddr=239.255.255.1;ttl=15"
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000558 p = urllib.parse.urlsplit(uri)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000559 self.assertEqual(p.netloc, "")
560 self.assertEqual(p.username, None)
561 self.assertEqual(p.password, None)
562 self.assertEqual(p.hostname, None)
563 self.assertEqual(p.port, None)
564 self.assertEqual(p.geturl(), uri)
565
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000566 p = urllib.parse.urlparse(uri)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000567 self.assertEqual(p.netloc, "")
568 self.assertEqual(p.username, None)
569 self.assertEqual(p.password, None)
570 self.assertEqual(p.hostname, None)
571 self.assertEqual(p.port, None)
572 self.assertEqual(p.geturl(), uri)
573
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000574 # You guessed it, repeating the test with bytes input
575 uri = b"sip:alice@atlanta.com;maddr=239.255.255.1;ttl=15"
576 p = urllib.parse.urlsplit(uri)
577 self.assertEqual(p.netloc, b"")
578 self.assertEqual(p.username, None)
579 self.assertEqual(p.password, None)
580 self.assertEqual(p.hostname, None)
581 self.assertEqual(p.port, None)
582 self.assertEqual(p.geturl(), uri)
583
584 p = urllib.parse.urlparse(uri)
585 self.assertEqual(p.netloc, b"")
586 self.assertEqual(p.username, None)
587 self.assertEqual(p.password, None)
588 self.assertEqual(p.hostname, None)
589 self.assertEqual(p.port, None)
590 self.assertEqual(p.geturl(), uri)
591
Christian Heimesfaf2f632008-01-06 16:59:19 +0000592 def test_noslash(self):
593 # Issue 1637: http://foo.com?query is legal
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000594 self.assertEqual(urllib.parse.urlparse("http://example.com?blahblah=/foo"),
Christian Heimesfaf2f632008-01-06 16:59:19 +0000595 ('http', 'example.com', '', '', 'blahblah=/foo', ''))
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000596 self.assertEqual(urllib.parse.urlparse(b"http://example.com?blahblah=/foo"),
597 (b'http', b'example.com', b'', b'', b'blahblah=/foo', b''))
Christian Heimesfaf2f632008-01-06 16:59:19 +0000598
Senthil Kumaran84c7d9f2010-08-04 04:50:44 +0000599 def test_withoutscheme(self):
600 # Test urlparse without scheme
601 # Issue 754016: urlparse goes wrong with IP:port without scheme
602 # RFC 1808 specifies that netloc should start with //, urlparse expects
603 # the same, otherwise it classifies the portion of url as path.
604 self.assertEqual(urllib.parse.urlparse("path"),
605 ('','','path','','',''))
606 self.assertEqual(urllib.parse.urlparse("//www.python.org:80"),
607 ('','www.python.org:80','','','',''))
608 self.assertEqual(urllib.parse.urlparse("http://www.python.org:80"),
609 ('http','www.python.org:80','','','',''))
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000610 # Repeat for bytes input
611 self.assertEqual(urllib.parse.urlparse(b"path"),
612 (b'',b'',b'path',b'',b'',b''))
613 self.assertEqual(urllib.parse.urlparse(b"//www.python.org:80"),
614 (b'',b'www.python.org:80',b'',b'',b'',b''))
615 self.assertEqual(urllib.parse.urlparse(b"http://www.python.org:80"),
616 (b'http',b'www.python.org:80',b'',b'',b'',b''))
Senthil Kumaran84c7d9f2010-08-04 04:50:44 +0000617
618 def test_portseparator(self):
619 # Issue 754016 makes changes for port separator ':' from scheme separator
620 self.assertEqual(urllib.parse.urlparse("path:80"),
621 ('','','path:80','','',''))
622 self.assertEqual(urllib.parse.urlparse("http:"),('http','','','','',''))
623 self.assertEqual(urllib.parse.urlparse("https:"),('https','','','','',''))
624 self.assertEqual(urllib.parse.urlparse("http://www.python.org:80"),
625 ('http','www.python.org:80','','','',''))
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000626 # As usual, need to check bytes input as well
627 self.assertEqual(urllib.parse.urlparse(b"path:80"),
628 (b'',b'',b'path:80',b'',b'',b''))
629 self.assertEqual(urllib.parse.urlparse(b"http:"),(b'http',b'',b'',b'',b'',b''))
630 self.assertEqual(urllib.parse.urlparse(b"https:"),(b'https',b'',b'',b'',b'',b''))
631 self.assertEqual(urllib.parse.urlparse(b"http://www.python.org:80"),
632 (b'http',b'www.python.org:80',b'',b'',b'',b''))
Senthil Kumaran84c7d9f2010-08-04 04:50:44 +0000633
Facundo Batista2ac5de22008-07-07 18:24:11 +0000634 def test_usingsys(self):
635 # Issue 3314: sys module is used in the error
636 self.assertRaises(TypeError, urllib.parse.urlencode, "foo")
637
Senthil Kumaran6be85c52010-02-19 07:42:50 +0000638 def test_anyscheme(self):
639 # Issue 7904: s3://foo.com/stuff has netloc "foo.com".
Ezio Melotti5e15efa2010-02-19 14:49:02 +0000640 self.assertEqual(urllib.parse.urlparse("s3://foo.com/stuff"),
641 ('s3', 'foo.com', '/stuff', '', '', ''))
642 self.assertEqual(urllib.parse.urlparse("x-newscheme://foo.com/stuff"),
643 ('x-newscheme', 'foo.com', '/stuff', '', '', ''))
Senthil Kumaran1be320e2012-05-19 08:12:00 +0800644 self.assertEqual(urllib.parse.urlparse("x-newscheme://foo.com/stuff?query#fragment"),
645 ('x-newscheme', 'foo.com', '/stuff', '', 'query', 'fragment'))
646 self.assertEqual(urllib.parse.urlparse("x-newscheme://foo.com/stuff?query"),
647 ('x-newscheme', 'foo.com', '/stuff', '', 'query', ''))
648
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000649 # And for bytes...
650 self.assertEqual(urllib.parse.urlparse(b"s3://foo.com/stuff"),
651 (b's3', b'foo.com', b'/stuff', b'', b'', b''))
652 self.assertEqual(urllib.parse.urlparse(b"x-newscheme://foo.com/stuff"),
653 (b'x-newscheme', b'foo.com', b'/stuff', b'', b'', b''))
Senthil Kumaran1be320e2012-05-19 08:12:00 +0800654 self.assertEqual(urllib.parse.urlparse(b"x-newscheme://foo.com/stuff?query#fragment"),
655 (b'x-newscheme', b'foo.com', b'/stuff', b'', b'query', b'fragment'))
656 self.assertEqual(urllib.parse.urlparse(b"x-newscheme://foo.com/stuff?query"),
657 (b'x-newscheme', b'foo.com', b'/stuff', b'', b'query', b''))
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000658
659 def test_mixed_types_rejected(self):
660 # Several functions that process either strings or ASCII encoded bytes
661 # accept multiple arguments. Check they reject mixed type input
Ezio Melottied3a7d22010-12-01 02:32:32 +0000662 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000663 urllib.parse.urlparse("www.python.org", b"http")
Ezio Melottied3a7d22010-12-01 02:32:32 +0000664 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000665 urllib.parse.urlparse(b"www.python.org", "http")
Ezio Melottied3a7d22010-12-01 02:32:32 +0000666 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000667 urllib.parse.urlsplit("www.python.org", b"http")
Ezio Melottied3a7d22010-12-01 02:32:32 +0000668 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000669 urllib.parse.urlsplit(b"www.python.org", "http")
Ezio Melottied3a7d22010-12-01 02:32:32 +0000670 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000671 urllib.parse.urlunparse(( b"http", "www.python.org","","","",""))
Ezio Melottied3a7d22010-12-01 02:32:32 +0000672 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000673 urllib.parse.urlunparse(("http", b"www.python.org","","","",""))
Ezio Melottied3a7d22010-12-01 02:32:32 +0000674 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000675 urllib.parse.urlunsplit((b"http", "www.python.org","","",""))
Ezio Melottied3a7d22010-12-01 02:32:32 +0000676 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000677 urllib.parse.urlunsplit(("http", b"www.python.org","","",""))
Ezio Melottied3a7d22010-12-01 02:32:32 +0000678 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000679 urllib.parse.urljoin("http://python.org", b"http://python.org")
Ezio Melottied3a7d22010-12-01 02:32:32 +0000680 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000681 urllib.parse.urljoin(b"http://python.org", "http://python.org")
682
def _check_result_type(self, str_type):
    # Helper: verify that a str result type and its bytes counterpart
    # point at each other and convert into one another via
    # encode()/decode(), using results built from all-empty fields.
    field_count = len(str_type._fields)
    bytes_type = str_type._encoded_counterpart
    self.assertIs(bytes_type._decoded_counterpart, str_type)

    str_args = ('',) * field_count
    bytes_args = (b'',) * field_count
    str_result = str_type(*str_args)
    bytes_result = bytes_type(*bytes_args)

    # The conversions are exercised with no arguments, with only the
    # encoding, and with both the encoding and the error handler.
    codec_arg_sets = [(), ('ascii',), ('ascii', 'strict')]

    self.assertEqual(str_result, str_args)
    for codec_args in codec_arg_sets:
        self.assertEqual(bytes_result.decode(*codec_args), str_args)
        self.assertEqual(bytes_result.decode(*codec_args), str_result)

    self.assertEqual(bytes_result, bytes_args)
    for codec_args in codec_arg_sets:
        self.assertEqual(str_result.encode(*codec_args), bytes_args)
        self.assertEqual(str_result.encode(*codec_args), bytes_result)
707
708 def test_result_pairs(self):
709 # Check encoding and decoding between result pairs
710 result_types = [
711 urllib.parse.DefragResult,
712 urllib.parse.SplitResult,
713 urllib.parse.ParseResult,
714 ]
715 for result_type in result_types:
716 self._check_result_type(result_type)
717
Victor Stinner1d87deb2011-01-14 13:05:19 +0000718 def test_parse_qs_encoding(self):
719 result = urllib.parse.parse_qs("key=\u0141%E9", encoding="latin-1")
720 self.assertEqual(result, {'key': ['\u0141\xE9']})
721 result = urllib.parse.parse_qs("key=\u0141%C3%A9", encoding="utf-8")
722 self.assertEqual(result, {'key': ['\u0141\xE9']})
723 result = urllib.parse.parse_qs("key=\u0141%C3%A9", encoding="ascii")
724 self.assertEqual(result, {'key': ['\u0141\ufffd\ufffd']})
725 result = urllib.parse.parse_qs("key=\u0141%E9-", encoding="ascii")
726 self.assertEqual(result, {'key': ['\u0141\ufffd-']})
727 result = urllib.parse.parse_qs("key=\u0141%E9-", encoding="ascii",
728 errors="ignore")
729 self.assertEqual(result, {'key': ['\u0141-']})
730
731 def test_parse_qsl_encoding(self):
732 result = urllib.parse.parse_qsl("key=\u0141%E9", encoding="latin-1")
733 self.assertEqual(result, [('key', '\u0141\xE9')])
734 result = urllib.parse.parse_qsl("key=\u0141%C3%A9", encoding="utf-8")
735 self.assertEqual(result, [('key', '\u0141\xE9')])
736 result = urllib.parse.parse_qsl("key=\u0141%C3%A9", encoding="ascii")
737 self.assertEqual(result, [('key', '\u0141\ufffd\ufffd')])
738 result = urllib.parse.parse_qsl("key=\u0141%E9-", encoding="ascii")
739 self.assertEqual(result, [('key', '\u0141\ufffd-')])
740 result = urllib.parse.parse_qsl("key=\u0141%E9-", encoding="ascii",
741 errors="ignore")
742 self.assertEqual(result, [('key', '\u0141-')])
743
Senthil Kumarande02a712011-07-23 18:27:45 +0800744 def test_splitnport(self):
745 # Normal cases are exercised by other tests; ensure that we also
746 # catch cases with no port specified. (testcase ensuring coverage)
747 result = urllib.parse.splitnport('parrot:88')
748 self.assertEqual(result, ('parrot', 88))
749 result = urllib.parse.splitnport('parrot')
750 self.assertEqual(result, ('parrot', -1))
751 result = urllib.parse.splitnport('parrot', 55)
752 self.assertEqual(result, ('parrot', 55))
753 result = urllib.parse.splitnport('parrot:')
754 self.assertEqual(result, ('parrot', None))
755
756 def test_splitquery(self):
757 # Normal cases are exercised by other tests; ensure that we also
758 # catch cases with no port specified (testcase ensuring coverage)
759 result = urllib.parse.splitquery('http://python.org/fake?foo=bar')
760 self.assertEqual(result, ('http://python.org/fake', 'foo=bar'))
761 result = urllib.parse.splitquery('http://python.org/fake?foo=bar?')
762 self.assertEqual(result, ('http://python.org/fake?foo=bar', ''))
763 result = urllib.parse.splitquery('http://python.org/fake')
764 self.assertEqual(result, ('http://python.org/fake', None))
765
766 def test_splitvalue(self):
767 # Normal cases are exercised by other tests; test pathological cases
768 # with no key/value pairs. (testcase ensuring coverage)
769 result = urllib.parse.splitvalue('foo=bar')
770 self.assertEqual(result, ('foo', 'bar'))
771 result = urllib.parse.splitvalue('foo=')
772 self.assertEqual(result, ('foo', ''))
773 result = urllib.parse.splitvalue('foobar')
774 self.assertEqual(result, ('foobar', None))
775
776 def test_to_bytes(self):
777 result = urllib.parse.to_bytes('http://www.python.org')
778 self.assertEqual(result, 'http://www.python.org')
779 self.assertRaises(UnicodeError, urllib.parse.to_bytes,
780 'http://www.python.org/medi\u00e6val')
781
782 def test_urlencode_sequences(self):
783 # Other tests incidentally urlencode things; test non-covered cases:
784 # Sequence and object values.
785 result = urllib.parse.urlencode({'a': [1, 2], 'b': (3, 4, 5)}, True)
Georg Brandl09a7c722012-02-20 21:31:46 +0100786 # we cannot rely on ordering here
787 assert set(result.split('&')) == {'a=1', 'a=2', 'b=3', 'b=4', 'b=5'}
Senthil Kumarande02a712011-07-23 18:27:45 +0800788
789 class Trivial:
790 def __str__(self):
791 return 'trivial'
792
793 result = urllib.parse.urlencode({'a': Trivial()}, True)
794 self.assertEqual(result, 'a=trivial')
795
796 def test_quote_from_bytes(self):
797 self.assertRaises(TypeError, urllib.parse.quote_from_bytes, 'foo')
798 result = urllib.parse.quote_from_bytes(b'archaeological arcana')
799 self.assertEqual(result, 'archaeological%20arcana')
800 result = urllib.parse.quote_from_bytes(b'')
801 self.assertEqual(result, '')
802
803 def test_unquote_to_bytes(self):
804 result = urllib.parse.unquote_to_bytes('abc%20def')
805 self.assertEqual(result, b'abc def')
806 result = urllib.parse.unquote_to_bytes('')
807 self.assertEqual(result, b'')
808
809 def test_quote_errors(self):
810 self.assertRaises(TypeError, urllib.parse.quote, b'foo',
811 encoding='utf-8')
812 self.assertRaises(TypeError, urllib.parse.quote, b'foo', errors='strict')
Victor Stinner1d87deb2011-01-14 13:05:19 +0000813
Ezio Melotti6709b7d2012-05-19 17:15:19 +0300814 def test_issue14072(self):
815 p1 = urllib.parse.urlsplit('tel:+31-641044153')
816 self.assertEqual(p1.scheme, 'tel')
817 self.assertEqual(p1.path, '+31-641044153')
818 p2 = urllib.parse.urlsplit('tel:+31641044153')
819 self.assertEqual(p2.scheme, 'tel')
820 self.assertEqual(p2.path, '+31641044153')
Senthil Kumaraned301992012-12-24 14:00:20 -0800821 # assert the behavior for urlparse
822 p1 = urllib.parse.urlparse('tel:+31-641044153')
823 self.assertEqual(p1.scheme, 'tel')
824 self.assertEqual(p1.path, '+31-641044153')
825 p2 = urllib.parse.urlparse('tel:+31641044153')
826 self.assertEqual(p2.scheme, 'tel')
827 self.assertEqual(p2.path, '+31641044153')
828
829 def test_telurl_params(self):
830 p1 = urllib.parse.urlparse('tel:123-4;phone-context=+1-650-516')
831 self.assertEqual(p1.scheme, 'tel')
832 self.assertEqual(p1.path, '123-4')
833 self.assertEqual(p1.params, 'phone-context=+1-650-516')
834
835 p1 = urllib.parse.urlparse('tel:+1-201-555-0123')
836 self.assertEqual(p1.scheme, 'tel')
837 self.assertEqual(p1.path, '+1-201-555-0123')
838 self.assertEqual(p1.params, '')
839
840 p1 = urllib.parse.urlparse('tel:7042;phone-context=example.com')
841 self.assertEqual(p1.scheme, 'tel')
842 self.assertEqual(p1.path, '7042')
843 self.assertEqual(p1.params, 'phone-context=example.com')
844
845 p1 = urllib.parse.urlparse('tel:863-1234;phone-context=+1-914-555')
846 self.assertEqual(p1.scheme, 'tel')
847 self.assertEqual(p1.path, '863-1234')
848 self.assertEqual(p1.params, 'phone-context=+1-914-555')
849
Senthil Kumaran6be85c52010-02-19 07:42:50 +0000850
def test_main():
    # Entry point: hand the test case class in this module to the
    # test.support runner.
    test_classes = (UrlParseTestCase,)
    support.run_unittest(*test_classes)
Skip Montanaro6ec967d2002-03-23 05:32:10 +0000853
if __name__ == '__main__':
    # Run the suite when this file is executed directly as a script.
    test_main()