blob: 393481148d2e0e868efb3f3d4440c4579e58cfc1 [file] [log] [blame]
Benjamin Petersonee8712c2008-05-20 21:35:26 +00001from test import support
Skip Montanaro6ec967d2002-03-23 05:32:10 +00002import unittest
Jeremy Hylton1afc1692008-06-18 20:49:58 +00003import urllib.parse
Fred Drakea4d18a02001-01-05 05:57:04 +00004
# Base URLs against which the relative references in the urljoin() tests
# are resolved.  The RFC 1808 base carries a fragment; the RFC 2396 and
# RFC 3986 bases are the same URL without it.
RFC1808_BASE = "http://a/b/c/d;p?q#f"
RFC2396_BASE = "http://a/b/c/d;p?q"
RFC3986_BASE = 'http://a/b/c/d;p?q'
# Base without params, query or fragment, used by test_urljoins.
SIMPLE_BASE = 'http://a/b/c/d'
Fred Drakea4d18a02001-01-05 05:57:04 +00009
# A list of test cases.  Each test case is a two-tuple that contains
# a query string (str or bytes) and the list of (name, value) pairs that
# parse_qsl() is expected to return for it when blank values are kept.

parse_qsl_test_cases = [
    ("", []),
    ("&", []),
    ("&&", []),
    ("=", [('', '')]),
    ("=a", [('', 'a')]),
    ("a", [('a', '')]),
    ("a=", [('a', '')]),
    ("&a=b", [('a', 'b')]),
    ("a=a+b&b=b+c", [('a', 'a b'), ('b', 'b c')]),
    ("a=1&a=2", [('a', '1'), ('a', '2')]),
    # The same queries again, as bytes.
    (b"", []),
    (b"&", []),
    (b"&&", []),
    (b"=", [(b'', b'')]),
    (b"=a", [(b'', b'a')]),
    (b"a", [(b'a', b'')]),
    (b"a=", [(b'a', b'')]),
    (b"&a=b", [(b'a', b'b')]),
    (b"a=a+b&b=b+c", [(b'a', b'a b'), (b'b', b'b c')]),
    (b"a=1&a=2", [(b'a', b'1'), (b'a', b'2')]),
]
37
Skip Montanaro6ec967d2002-03-23 05:32:10 +000038class UrlParseTestCase(unittest.TestCase):
Johannes Gijsbers41e4faa2005-01-09 15:29:10 +000039
def checkRoundtrips(self, url, parsed, split):
    """Check that url survives parse -> unparse -> re-parse unchanged.

    url is the URL under test; parsed is the 6-tuple urlparse() must
    return for it and split is the 5-tuple urlsplit() must return.

    For each parser/unparser pair the parse result is compared with the
    expected tuple, put back together and compared with url and with
    geturl(), and then geturl() is parsed again to confirm that every
    component and every derived attribute is unchanged.
    """
    derived = ('username', 'password', 'hostname', 'port')
    checks = (
        (urllib.parse.urlparse, urllib.parse.urlunparse, parsed,
         ('scheme', 'netloc', 'path', 'params', 'query', 'fragment')),
        # check the roundtrip using urlsplit() as well
        (urllib.parse.urlsplit, urllib.parse.urlunsplit, split,
         ('scheme', 'netloc', 'path', 'query', 'fragment')),
    )
    for parse, unparse, expected, fields in checks:
        result = parse(url)
        self.assertEqual(result, expected)
        t = tuple(getattr(result, name) for name in fields)
        self.assertEqual(t, expected)
        # put it back together and it should be the same
        result2 = unparse(result)
        self.assertEqual(result2, url)
        self.assertEqual(result2, result.geturl())

        # the result of geturl() is a fixpoint; we can always parse it
        # again to get the same result:
        result3 = parse(result.geturl())
        self.assertEqual(result3.geturl(), result.geturl())
        self.assertEqual(result3, result)
        for name in fields + derived:
            # The attribute name is passed as the message so a failure
            # says which component differed.
            self.assertEqual(getattr(result3, name),
                             getattr(result, name), name)
Johannes Gijsbers41e4faa2005-01-09 15:29:10 +000090
def test_qsl(self):
    # Parse every query in parse_qsl_test_cases twice: once keeping blank
    # values, which must give the listed pairs, and once dropping them,
    # which must give the listed pairs minus those with an empty value.
    for orig, expect in parse_qsl_test_cases:
        result = urllib.parse.parse_qsl(orig, keep_blank_values=True)
        self.assertEqual(result, expect, "Error parsing %r" % orig)
        expect_without_blanks = [pair for pair in expect if pair[1]]
        result = urllib.parse.parse_qsl(orig, keep_blank_values=False)
        self.assertEqual(result, expect_without_blanks,
                         "Error parsing %r" % orig)
Facundo Batistac469d4c2008-09-03 22:49:01 +000099
Johannes Gijsbers41e4faa2005-01-09 15:29:10 +0000100 def test_roundtrips(self):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000101 str_cases = [
Fred Drake70705652002-10-16 21:02:36 +0000102 ('file:///tmp/junk.txt',
103 ('file', '', '/tmp/junk.txt', '', '', ''),
104 ('file', '', '/tmp/junk.txt', '', '')),
Neal Norwitz68b539e2003-01-06 06:58:31 +0000105 ('imap://mail.python.org/mbox1',
106 ('imap', 'mail.python.org', '/mbox1', '', '', ''),
107 ('imap', 'mail.python.org', '/mbox1', '', '')),
Skip Montanarof09b88e2003-01-06 20:27:03 +0000108 ('mms://wms.sys.hinet.net/cts/Drama/09006251100.asf',
Johannes Gijsbers41e4faa2005-01-09 15:29:10 +0000109 ('mms', 'wms.sys.hinet.net', '/cts/Drama/09006251100.asf',
110 '', '', ''),
111 ('mms', 'wms.sys.hinet.net', '/cts/Drama/09006251100.asf',
112 '', '')),
Senthil Kumaraneaaec272009-03-30 21:54:41 +0000113 ('nfs://server/path/to/file.txt',
114 ('nfs', 'server', '/path/to/file.txt', '', '', ''),
115 ('nfs', 'server', '/path/to/file.txt', '', '')),
Fred Drake50747fc2005-07-29 15:56:32 +0000116 ('svn+ssh://svn.zope.org/repos/main/ZConfig/trunk/',
117 ('svn+ssh', 'svn.zope.org', '/repos/main/ZConfig/trunk/',
118 '', '', ''),
119 ('svn+ssh', 'svn.zope.org', '/repos/main/ZConfig/trunk/',
Senthil Kumaranead169d2010-05-13 03:37:23 +0000120 '', '')),
121 ('git+ssh://git@github.com/user/project.git',
122 ('git+ssh', 'git@github.com','/user/project.git',
123 '','',''),
124 ('git+ssh', 'git@github.com','/user/project.git',
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000125 '', '')),
Johannes Gijsbers41e4faa2005-01-09 15:29:10 +0000126 ]
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000127 def _encode(t):
128 return (t[0].encode('ascii'),
129 tuple(x.encode('ascii') for x in t[1]),
130 tuple(x.encode('ascii') for x in t[2]))
131 bytes_cases = [_encode(x) for x in str_cases]
132 for url, parsed, split in str_cases + bytes_cases:
Johannes Gijsbers41e4faa2005-01-09 15:29:10 +0000133 self.checkRoundtrips(url, parsed, split)
Michael W. Hudsonbd3e7712002-03-18 13:06:00 +0000134
Johannes Gijsbers41e4faa2005-01-09 15:29:10 +0000135 def test_http_roundtrips(self):
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000136 # urllib.parse.urlsplit treats 'http:' as an optimized special case,
Johannes Gijsbers41e4faa2005-01-09 15:29:10 +0000137 # so we test both 'http:' and 'https:' in all the following.
138 # Three cheers for white box knowledge!
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000139 str_cases = [
Johannes Gijsbers41e4faa2005-01-09 15:29:10 +0000140 ('://www.python.org',
141 ('www.python.org', '', '', '', ''),
142 ('www.python.org', '', '', '')),
143 ('://www.python.org#abc',
144 ('www.python.org', '', '', '', 'abc'),
145 ('www.python.org', '', '', 'abc')),
146 ('://www.python.org?q=abc',
147 ('www.python.org', '', '', 'q=abc', ''),
148 ('www.python.org', '', 'q=abc', '')),
149 ('://www.python.org/#abc',
150 ('www.python.org', '/', '', '', 'abc'),
151 ('www.python.org', '/', '', 'abc')),
152 ('://a/b/c/d;p?q#f',
153 ('a', '/b/c/d', 'p', 'q', 'f'),
154 ('a', '/b/c/d;p', 'q', 'f')),
155 ]
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000156 def _encode(t):
157 return (t[0].encode('ascii'),
158 tuple(x.encode('ascii') for x in t[1]),
159 tuple(x.encode('ascii') for x in t[2]))
160 bytes_cases = [_encode(x) for x in str_cases]
161 str_schemes = ('http', 'https')
162 bytes_schemes = (b'http', b'https')
163 str_tests = str_schemes, str_cases
164 bytes_tests = bytes_schemes, bytes_cases
165 for schemes, test_cases in (str_tests, bytes_tests):
166 for scheme in schemes:
167 for url, parsed, split in test_cases:
168 url = scheme + url
169 parsed = (scheme,) + parsed
170 split = (scheme,) + split
171 self.checkRoundtrips(url, parsed, split)
Fred Drake70705652002-10-16 21:02:36 +0000172
def checkJoin(self, base, relurl, expected):
    """Assert that urljoin(base, relurl) gives expected, as str and bytes.

    All three arguments are str.  The join is checked once as given and
    once more with every argument encoded to ASCII bytes.
    """
    self.assertEqual(urllib.parse.urljoin(base, relurl), expected)
    baseb, relurlb, expectedb = [
        x.encode('ascii') for x in (base, relurl, expected)]
    self.assertEqual(urllib.parse.urljoin(baseb, relurlb), expectedb)
Guido van Rossumbbc05682002-10-14 19:59:54 +0000179
180 def test_unparse_parse(self):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000181 str_cases = ['Python', './Python','x-newscheme://foo.com/stuff','x://y','x:/y','x:/','/',]
182 bytes_cases = [x.encode('ascii') for x in str_cases]
183 for u in str_cases + bytes_cases:
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000184 self.assertEqual(urllib.parse.urlunsplit(urllib.parse.urlsplit(u)), u)
185 self.assertEqual(urllib.parse.urlunparse(urllib.parse.urlparse(u)), u)
Fred Drakea4d18a02001-01-05 05:57:04 +0000186
def test_RFC1808(self):
    # Each entry is (relative URL, expected result of joining it to
    # RFC1808_BASE).

    # "normal" cases from RFC 1808:
    normal_cases = [
        ('g:h', 'g:h'),
        ('g', 'http://a/b/c/g'),
        ('./g', 'http://a/b/c/g'),
        ('g/', 'http://a/b/c/g/'),
        ('/g', 'http://a/g'),
        ('//g', 'http://g'),
        ('g?y', 'http://a/b/c/g?y'),
        ('g?y/./x', 'http://a/b/c/g?y/./x'),
        ('#s', 'http://a/b/c/d;p?q#s'),
        ('g#s', 'http://a/b/c/g#s'),
        ('g#s/./x', 'http://a/b/c/g#s/./x'),
        ('g?y#s', 'http://a/b/c/g?y#s'),
        ('g;x', 'http://a/b/c/g;x'),
        ('g;x?y#s', 'http://a/b/c/g;x?y#s'),
        ('.', 'http://a/b/c/'),
        ('./', 'http://a/b/c/'),
        ('..', 'http://a/b/'),
        ('../', 'http://a/b/'),
        ('../g', 'http://a/b/g'),
        ('../..', 'http://a/'),
        ('../../', 'http://a/'),
        ('../../g', 'http://a/g'),
    ]

    # "abnormal" cases from RFC 1808:
    abnormal_cases = [
        ('', 'http://a/b/c/d;p?q#f'),
        ('../../../g', 'http://a/../g'),
        ('../../../../g', 'http://a/../../g'),
        ('/./g', 'http://a/./g'),
        ('/../g', 'http://a/../g'),
        ('g.', 'http://a/b/c/g.'),
        ('.g', 'http://a/b/c/.g'),
        ('g..', 'http://a/b/c/g..'),
        ('..g', 'http://a/b/c/..g'),
        ('./../g', 'http://a/b/g'),
        ('./g/.', 'http://a/b/c/g/'),
        ('g/./h', 'http://a/b/c/g/h'),
        ('g/../h', 'http://a/b/c/h'),
    ]

    for relurl, expected in normal_cases + abnormal_cases:
        self.checkJoin(RFC1808_BASE, relurl, expected)

    # RFC 1808 and RFC 1630 disagree on these (according to RFC 1808),
    # so we'll not actually run these tests (which expect 1808 behavior).
    #self.checkJoin(RFC1808_BASE, 'http:g', 'http:g')
    #self.checkJoin(RFC1808_BASE, 'http:', 'http:')
Fred Drakea4d18a02001-01-05 05:57:04 +0000231
Senthil Kumaran397eb442011-04-15 18:20:24 +0800232 def test_RFC2368(self):
233 # Issue 11467: path that starts with a number is not parsed correctly
234 self.assertEqual(urllib.parse.urlparse('mailto:1337@example.org'),
235 ('mailto', '', '1337@example.org', '', '', ''))
236
def test_RFC2396(self):
    # cases from RFC 2396
    #
    # Each entry is (relative URL, expected result of joining it to
    # RFC2396_BASE).
    cases = [
        ('g:h', 'g:h'),
        ('g', 'http://a/b/c/g'),
        ('./g', 'http://a/b/c/g'),
        ('g/', 'http://a/b/c/g/'),
        ('/g', 'http://a/g'),
        ('//g', 'http://g'),
        ('g?y', 'http://a/b/c/g?y'),
        ('#s', 'http://a/b/c/d;p?q#s'),
        ('g#s', 'http://a/b/c/g#s'),
        ('g?y#s', 'http://a/b/c/g?y#s'),
        ('g;x', 'http://a/b/c/g;x'),
        ('g;x?y#s', 'http://a/b/c/g;x?y#s'),
        ('.', 'http://a/b/c/'),
        ('./', 'http://a/b/c/'),
        ('..', 'http://a/b/'),
        ('../', 'http://a/b/'),
        ('../g', 'http://a/b/g'),
        ('../..', 'http://a/'),
        ('../../', 'http://a/'),
        ('../../g', 'http://a/g'),
        ('', RFC2396_BASE),
        ('../../../g', 'http://a/../g'),
        ('../../../../g', 'http://a/../../g'),
        ('/./g', 'http://a/./g'),
        ('/../g', 'http://a/../g'),
        ('g.', 'http://a/b/c/g.'),
        ('.g', 'http://a/b/c/.g'),
        ('g..', 'http://a/b/c/g..'),
        ('..g', 'http://a/b/c/..g'),
        ('./../g', 'http://a/b/g'),
        ('./g/.', 'http://a/b/c/g/'),
        ('g/./h', 'http://a/b/c/g/h'),
        ('g/../h', 'http://a/b/c/h'),
        ('g;x=1/./y', 'http://a/b/c/g;x=1/y'),
        ('g;x=1/../y', 'http://a/b/c/y'),
        ('g?y/./x', 'http://a/b/c/g?y/./x'),
        ('g?y/../x', 'http://a/b/c/g?y/../x'),
        ('g#s/./x', 'http://a/b/c/g#s/./x'),
        ('g#s/../x', 'http://a/b/c/g#s/../x'),
    ]
    for relurl, expected in cases:
        self.checkJoin(RFC2396_BASE, relurl, expected)
280
def test_RFC3986(self):
    # Test cases from RFC3986
    #
    # Each entry is (relative URL, expected result of joining it to
    # RFC3986_BASE).
    normal_cases = [
        ('g:h', 'g:h'),
        ('g', 'http://a/b/c/g'),
        ('./g', 'http://a/b/c/g'),
        ('g/', 'http://a/b/c/g/'),
        ('/g', 'http://a/g'),
        ('//g', 'http://g'),
        ('?y', 'http://a/b/c/d;p?y'),
        ('g?y', 'http://a/b/c/g?y'),
        ('#s', 'http://a/b/c/d;p?q#s'),
        ('g#s', 'http://a/b/c/g#s'),
        ('g?y#s', 'http://a/b/c/g?y#s'),
        (';x', 'http://a/b/c/;x'),
        ('g;x', 'http://a/b/c/g;x'),
        ('g;x?y#s', 'http://a/b/c/g;x?y#s'),
        ('', 'http://a/b/c/d;p?q'),
        ('.', 'http://a/b/c/'),
        ('./', 'http://a/b/c/'),
        ('..', 'http://a/b/'),
        ('../', 'http://a/b/'),
        ('../g', 'http://a/b/g'),
        ('../..', 'http://a/'),
        ('../../', 'http://a/'),
        ('../../g', 'http://a/g'),
    ]

    #Abnormal Examples

    # The 'abnormal scenarios' are incompatible with RFC3986 parsing
    # Tests are here for reference.

    #self.checkJoin(RFC3986_BASE, '../../../g','http://a/g')
    #self.checkJoin(RFC3986_BASE, '../../../../g','http://a/g')
    #self.checkJoin(RFC3986_BASE, '/./g','http://a/g')
    #self.checkJoin(RFC3986_BASE, '/../g','http://a/g')

    abnormal_cases = [
        ('g.', 'http://a/b/c/g.'),
        ('.g', 'http://a/b/c/.g'),
        ('g..', 'http://a/b/c/g..'),
        ('..g', 'http://a/b/c/..g'),
        ('./../g', 'http://a/b/g'),
        ('./g/.', 'http://a/b/c/g/'),
        ('g/./h', 'http://a/b/c/g/h'),
        ('g/../h', 'http://a/b/c/h'),
        ('g;x=1/./y', 'http://a/b/c/g;x=1/y'),
        ('g;x=1/../y', 'http://a/b/c/y'),
        ('g?y/./x', 'http://a/b/c/g?y/./x'),
        ('g?y/../x', 'http://a/b/c/g?y/../x'),
        ('g#s/./x', 'http://a/b/c/g#s/./x'),
        ('g#s/../x', 'http://a/b/c/g#s/../x'),
        #('http:g', 'http:g'),  # strict parser
        ('http:g', 'http://a/b/c/g'),  # relaxed parser
    ]

    for relurl, expected in normal_cases + abnormal_cases:
        self.checkJoin(RFC3986_BASE, relurl, expected)

    # Test for issue9721
    self.checkJoin('http://a/b/c/de', ';x', 'http://a/b/c/;x')
338
def test_urljoins(self):
    # Each entry is (relative URL, expected result of joining it to
    # SIMPLE_BASE).
    simple_cases = [
        ('g:h', 'g:h'),
        ('http:g', 'http://a/b/c/g'),
        ('http:', 'http://a/b/c/d'),
        ('g', 'http://a/b/c/g'),
        ('./g', 'http://a/b/c/g'),
        ('g/', 'http://a/b/c/g/'),
        ('/g', 'http://a/g'),
        ('//g', 'http://g'),
        ('?y', 'http://a/b/c/d?y'),
        ('g?y', 'http://a/b/c/g?y'),
        ('g?y/./x', 'http://a/b/c/g?y/./x'),
        ('.', 'http://a/b/c/'),
        ('./', 'http://a/b/c/'),
        ('..', 'http://a/b/'),
        ('../', 'http://a/b/'),
        ('../g', 'http://a/b/g'),
        ('../..', 'http://a/'),
        ('../../g', 'http://a/g'),
        ('../../../g', 'http://a/../g'),
        ('./../g', 'http://a/b/g'),
        ('./g/.', 'http://a/b/c/g/'),
        ('/./g', 'http://a/./g'),
        ('g/./h', 'http://a/b/c/g/h'),
        ('g/../h', 'http://a/b/c/h'),
        ('http:?y', 'http://a/b/c/d?y'),
        ('http:g?y', 'http://a/b/c/g?y'),
        ('http:g?y/./x', 'http://a/b/c/g?y/./x'),
    ]
    for relurl, expected in simple_cases:
        self.checkJoin(SIMPLE_BASE, relurl, expected)

    # Joins against other bases: (base, relative URL, expected result).
    other_cases = [
        ('http:///', '..', 'http:///'),
        ('', 'http://a/b/c/g?y/./x', 'http://a/b/c/g?y/./x'),
        ('', 'http://a/./g', 'http://a/./g'),
        ('svn://pathtorepo/dir1', 'dir2', 'svn://pathtorepo/dir2'),
        ('svn+ssh://pathtorepo/dir1', 'dir2', 'svn+ssh://pathtorepo/dir2'),
    ]
    for base, relurl, expected in other_cases:
        self.checkJoin(base, relurl, expected)
Senthil Kumaranaa69d4d2010-07-14 10:21:22 +0000374
Senthil Kumaranad02d232010-04-16 03:02:13 +0000375 def test_RFC2732(self):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000376 str_cases = [
Senthil Kumaranad02d232010-04-16 03:02:13 +0000377 ('http://Test.python.org:5432/foo/', 'test.python.org', 5432),
378 ('http://12.34.56.78:5432/foo/', '12.34.56.78', 5432),
379 ('http://[::1]:5432/foo/', '::1', 5432),
380 ('http://[dead:beef::1]:5432/foo/', 'dead:beef::1', 5432),
381 ('http://[dead:beef::]:5432/foo/', 'dead:beef::', 5432),
382 ('http://[dead:beef:cafe:5417:affe:8FA3:deaf:feed]:5432/foo/',
383 'dead:beef:cafe:5417:affe:8fa3:deaf:feed', 5432),
384 ('http://[::12.34.56.78]:5432/foo/', '::12.34.56.78', 5432),
385 ('http://[::ffff:12.34.56.78]:5432/foo/',
386 '::ffff:12.34.56.78', 5432),
387 ('http://Test.python.org/foo/', 'test.python.org', None),
388 ('http://12.34.56.78/foo/', '12.34.56.78', None),
389 ('http://[::1]/foo/', '::1', None),
390 ('http://[dead:beef::1]/foo/', 'dead:beef::1', None),
391 ('http://[dead:beef::]/foo/', 'dead:beef::', None),
392 ('http://[dead:beef:cafe:5417:affe:8FA3:deaf:feed]/foo/',
393 'dead:beef:cafe:5417:affe:8fa3:deaf:feed', None),
394 ('http://[::12.34.56.78]/foo/', '::12.34.56.78', None),
395 ('http://[::ffff:12.34.56.78]/foo/',
396 '::ffff:12.34.56.78', None),
Serhiy Storchakaff97b082014-01-18 18:30:33 +0200397 ('http://Test.python.org:/foo/', 'test.python.org', None),
398 ('http://12.34.56.78:/foo/', '12.34.56.78', None),
399 ('http://[::1]:/foo/', '::1', None),
400 ('http://[dead:beef::1]:/foo/', 'dead:beef::1', None),
401 ('http://[dead:beef::]:/foo/', 'dead:beef::', None),
402 ('http://[dead:beef:cafe:5417:affe:8FA3:deaf:feed]:/foo/',
403 'dead:beef:cafe:5417:affe:8fa3:deaf:feed', None),
404 ('http://[::12.34.56.78]:/foo/', '::12.34.56.78', None),
405 ('http://[::ffff:12.34.56.78]:/foo/',
406 '::ffff:12.34.56.78', None),
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000407 ]
408 def _encode(t):
409 return t[0].encode('ascii'), t[1].encode('ascii'), t[2]
410 bytes_cases = [_encode(x) for x in str_cases]
411 for url, hostname, port in str_cases + bytes_cases:
Senthil Kumaranad02d232010-04-16 03:02:13 +0000412 urlparsed = urllib.parse.urlparse(url)
413 self.assertEqual((urlparsed.hostname, urlparsed.port) , (hostname, port))
414
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000415 str_cases = [
Senthil Kumaranad02d232010-04-16 03:02:13 +0000416 'http://::12.34.56.78]/',
417 'http://[::1/foo/',
Senthil Kumaran7a1e09f2010-04-22 12:19:46 +0000418 'ftp://[::1/foo/bad]/bad',
Senthil Kumaran2eaef052010-04-20 20:42:50 +0000419 'http://[::1/foo/bad]/bad',
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000420 'http://[::ffff:12.34.56.78']
421 bytes_cases = [x.encode('ascii') for x in str_cases]
422 for invalid_url in str_cases + bytes_cases:
Senthil Kumaran7a1e09f2010-04-22 12:19:46 +0000423 self.assertRaises(ValueError, urllib.parse.urlparse, invalid_url)
Senthil Kumaranad02d232010-04-16 03:02:13 +0000424
def test_urldefrag(self):
    # Each case is (url, url with the fragment removed, fragment).
    str_cases = [
        ('http://python.org#frag', 'http://python.org', 'frag'),
        ('http://python.org', 'http://python.org', ''),
        ('http://python.org/#frag', 'http://python.org/', 'frag'),
        ('http://python.org/', 'http://python.org/', ''),
        ('http://python.org/?q#frag', 'http://python.org/?q', 'frag'),
        ('http://python.org/?q', 'http://python.org/?q', ''),
        ('http://python.org/p#frag', 'http://python.org/p', 'frag'),
        ('http://python.org/p?q', 'http://python.org/p?q', ''),
        (RFC1808_BASE, 'http://a/b/c/d;p?q', 'f'),
        (RFC2396_BASE, 'http://a/b/c/d;p?q', ''),
    ]

    def _encode(case):
        return tuple(x.encode('ascii') for x in case)

    bytes_cases = [_encode(case) for case in str_cases]
    for url, defrag, frag in str_cases + bytes_cases:
        result = urllib.parse.urldefrag(url)
        self.assertEqual(result.geturl(), url)
        self.assertEqual(result, (defrag, frag))
        self.assertEqual(result.url, defrag)
        self.assertEqual(result.fragment, frag)
Fred Drake70705652002-10-16 21:02:36 +0000447
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000448 def test_urlsplit_attributes(self):
449 url = "HTTP://WWW.PYTHON.ORG/doc/#frag"
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000450 p = urllib.parse.urlsplit(url)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000451 self.assertEqual(p.scheme, "http")
452 self.assertEqual(p.netloc, "WWW.PYTHON.ORG")
453 self.assertEqual(p.path, "/doc/")
454 self.assertEqual(p.query, "")
455 self.assertEqual(p.fragment, "frag")
456 self.assertEqual(p.username, None)
457 self.assertEqual(p.password, None)
458 self.assertEqual(p.hostname, "www.python.org")
459 self.assertEqual(p.port, None)
460 # geturl() won't return exactly the original URL in this case
461 # since the scheme is always case-normalized
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000462 # We handle this by ignoring the first 4 characters of the URL
463 self.assertEqual(p.geturl()[4:], url[4:])
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000464
465 url = "http://User:Pass@www.python.org:080/doc/?query=yes#frag"
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000466 p = urllib.parse.urlsplit(url)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000467 self.assertEqual(p.scheme, "http")
468 self.assertEqual(p.netloc, "User:Pass@www.python.org:080")
469 self.assertEqual(p.path, "/doc/")
470 self.assertEqual(p.query, "query=yes")
471 self.assertEqual(p.fragment, "frag")
472 self.assertEqual(p.username, "User")
473 self.assertEqual(p.password, "Pass")
474 self.assertEqual(p.hostname, "www.python.org")
475 self.assertEqual(p.port, 80)
476 self.assertEqual(p.geturl(), url)
477
Christian Heimesfaf2f632008-01-06 16:59:19 +0000478 # Addressing issue1698, which suggests Username can contain
479 # "@" characters. Though not RFC compliant, many ftp sites allow
480 # and request email addresses as usernames.
481
482 url = "http://User@example.com:Pass@www.python.org:080/doc/?query=yes#frag"
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000483 p = urllib.parse.urlsplit(url)
Christian Heimesfaf2f632008-01-06 16:59:19 +0000484 self.assertEqual(p.scheme, "http")
485 self.assertEqual(p.netloc, "User@example.com:Pass@www.python.org:080")
486 self.assertEqual(p.path, "/doc/")
487 self.assertEqual(p.query, "query=yes")
488 self.assertEqual(p.fragment, "frag")
489 self.assertEqual(p.username, "User@example.com")
490 self.assertEqual(p.password, "Pass")
491 self.assertEqual(p.hostname, "www.python.org")
492 self.assertEqual(p.port, 80)
493 self.assertEqual(p.geturl(), url)
494
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000495 # And check them all again, only with bytes this time
496 url = b"HTTP://WWW.PYTHON.ORG/doc/#frag"
497 p = urllib.parse.urlsplit(url)
498 self.assertEqual(p.scheme, b"http")
499 self.assertEqual(p.netloc, b"WWW.PYTHON.ORG")
500 self.assertEqual(p.path, b"/doc/")
501 self.assertEqual(p.query, b"")
502 self.assertEqual(p.fragment, b"frag")
503 self.assertEqual(p.username, None)
504 self.assertEqual(p.password, None)
505 self.assertEqual(p.hostname, b"www.python.org")
506 self.assertEqual(p.port, None)
507 self.assertEqual(p.geturl()[4:], url[4:])
508
509 url = b"http://User:Pass@www.python.org:080/doc/?query=yes#frag"
510 p = urllib.parse.urlsplit(url)
511 self.assertEqual(p.scheme, b"http")
512 self.assertEqual(p.netloc, b"User:Pass@www.python.org:080")
513 self.assertEqual(p.path, b"/doc/")
514 self.assertEqual(p.query, b"query=yes")
515 self.assertEqual(p.fragment, b"frag")
516 self.assertEqual(p.username, b"User")
517 self.assertEqual(p.password, b"Pass")
518 self.assertEqual(p.hostname, b"www.python.org")
519 self.assertEqual(p.port, 80)
520 self.assertEqual(p.geturl(), url)
521
522 url = b"http://User@example.com:Pass@www.python.org:080/doc/?query=yes#frag"
523 p = urllib.parse.urlsplit(url)
524 self.assertEqual(p.scheme, b"http")
525 self.assertEqual(p.netloc, b"User@example.com:Pass@www.python.org:080")
526 self.assertEqual(p.path, b"/doc/")
527 self.assertEqual(p.query, b"query=yes")
528 self.assertEqual(p.fragment, b"frag")
529 self.assertEqual(p.username, b"User@example.com")
530 self.assertEqual(p.password, b"Pass")
531 self.assertEqual(p.hostname, b"www.python.org")
532 self.assertEqual(p.port, 80)
533 self.assertEqual(p.geturl(), url)
Christian Heimesfaf2f632008-01-06 16:59:19 +0000534
Senthil Kumaran2fc5a502012-05-24 21:56:17 +0800535 # Verify an illegal port is returned as None
536 url = b"HTTP://WWW.PYTHON.ORG:65536/doc/#frag"
537 p = urllib.parse.urlsplit(url)
538 self.assertEqual(p.port, None)
539
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000540 def test_attributes_bad_port(self):
541 """Check handling of non-integer ports."""
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000542 p = urllib.parse.urlsplit("http://www.example.net:foo")
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000543 self.assertEqual(p.netloc, "www.example.net:foo")
544 self.assertRaises(ValueError, lambda: p.port)
545
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000546 p = urllib.parse.urlparse("http://www.example.net:foo")
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000547 self.assertEqual(p.netloc, "www.example.net:foo")
548 self.assertRaises(ValueError, lambda: p.port)
549
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000550 # Once again, repeat ourselves to test bytes
551 p = urllib.parse.urlsplit(b"http://www.example.net:foo")
552 self.assertEqual(p.netloc, b"www.example.net:foo")
553 self.assertRaises(ValueError, lambda: p.port)
554
555 p = urllib.parse.urlparse(b"http://www.example.net:foo")
556 self.assertEqual(p.netloc, b"www.example.net:foo")
557 self.assertRaises(ValueError, lambda: p.port)
558
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000559 def test_attributes_without_netloc(self):
560 # This example is straight from RFC 3261. It looks like it
561 # should allow the username, hostname, and port to be filled
562 # in, but doesn't. Since it's a URI and doesn't use the
563 # scheme://netloc syntax, the netloc and related attributes
564 # should be left empty.
565 uri = "sip:alice@atlanta.com;maddr=239.255.255.1;ttl=15"
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000566 p = urllib.parse.urlsplit(uri)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000567 self.assertEqual(p.netloc, "")
568 self.assertEqual(p.username, None)
569 self.assertEqual(p.password, None)
570 self.assertEqual(p.hostname, None)
571 self.assertEqual(p.port, None)
572 self.assertEqual(p.geturl(), uri)
573
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000574 p = urllib.parse.urlparse(uri)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000575 self.assertEqual(p.netloc, "")
576 self.assertEqual(p.username, None)
577 self.assertEqual(p.password, None)
578 self.assertEqual(p.hostname, None)
579 self.assertEqual(p.port, None)
580 self.assertEqual(p.geturl(), uri)
581
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000582 # You guessed it, repeating the test with bytes input
583 uri = b"sip:alice@atlanta.com;maddr=239.255.255.1;ttl=15"
584 p = urllib.parse.urlsplit(uri)
585 self.assertEqual(p.netloc, b"")
586 self.assertEqual(p.username, None)
587 self.assertEqual(p.password, None)
588 self.assertEqual(p.hostname, None)
589 self.assertEqual(p.port, None)
590 self.assertEqual(p.geturl(), uri)
591
592 p = urllib.parse.urlparse(uri)
593 self.assertEqual(p.netloc, b"")
594 self.assertEqual(p.username, None)
595 self.assertEqual(p.password, None)
596 self.assertEqual(p.hostname, None)
597 self.assertEqual(p.port, None)
598 self.assertEqual(p.geturl(), uri)
599
Christian Heimesfaf2f632008-01-06 16:59:19 +0000600 def test_noslash(self):
601 # Issue 1637: http://foo.com?query is legal
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000602 self.assertEqual(urllib.parse.urlparse("http://example.com?blahblah=/foo"),
Christian Heimesfaf2f632008-01-06 16:59:19 +0000603 ('http', 'example.com', '', '', 'blahblah=/foo', ''))
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000604 self.assertEqual(urllib.parse.urlparse(b"http://example.com?blahblah=/foo"),
605 (b'http', b'example.com', b'', b'', b'blahblah=/foo', b''))
Christian Heimesfaf2f632008-01-06 16:59:19 +0000606
Senthil Kumaran84c7d9f2010-08-04 04:50:44 +0000607 def test_withoutscheme(self):
608 # Test urlparse without scheme
609 # Issue 754016: urlparse goes wrong with IP:port without scheme
610 # RFC 1808 specifies that netloc should start with //, urlparse expects
611 # the same, otherwise it classifies the portion of url as path.
612 self.assertEqual(urllib.parse.urlparse("path"),
613 ('','','path','','',''))
614 self.assertEqual(urllib.parse.urlparse("//www.python.org:80"),
615 ('','www.python.org:80','','','',''))
616 self.assertEqual(urllib.parse.urlparse("http://www.python.org:80"),
617 ('http','www.python.org:80','','','',''))
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000618 # Repeat for bytes input
619 self.assertEqual(urllib.parse.urlparse(b"path"),
620 (b'',b'',b'path',b'',b'',b''))
621 self.assertEqual(urllib.parse.urlparse(b"//www.python.org:80"),
622 (b'',b'www.python.org:80',b'',b'',b'',b''))
623 self.assertEqual(urllib.parse.urlparse(b"http://www.python.org:80"),
624 (b'http',b'www.python.org:80',b'',b'',b'',b''))
Senthil Kumaran84c7d9f2010-08-04 04:50:44 +0000625
626 def test_portseparator(self):
627 # Issue 754016 makes changes for port separator ':' from scheme separator
628 self.assertEqual(urllib.parse.urlparse("path:80"),
629 ('','','path:80','','',''))
630 self.assertEqual(urllib.parse.urlparse("http:"),('http','','','','',''))
631 self.assertEqual(urllib.parse.urlparse("https:"),('https','','','','',''))
632 self.assertEqual(urllib.parse.urlparse("http://www.python.org:80"),
633 ('http','www.python.org:80','','','',''))
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000634 # As usual, need to check bytes input as well
635 self.assertEqual(urllib.parse.urlparse(b"path:80"),
636 (b'',b'',b'path:80',b'',b'',b''))
637 self.assertEqual(urllib.parse.urlparse(b"http:"),(b'http',b'',b'',b'',b'',b''))
638 self.assertEqual(urllib.parse.urlparse(b"https:"),(b'https',b'',b'',b'',b'',b''))
639 self.assertEqual(urllib.parse.urlparse(b"http://www.python.org:80"),
640 (b'http',b'www.python.org:80',b'',b'',b'',b''))
Senthil Kumaran84c7d9f2010-08-04 04:50:44 +0000641
Facundo Batista2ac5de22008-07-07 18:24:11 +0000642 def test_usingsys(self):
643 # Issue 3314: sys module is used in the error
644 self.assertRaises(TypeError, urllib.parse.urlencode, "foo")
645
Senthil Kumaran6be85c52010-02-19 07:42:50 +0000646 def test_anyscheme(self):
647 # Issue 7904: s3://foo.com/stuff has netloc "foo.com".
Ezio Melotti5e15efa2010-02-19 14:49:02 +0000648 self.assertEqual(urllib.parse.urlparse("s3://foo.com/stuff"),
649 ('s3', 'foo.com', '/stuff', '', '', ''))
650 self.assertEqual(urllib.parse.urlparse("x-newscheme://foo.com/stuff"),
651 ('x-newscheme', 'foo.com', '/stuff', '', '', ''))
Senthil Kumaran1be320e2012-05-19 08:12:00 +0800652 self.assertEqual(urllib.parse.urlparse("x-newscheme://foo.com/stuff?query#fragment"),
653 ('x-newscheme', 'foo.com', '/stuff', '', 'query', 'fragment'))
654 self.assertEqual(urllib.parse.urlparse("x-newscheme://foo.com/stuff?query"),
655 ('x-newscheme', 'foo.com', '/stuff', '', 'query', ''))
656
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000657 # And for bytes...
658 self.assertEqual(urllib.parse.urlparse(b"s3://foo.com/stuff"),
659 (b's3', b'foo.com', b'/stuff', b'', b'', b''))
660 self.assertEqual(urllib.parse.urlparse(b"x-newscheme://foo.com/stuff"),
661 (b'x-newscheme', b'foo.com', b'/stuff', b'', b'', b''))
Senthil Kumaran1be320e2012-05-19 08:12:00 +0800662 self.assertEqual(urllib.parse.urlparse(b"x-newscheme://foo.com/stuff?query#fragment"),
663 (b'x-newscheme', b'foo.com', b'/stuff', b'', b'query', b'fragment'))
664 self.assertEqual(urllib.parse.urlparse(b"x-newscheme://foo.com/stuff?query"),
665 (b'x-newscheme', b'foo.com', b'/stuff', b'', b'query', b''))
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000666
667 def test_mixed_types_rejected(self):
668 # Several functions that process either strings or ASCII encoded bytes
669 # accept multiple arguments. Check they reject mixed type input
Ezio Melottied3a7d22010-12-01 02:32:32 +0000670 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000671 urllib.parse.urlparse("www.python.org", b"http")
Ezio Melottied3a7d22010-12-01 02:32:32 +0000672 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000673 urllib.parse.urlparse(b"www.python.org", "http")
Ezio Melottied3a7d22010-12-01 02:32:32 +0000674 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000675 urllib.parse.urlsplit("www.python.org", b"http")
Ezio Melottied3a7d22010-12-01 02:32:32 +0000676 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000677 urllib.parse.urlsplit(b"www.python.org", "http")
Ezio Melottied3a7d22010-12-01 02:32:32 +0000678 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000679 urllib.parse.urlunparse(( b"http", "www.python.org","","","",""))
Ezio Melottied3a7d22010-12-01 02:32:32 +0000680 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000681 urllib.parse.urlunparse(("http", b"www.python.org","","","",""))
Ezio Melottied3a7d22010-12-01 02:32:32 +0000682 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000683 urllib.parse.urlunsplit((b"http", "www.python.org","","",""))
Ezio Melottied3a7d22010-12-01 02:32:32 +0000684 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000685 urllib.parse.urlunsplit(("http", b"www.python.org","","",""))
Ezio Melottied3a7d22010-12-01 02:32:32 +0000686 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000687 urllib.parse.urljoin("http://python.org", b"http://python.org")
Ezio Melottied3a7d22010-12-01 02:32:32 +0000688 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000689 urllib.parse.urljoin(b"http://python.org", "http://python.org")
690
691 def _check_result_type(self, str_type):
692 num_args = len(str_type._fields)
693 bytes_type = str_type._encoded_counterpart
694 self.assertIs(bytes_type._decoded_counterpart, str_type)
695 str_args = ('',) * num_args
696 bytes_args = (b'',) * num_args
697 str_result = str_type(*str_args)
698 bytes_result = bytes_type(*bytes_args)
699 encoding = 'ascii'
700 errors = 'strict'
701 self.assertEqual(str_result, str_args)
702 self.assertEqual(bytes_result.decode(), str_args)
703 self.assertEqual(bytes_result.decode(), str_result)
704 self.assertEqual(bytes_result.decode(encoding), str_args)
705 self.assertEqual(bytes_result.decode(encoding), str_result)
706 self.assertEqual(bytes_result.decode(encoding, errors), str_args)
707 self.assertEqual(bytes_result.decode(encoding, errors), str_result)
708 self.assertEqual(bytes_result, bytes_args)
709 self.assertEqual(str_result.encode(), bytes_args)
710 self.assertEqual(str_result.encode(), bytes_result)
711 self.assertEqual(str_result.encode(encoding), bytes_args)
712 self.assertEqual(str_result.encode(encoding), bytes_result)
713 self.assertEqual(str_result.encode(encoding, errors), bytes_args)
714 self.assertEqual(str_result.encode(encoding, errors), bytes_result)
715
716 def test_result_pairs(self):
717 # Check encoding and decoding between result pairs
718 result_types = [
719 urllib.parse.DefragResult,
720 urllib.parse.SplitResult,
721 urllib.parse.ParseResult,
722 ]
723 for result_type in result_types:
724 self._check_result_type(result_type)
725
Victor Stinner1d87deb2011-01-14 13:05:19 +0000726 def test_parse_qs_encoding(self):
727 result = urllib.parse.parse_qs("key=\u0141%E9", encoding="latin-1")
728 self.assertEqual(result, {'key': ['\u0141\xE9']})
729 result = urllib.parse.parse_qs("key=\u0141%C3%A9", encoding="utf-8")
730 self.assertEqual(result, {'key': ['\u0141\xE9']})
731 result = urllib.parse.parse_qs("key=\u0141%C3%A9", encoding="ascii")
732 self.assertEqual(result, {'key': ['\u0141\ufffd\ufffd']})
733 result = urllib.parse.parse_qs("key=\u0141%E9-", encoding="ascii")
734 self.assertEqual(result, {'key': ['\u0141\ufffd-']})
735 result = urllib.parse.parse_qs("key=\u0141%E9-", encoding="ascii",
736 errors="ignore")
737 self.assertEqual(result, {'key': ['\u0141-']})
738
739 def test_parse_qsl_encoding(self):
740 result = urllib.parse.parse_qsl("key=\u0141%E9", encoding="latin-1")
741 self.assertEqual(result, [('key', '\u0141\xE9')])
742 result = urllib.parse.parse_qsl("key=\u0141%C3%A9", encoding="utf-8")
743 self.assertEqual(result, [('key', '\u0141\xE9')])
744 result = urllib.parse.parse_qsl("key=\u0141%C3%A9", encoding="ascii")
745 self.assertEqual(result, [('key', '\u0141\ufffd\ufffd')])
746 result = urllib.parse.parse_qsl("key=\u0141%E9-", encoding="ascii")
747 self.assertEqual(result, [('key', '\u0141\ufffd-')])
748 result = urllib.parse.parse_qsl("key=\u0141%E9-", encoding="ascii",
749 errors="ignore")
750 self.assertEqual(result, [('key', '\u0141-')])
751
Serhiy Storchakaff97b082014-01-18 18:30:33 +0200752 def test_splitport(self):
753 splitport = urllib.parse.splitport
754 self.assertEqual(splitport('parrot:88'), ('parrot', '88'))
755 self.assertEqual(splitport('parrot'), ('parrot', None))
756 self.assertEqual(splitport('parrot:'), ('parrot', None))
757 self.assertEqual(splitport('127.0.0.1'), ('127.0.0.1', None))
758 self.assertEqual(splitport('parrot:cheese'), ('parrot:cheese', None))
759
Senthil Kumarande02a712011-07-23 18:27:45 +0800760 def test_splitnport(self):
Serhiy Storchakaff97b082014-01-18 18:30:33 +0200761 splitnport = urllib.parse.splitnport
762 self.assertEqual(splitnport('parrot:88'), ('parrot', 88))
763 self.assertEqual(splitnport('parrot'), ('parrot', -1))
764 self.assertEqual(splitnport('parrot', 55), ('parrot', 55))
765 self.assertEqual(splitnport('parrot:'), ('parrot', -1))
766 self.assertEqual(splitnport('parrot:', 55), ('parrot', 55))
767 self.assertEqual(splitnport('127.0.0.1'), ('127.0.0.1', -1))
768 self.assertEqual(splitnport('127.0.0.1', 55), ('127.0.0.1', 55))
769 self.assertEqual(splitnport('parrot:cheese'), ('parrot', None))
770 self.assertEqual(splitnport('parrot:cheese', 55), ('parrot', None))
Senthil Kumarande02a712011-07-23 18:27:45 +0800771
772 def test_splitquery(self):
773 # Normal cases are exercised by other tests; ensure that we also
774 # catch cases with no port specified (testcase ensuring coverage)
775 result = urllib.parse.splitquery('http://python.org/fake?foo=bar')
776 self.assertEqual(result, ('http://python.org/fake', 'foo=bar'))
777 result = urllib.parse.splitquery('http://python.org/fake?foo=bar?')
778 self.assertEqual(result, ('http://python.org/fake?foo=bar', ''))
779 result = urllib.parse.splitquery('http://python.org/fake')
780 self.assertEqual(result, ('http://python.org/fake', None))
781
782 def test_splitvalue(self):
783 # Normal cases are exercised by other tests; test pathological cases
784 # with no key/value pairs. (testcase ensuring coverage)
785 result = urllib.parse.splitvalue('foo=bar')
786 self.assertEqual(result, ('foo', 'bar'))
787 result = urllib.parse.splitvalue('foo=')
788 self.assertEqual(result, ('foo', ''))
789 result = urllib.parse.splitvalue('foobar')
790 self.assertEqual(result, ('foobar', None))
791
792 def test_to_bytes(self):
793 result = urllib.parse.to_bytes('http://www.python.org')
794 self.assertEqual(result, 'http://www.python.org')
795 self.assertRaises(UnicodeError, urllib.parse.to_bytes,
796 'http://www.python.org/medi\u00e6val')
797
798 def test_urlencode_sequences(self):
799 # Other tests incidentally urlencode things; test non-covered cases:
800 # Sequence and object values.
801 result = urllib.parse.urlencode({'a': [1, 2], 'b': (3, 4, 5)}, True)
Georg Brandl09a7c722012-02-20 21:31:46 +0100802 # we cannot rely on ordering here
803 assert set(result.split('&')) == {'a=1', 'a=2', 'b=3', 'b=4', 'b=5'}
Senthil Kumarande02a712011-07-23 18:27:45 +0800804
805 class Trivial:
806 def __str__(self):
807 return 'trivial'
808
809 result = urllib.parse.urlencode({'a': Trivial()}, True)
810 self.assertEqual(result, 'a=trivial')
811
812 def test_quote_from_bytes(self):
813 self.assertRaises(TypeError, urllib.parse.quote_from_bytes, 'foo')
814 result = urllib.parse.quote_from_bytes(b'archaeological arcana')
815 self.assertEqual(result, 'archaeological%20arcana')
816 result = urllib.parse.quote_from_bytes(b'')
817 self.assertEqual(result, '')
818
819 def test_unquote_to_bytes(self):
820 result = urllib.parse.unquote_to_bytes('abc%20def')
821 self.assertEqual(result, b'abc def')
822 result = urllib.parse.unquote_to_bytes('')
823 self.assertEqual(result, b'')
824
825 def test_quote_errors(self):
826 self.assertRaises(TypeError, urllib.parse.quote, b'foo',
827 encoding='utf-8')
828 self.assertRaises(TypeError, urllib.parse.quote, b'foo', errors='strict')
Victor Stinner1d87deb2011-01-14 13:05:19 +0000829
Ezio Melotti6709b7d2012-05-19 17:15:19 +0300830 def test_issue14072(self):
831 p1 = urllib.parse.urlsplit('tel:+31-641044153')
832 self.assertEqual(p1.scheme, 'tel')
833 self.assertEqual(p1.path, '+31-641044153')
834 p2 = urllib.parse.urlsplit('tel:+31641044153')
835 self.assertEqual(p2.scheme, 'tel')
836 self.assertEqual(p2.path, '+31641044153')
Senthil Kumaraned301992012-12-24 14:00:20 -0800837 # assert the behavior for urlparse
838 p1 = urllib.parse.urlparse('tel:+31-641044153')
839 self.assertEqual(p1.scheme, 'tel')
840 self.assertEqual(p1.path, '+31-641044153')
841 p2 = urllib.parse.urlparse('tel:+31641044153')
842 self.assertEqual(p2.scheme, 'tel')
843 self.assertEqual(p2.path, '+31641044153')
844
845 def test_telurl_params(self):
846 p1 = urllib.parse.urlparse('tel:123-4;phone-context=+1-650-516')
847 self.assertEqual(p1.scheme, 'tel')
848 self.assertEqual(p1.path, '123-4')
849 self.assertEqual(p1.params, 'phone-context=+1-650-516')
850
851 p1 = urllib.parse.urlparse('tel:+1-201-555-0123')
852 self.assertEqual(p1.scheme, 'tel')
853 self.assertEqual(p1.path, '+1-201-555-0123')
854 self.assertEqual(p1.params, '')
855
856 p1 = urllib.parse.urlparse('tel:7042;phone-context=example.com')
857 self.assertEqual(p1.scheme, 'tel')
858 self.assertEqual(p1.path, '7042')
859 self.assertEqual(p1.params, 'phone-context=example.com')
860
861 p1 = urllib.parse.urlparse('tel:863-1234;phone-context=+1-914-555')
862 self.assertEqual(p1.scheme, 'tel')
863 self.assertEqual(p1.path, '863-1234')
864 self.assertEqual(p1.params, 'phone-context=+1-914-555')
865
R David Murrayf5163882013-03-21 20:56:51 -0400866 def test_unwrap(self):
867 url = urllib.parse.unwrap('<URL:type://host/path>')
868 self.assertEqual(url, 'type://host/path')
869
870 def test_Quoter_repr(self):
871 quoter = urllib.parse.Quoter(urllib.parse._ALWAYS_SAFE)
872 self.assertIn('Quoter', repr(quoter))
873
Senthil Kumaran6be85c52010-02-19 07:42:50 +0000874
def test_main():
    """Run every test in UrlParseTestCase through the test.support runner."""
    test_classes = (UrlParseTestCase,)
    support.run_unittest(*test_classes)


if __name__ == "__main__":
    test_main()