blob: ad9820bf23e80e2107509275216c3c49f909ff47 [file] [log] [blame]
Skip Montanaro6ec967d2002-03-23 05:32:10 +00001import unittest
Jeremy Hylton1afc1692008-06-18 20:49:58 +00002import urllib.parse
Fred Drakea4d18a02001-01-05 05:57:04 +00003
# Base URLs that the urljoin() tests resolve relative references against.
# Only the RFC 1808 base carries a fragment; the RFC 2396 and RFC 3986
# bases are the same string.
RFC1808_BASE = 'http://a/b/c/d;p?q#f'
RFC2396_BASE = 'http://a/b/c/d;p?q'
RFC3986_BASE = 'http://a/b/c/d;p?q'
SIMPLE_BASE = 'http://a/b/c/d'
Fred Drakea4d18a02001-01-05 05:57:04 +00008
# A list of test cases. Each test case is a two-tuple that contains
# a string with the query and a list of (name, value) pairs with the
# expected result.
11
# str inputs for parse_qsl() paired with the expected (name, value) list
# when blank values are kept.
parse_qsl_test_cases = [
    ("", []),
    ("&", []),
    ("&&", []),
    ("=", [('', '')]),
    ("=a", [('', 'a')]),
    ("a", [('a', '')]),
    ("a=", [('a', '')]),
    ("a=", [('a', '')]),
    ("&a=b", [('a', 'b')]),
    ("a=a+b&b=b+c", [('a', 'a b'), ('b', 'b c')]),
    ("a=1&a=2", [('a', '1'), ('a', '2')]),
]
# Append the same cases as bytes, in the same order.  The right-hand side is
# fully built from the str cases before the list is extended.
parse_qsl_test_cases += [
    (query.encode('ascii'),
     [(name.encode('ascii'), value.encode('ascii')) for name, value in pairs])
    for query, pairs in parse_qsl_test_cases
]
36
Skip Montanaro6ec967d2002-03-23 05:32:10 +000037class UrlParseTestCase(unittest.TestCase):
Johannes Gijsbers41e4faa2005-01-09 15:29:10 +000038
def checkRoundtrips(self, url, parsed, split):
    """Check that *url* survives parse/unparse and split/unsplit round trips.

    url    -- the URL under test (str or bytes)
    parsed -- the 6-tuple urlparse(url) is expected to equal
    split  -- the 5-tuple urlsplit(url) is expected to equal

    For urlparse/urlunparse and then urlsplit/urlunsplit the result must
    equal the expected tuple (as a whole and field by field), must
    reassemble to *url*, and re-parsing its geturl() must give an equal
    result with the same field and netloc-derived attribute values.
    """
    netloc_attrs = ('username', 'password', 'hostname', 'port')
    variants = (
        (urllib.parse.urlparse, urllib.parse.urlunparse, parsed,
         ('scheme', 'netloc', 'path', 'params', 'query', 'fragment')),
        (urllib.parse.urlsplit, urllib.parse.urlunsplit, split,
         ('scheme', 'netloc', 'path', 'query', 'fragment')),
    )
    for take_apart, reassemble, expected, fields in variants:
        first = take_apart(url)
        self.assertEqual(first, expected)
        by_name = tuple(getattr(first, name) for name in fields)
        self.assertEqual(by_name, expected)

        # put it back together and it should be the same
        rebuilt = reassemble(first)
        self.assertEqual(rebuilt, url)
        self.assertEqual(rebuilt, first.geturl())

        # the result of geturl() is a fixpoint; we can always parse it
        # again to get the same result
        again = take_apart(first.geturl())
        self.assertEqual(again.geturl(), first.geturl())
        self.assertEqual(again, first)
        for name in fields + netloc_attrs:
            self.assertEqual(getattr(again, name), getattr(first, name))
Johannes Gijsbers41e4faa2005-01-09 15:29:10 +000089
def test_qsl(self):
    # Run parse_qsl() over every entry of parse_qsl_test_cases, once keeping
    # blank values and once dropping them.
    for query, pairs in parse_qsl_test_cases:
        message = "Error parsing %r" % query
        kept = urllib.parse.parse_qsl(query, keep_blank_values=True)
        self.assertEqual(kept, pairs, message)
        # With blanks dropped, only pairs with a non-empty value remain.
        non_blank = [pair for pair in pairs if pair[1]]
        dropped = urllib.parse.parse_qsl(query, keep_blank_values=False)
        self.assertEqual(dropped, non_blank, message)
Facundo Batistac469d4c2008-09-03 22:49:01 +000098
def test_roundtrips(self):
    # Each case is (url, expected urlparse() 6-tuple, expected urlsplit()
    # 5-tuple).  Every case goes through checkRoundtrips() as str and then
    # again ASCII-encoded to bytes.
    def as_bytes(parts):
        return tuple(part.encode('ascii') for part in parts)

    text_cases = [
        ('file:///tmp/junk.txt',
         ('file', '', '/tmp/junk.txt', '', '', ''),
         ('file', '', '/tmp/junk.txt', '', '')),
        ('imap://mail.python.org/mbox1',
         ('imap', 'mail.python.org', '/mbox1', '', '', ''),
         ('imap', 'mail.python.org', '/mbox1', '', '')),
        ('mms://wms.sys.hinet.net/cts/Drama/09006251100.asf',
         ('mms', 'wms.sys.hinet.net', '/cts/Drama/09006251100.asf',
          '', '', ''),
         ('mms', 'wms.sys.hinet.net', '/cts/Drama/09006251100.asf',
          '', '')),
        ('nfs://server/path/to/file.txt',
         ('nfs', 'server', '/path/to/file.txt', '', '', ''),
         ('nfs', 'server', '/path/to/file.txt', '', '')),
        ('svn+ssh://svn.zope.org/repos/main/ZConfig/trunk/',
         ('svn+ssh', 'svn.zope.org', '/repos/main/ZConfig/trunk/',
          '', '', ''),
         ('svn+ssh', 'svn.zope.org', '/repos/main/ZConfig/trunk/',
          '', '')),
        ('git+ssh://git@github.com/user/project.git',
         ('git+ssh', 'git@github.com', '/user/project.git',
          '', '', ''),
         ('git+ssh', 'git@github.com', '/user/project.git',
          '', '')),
    ]
    binary_cases = [
        (url.encode('ascii'), as_bytes(parsed), as_bytes(split))
        for url, parsed, split in text_cases
    ]
    for case in text_cases + binary_cases:
        self.checkRoundtrips(*case)
Michael W. Hudsonbd3e7712002-03-18 13:06:00 +0000133
def test_http_roundtrips(self):
    # urllib.parse.urlsplit treats 'http:' as an optimized special case,
    # so every case below is exercised with both 'http' and 'https'.
    # Each case is (url without its scheme, expected urlparse() tuple without
    # the scheme, expected urlsplit() tuple without the scheme).
    def as_bytes(parts):
        return tuple(part.encode('ascii') for part in parts)

    text_cases = [
        ('://www.python.org',
         ('www.python.org', '', '', '', ''),
         ('www.python.org', '', '', '')),
        ('://www.python.org#abc',
         ('www.python.org', '', '', '', 'abc'),
         ('www.python.org', '', '', 'abc')),
        ('://www.python.org?q=abc',
         ('www.python.org', '', '', 'q=abc', ''),
         ('www.python.org', '', 'q=abc', '')),
        ('://www.python.org/#abc',
         ('www.python.org', '/', '', '', 'abc'),
         ('www.python.org', '/', '', 'abc')),
        ('://a/b/c/d;p?q#f',
         ('a', '/b/c/d', 'p', 'q', 'f'),
         ('a', '/b/c/d;p', 'q', 'f')),
    ]
    binary_cases = [
        (tail.encode('ascii'), as_bytes(parsed), as_bytes(split))
        for tail, parsed, split in text_cases
    ]
    groups = (
        (('http', 'https'), text_cases),
        ((b'http', b'https'), binary_cases),
    )
    for schemes, cases in groups:
        for scheme in schemes:
            lead = (scheme,)
            for tail, parsed_rest, split_rest in cases:
                # Prepend the scheme to the URL and to both expected tuples.
                self.checkRoundtrips(scheme + tail,
                                     lead + parsed_rest,
                                     lead + split_rest)
Fred Drake70705652002-10-16 21:02:36 +0000171
def checkJoin(self, base, relurl, expected):
    """Assert that urljoin(base, relurl) gives *expected*, for str and bytes.

    base     -- base URL (str, ASCII only)
    relurl   -- reference to resolve against *base* (str, ASCII only)
    expected -- the URL urljoin() is expected to return (str, ASCII only)

    The check runs on the str arguments first and then on their
    ASCII-encoded bytes equivalents.
    """
    self.assertEqual(urllib.parse.urljoin(base, relurl), expected)
    # Same check with bytes input; encode() raises for non-ASCII text.
    baseb, relurlb, expectedb = (
        text.encode('ascii') for text in (base, relurl, expected))
    self.assertEqual(urllib.parse.urljoin(baseb, relurlb), expectedb)
Guido van Rossumbbc05682002-10-14 19:59:54 +0000178
179 def test_unparse_parse(self):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000180 str_cases = ['Python', './Python','x-newscheme://foo.com/stuff','x://y','x:/y','x:/','/',]
181 bytes_cases = [x.encode('ascii') for x in str_cases]
182 for u in str_cases + bytes_cases:
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000183 self.assertEqual(urllib.parse.urlunsplit(urllib.parse.urlsplit(u)), u)
184 self.assertEqual(urllib.parse.urlunparse(urllib.parse.urlparse(u)), u)
Fred Drakea4d18a02001-01-05 05:57:04 +0000185
def test_RFC1808(self):
    # Each pair is (relative reference, expected result of joining it to
    # RFC1808_BASE).

    # "normal" cases from RFC 1808:
    normal_cases = (
        ('g:h', 'g:h'),
        ('g', 'http://a/b/c/g'),
        ('./g', 'http://a/b/c/g'),
        ('g/', 'http://a/b/c/g/'),
        ('/g', 'http://a/g'),
        ('//g', 'http://g'),
        ('g?y', 'http://a/b/c/g?y'),
        ('g?y/./x', 'http://a/b/c/g?y/./x'),
        ('#s', 'http://a/b/c/d;p?q#s'),
        ('g#s', 'http://a/b/c/g#s'),
        ('g#s/./x', 'http://a/b/c/g#s/./x'),
        ('g?y#s', 'http://a/b/c/g?y#s'),
        ('g;x', 'http://a/b/c/g;x'),
        ('g;x?y#s', 'http://a/b/c/g;x?y#s'),
        ('.', 'http://a/b/c/'),
        ('./', 'http://a/b/c/'),
        ('..', 'http://a/b/'),
        ('../', 'http://a/b/'),
        ('../g', 'http://a/b/g'),
        ('../..', 'http://a/'),
        ('../../', 'http://a/'),
        ('../../g', 'http://a/g'),
    )

    # "abnormal" cases from RFC 1808:
    abnormal_cases = (
        ('', 'http://a/b/c/d;p?q#f'),
        ('../../../g', 'http://a/../g'),
        ('../../../../g', 'http://a/../../g'),
        ('/./g', 'http://a/./g'),
        ('/../g', 'http://a/../g'),
        ('g.', 'http://a/b/c/g.'),
        ('.g', 'http://a/b/c/.g'),
        ('g..', 'http://a/b/c/g..'),
        ('..g', 'http://a/b/c/..g'),
        ('./../g', 'http://a/b/g'),
        ('./g/.', 'http://a/b/c/g/'),
        ('g/./h', 'http://a/b/c/g/h'),
        ('g/../h', 'http://a/b/c/h'),
    )

    for relative, joined in normal_cases + abnormal_cases:
        self.checkJoin(RFC1808_BASE, relative, joined)

    # RFC 1808 and RFC 1630 disagree on these (according to RFC 1808),
    # so we'll not actually run these tests (which expect 1808 behavior).
    #self.checkJoin(RFC1808_BASE, 'http:g', 'http:g')
    #self.checkJoin(RFC1808_BASE, 'http:', 'http:')
Fred Drakea4d18a02001-01-05 05:57:04 +0000230
Senthil Kumaran397eb442011-04-15 18:20:24 +0800231 def test_RFC2368(self):
232 # Issue 11467: path that starts with a number is not parsed correctly
233 self.assertEqual(urllib.parse.urlparse('mailto:1337@example.org'),
234 ('mailto', '', '1337@example.org', '', '', ''))
235
def test_RFC2396(self):
    # cases from RFC 2396
    # Each pair is (relative reference, expected result of joining it to
    # RFC2396_BASE).
    cases = (
        ('g:h', 'g:h'),
        ('g', 'http://a/b/c/g'),
        ('./g', 'http://a/b/c/g'),
        ('g/', 'http://a/b/c/g/'),
        ('/g', 'http://a/g'),
        ('//g', 'http://g'),
        ('g?y', 'http://a/b/c/g?y'),
        ('#s', 'http://a/b/c/d;p?q#s'),
        ('g#s', 'http://a/b/c/g#s'),
        ('g?y#s', 'http://a/b/c/g?y#s'),
        ('g;x', 'http://a/b/c/g;x'),
        ('g;x?y#s', 'http://a/b/c/g;x?y#s'),
        ('.', 'http://a/b/c/'),
        ('./', 'http://a/b/c/'),
        ('..', 'http://a/b/'),
        ('../', 'http://a/b/'),
        ('../g', 'http://a/b/g'),
        ('../..', 'http://a/'),
        ('../../', 'http://a/'),
        ('../../g', 'http://a/g'),
        # An empty reference resolves to the base itself.
        ('', RFC2396_BASE),
        ('../../../g', 'http://a/../g'),
        ('../../../../g', 'http://a/../../g'),
        ('/./g', 'http://a/./g'),
        ('/../g', 'http://a/../g'),
        ('g.', 'http://a/b/c/g.'),
        ('.g', 'http://a/b/c/.g'),
        ('g..', 'http://a/b/c/g..'),
        ('..g', 'http://a/b/c/..g'),
        ('./../g', 'http://a/b/g'),
        ('./g/.', 'http://a/b/c/g/'),
        ('g/./h', 'http://a/b/c/g/h'),
        ('g/../h', 'http://a/b/c/h'),
        ('g;x=1/./y', 'http://a/b/c/g;x=1/y'),
        ('g;x=1/../y', 'http://a/b/c/y'),
        ('g?y/./x', 'http://a/b/c/g?y/./x'),
        ('g?y/../x', 'http://a/b/c/g?y/../x'),
        ('g#s/./x', 'http://a/b/c/g#s/./x'),
        ('g#s/../x', 'http://a/b/c/g#s/../x'),
    )
    for relative, joined in cases:
        self.checkJoin(RFC2396_BASE, relative, joined)
279
def test_RFC3986(self):
    # Test cases from RFC3986
    # Each pair is (relative reference, expected result of joining it to
    # RFC3986_BASE).  Every case resolves against RFC3986_BASE, and the
    # '?y' and ';x' cases appear once each.
    normal_examples = (
        ('g:h', 'g:h'),
        ('g', 'http://a/b/c/g'),
        ('./g', 'http://a/b/c/g'),
        ('g/', 'http://a/b/c/g/'),
        ('/g', 'http://a/g'),
        ('//g', 'http://g'),
        ('?y', 'http://a/b/c/d;p?y'),
        ('g?y', 'http://a/b/c/g?y'),
        ('#s', 'http://a/b/c/d;p?q#s'),
        ('g#s', 'http://a/b/c/g#s'),
        ('g?y#s', 'http://a/b/c/g?y#s'),
        (';x', 'http://a/b/c/;x'),
        ('g;x', 'http://a/b/c/g;x'),
        ('g;x?y#s', 'http://a/b/c/g;x?y#s'),
        ('', 'http://a/b/c/d;p?q'),
        ('.', 'http://a/b/c/'),
        ('./', 'http://a/b/c/'),
        ('..', 'http://a/b/'),
        ('../', 'http://a/b/'),
        ('../g', 'http://a/b/g'),
        ('../..', 'http://a/'),
        ('../../', 'http://a/'),
        ('../../g', 'http://a/g'),
    )

    # Abnormal Examples

    # The 'abnormal scenarios' are incompatible with RFC3986 parsing.
    # Tests are here for reference.

    #self.checkJoin(RFC3986_BASE, '../../../g','http://a/g')
    #self.checkJoin(RFC3986_BASE, '../../../../g','http://a/g')
    #self.checkJoin(RFC3986_BASE, '/./g','http://a/g')
    #self.checkJoin(RFC3986_BASE, '/../g','http://a/g')

    abnormal_examples = (
        ('g.', 'http://a/b/c/g.'),
        ('.g', 'http://a/b/c/.g'),
        ('g..', 'http://a/b/c/g..'),
        ('..g', 'http://a/b/c/..g'),
        ('./../g', 'http://a/b/g'),
        ('./g/.', 'http://a/b/c/g/'),
        ('g/./h', 'http://a/b/c/g/h'),
        ('g/../h', 'http://a/b/c/h'),
        ('g;x=1/./y', 'http://a/b/c/g;x=1/y'),
        ('g;x=1/../y', 'http://a/b/c/y'),
        ('g?y/./x', 'http://a/b/c/g?y/./x'),
        ('g?y/../x', 'http://a/b/c/g?y/../x'),
        ('g#s/./x', 'http://a/b/c/g#s/./x'),
        ('g#s/../x', 'http://a/b/c/g#s/../x'),
        # A strict parser would give 'http:g' here; this expects the
        # relaxed-parser result.
        ('http:g', 'http://a/b/c/g'),
    )

    for relative, joined in normal_examples + abnormal_examples:
        self.checkJoin(RFC3986_BASE, relative, joined)

    # Test for issue9721
    self.checkJoin('http://a/b/c/de', ';x', 'http://a/b/c/;x')
337
def test_urljoins(self):
    # Each pair is (relative reference, expected result of joining it to
    # SIMPLE_BASE).
    simple_base_cases = (
        ('g:h', 'g:h'),
        ('http:g', 'http://a/b/c/g'),
        ('http:', 'http://a/b/c/d'),
        ('g', 'http://a/b/c/g'),
        ('./g', 'http://a/b/c/g'),
        ('g/', 'http://a/b/c/g/'),
        ('/g', 'http://a/g'),
        ('//g', 'http://g'),
        ('?y', 'http://a/b/c/d?y'),
        ('g?y', 'http://a/b/c/g?y'),
        ('g?y/./x', 'http://a/b/c/g?y/./x'),
        ('.', 'http://a/b/c/'),
        ('./', 'http://a/b/c/'),
        ('..', 'http://a/b/'),
        ('../', 'http://a/b/'),
        ('../g', 'http://a/b/g'),
        ('../..', 'http://a/'),
        ('../../g', 'http://a/g'),
        ('../../../g', 'http://a/../g'),
        ('./../g', 'http://a/b/g'),
        ('./g/.', 'http://a/b/c/g/'),
        ('/./g', 'http://a/./g'),
        ('g/./h', 'http://a/b/c/g/h'),
        ('g/../h', 'http://a/b/c/h'),
        ('http:g', 'http://a/b/c/g'),
        ('http:', 'http://a/b/c/d'),
        ('http:?y', 'http://a/b/c/d?y'),
        ('http:g?y', 'http://a/b/c/g?y'),
        ('http:g?y/./x', 'http://a/b/c/g?y/./x'),
    )
    for relative, joined in simple_base_cases:
        self.checkJoin(SIMPLE_BASE, relative, joined)

    # Cases with their own base: (base, relative reference, expected).
    other_base_cases = (
        ('http:///', '..', 'http:///'),
        ('', 'http://a/b/c/g?y/./x', 'http://a/b/c/g?y/./x'),
        ('', 'http://a/./g', 'http://a/./g'),
        ('svn://pathtorepo/dir1', 'dir2', 'svn://pathtorepo/dir2'),
        ('svn+ssh://pathtorepo/dir1', 'dir2', 'svn+ssh://pathtorepo/dir2'),
    )
    for base, relative, joined in other_base_cases:
        self.checkJoin(base, relative, joined)
Senthil Kumaranaa69d4d2010-07-14 10:21:22 +0000373
Senthil Kumaranad02d232010-04-16 03:02:13 +0000374 def test_RFC2732(self):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000375 str_cases = [
Senthil Kumaranad02d232010-04-16 03:02:13 +0000376 ('http://Test.python.org:5432/foo/', 'test.python.org', 5432),
377 ('http://12.34.56.78:5432/foo/', '12.34.56.78', 5432),
378 ('http://[::1]:5432/foo/', '::1', 5432),
379 ('http://[dead:beef::1]:5432/foo/', 'dead:beef::1', 5432),
380 ('http://[dead:beef::]:5432/foo/', 'dead:beef::', 5432),
381 ('http://[dead:beef:cafe:5417:affe:8FA3:deaf:feed]:5432/foo/',
382 'dead:beef:cafe:5417:affe:8fa3:deaf:feed', 5432),
383 ('http://[::12.34.56.78]:5432/foo/', '::12.34.56.78', 5432),
384 ('http://[::ffff:12.34.56.78]:5432/foo/',
385 '::ffff:12.34.56.78', 5432),
386 ('http://Test.python.org/foo/', 'test.python.org', None),
387 ('http://12.34.56.78/foo/', '12.34.56.78', None),
388 ('http://[::1]/foo/', '::1', None),
389 ('http://[dead:beef::1]/foo/', 'dead:beef::1', None),
390 ('http://[dead:beef::]/foo/', 'dead:beef::', None),
391 ('http://[dead:beef:cafe:5417:affe:8FA3:deaf:feed]/foo/',
392 'dead:beef:cafe:5417:affe:8fa3:deaf:feed', None),
393 ('http://[::12.34.56.78]/foo/', '::12.34.56.78', None),
394 ('http://[::ffff:12.34.56.78]/foo/',
395 '::ffff:12.34.56.78', None),
Serhiy Storchakaff97b082014-01-18 18:30:33 +0200396 ('http://Test.python.org:/foo/', 'test.python.org', None),
397 ('http://12.34.56.78:/foo/', '12.34.56.78', None),
398 ('http://[::1]:/foo/', '::1', None),
399 ('http://[dead:beef::1]:/foo/', 'dead:beef::1', None),
400 ('http://[dead:beef::]:/foo/', 'dead:beef::', None),
401 ('http://[dead:beef:cafe:5417:affe:8FA3:deaf:feed]:/foo/',
402 'dead:beef:cafe:5417:affe:8fa3:deaf:feed', None),
403 ('http://[::12.34.56.78]:/foo/', '::12.34.56.78', None),
404 ('http://[::ffff:12.34.56.78]:/foo/',
405 '::ffff:12.34.56.78', None),
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000406 ]
407 def _encode(t):
408 return t[0].encode('ascii'), t[1].encode('ascii'), t[2]
409 bytes_cases = [_encode(x) for x in str_cases]
410 for url, hostname, port in str_cases + bytes_cases:
Senthil Kumaranad02d232010-04-16 03:02:13 +0000411 urlparsed = urllib.parse.urlparse(url)
412 self.assertEqual((urlparsed.hostname, urlparsed.port) , (hostname, port))
413
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000414 str_cases = [
Senthil Kumaranad02d232010-04-16 03:02:13 +0000415 'http://::12.34.56.78]/',
416 'http://[::1/foo/',
Senthil Kumaran7a1e09f2010-04-22 12:19:46 +0000417 'ftp://[::1/foo/bad]/bad',
Senthil Kumaran2eaef052010-04-20 20:42:50 +0000418 'http://[::1/foo/bad]/bad',
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000419 'http://[::ffff:12.34.56.78']
420 bytes_cases = [x.encode('ascii') for x in str_cases]
421 for invalid_url in str_cases + bytes_cases:
Senthil Kumaran7a1e09f2010-04-22 12:19:46 +0000422 self.assertRaises(ValueError, urllib.parse.urlparse, invalid_url)
Senthil Kumaranad02d232010-04-16 03:02:13 +0000423
def test_urldefrag(self):
    # Each case is (url, url without its fragment, fragment); every case
    # is checked as str and again ASCII-encoded to bytes.
    text_cases = [
        ('http://python.org#frag', 'http://python.org', 'frag'),
        ('http://python.org', 'http://python.org', ''),
        ('http://python.org/#frag', 'http://python.org/', 'frag'),
        ('http://python.org/', 'http://python.org/', ''),
        ('http://python.org/?q#frag', 'http://python.org/?q', 'frag'),
        ('http://python.org/?q', 'http://python.org/?q', ''),
        ('http://python.org/p#frag', 'http://python.org/p', 'frag'),
        ('http://python.org/p?q', 'http://python.org/p?q', ''),
        (RFC1808_BASE, 'http://a/b/c/d;p?q', 'f'),
        (RFC2396_BASE, 'http://a/b/c/d;p?q', ''),
    ]
    binary_cases = [
        tuple(item.encode('ascii') for item in case) for case in text_cases
    ]
    for url, defrag, frag in text_cases + binary_cases:
        outcome = urllib.parse.urldefrag(url)
        self.assertEqual(outcome.geturl(), url)
        self.assertEqual(outcome, (defrag, frag))
        self.assertEqual(outcome.url, defrag)
        self.assertEqual(outcome.fragment, frag)
Fred Drake70705652002-10-16 21:02:36 +0000446
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000447 def test_urlsplit_attributes(self):
448 url = "HTTP://WWW.PYTHON.ORG/doc/#frag"
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000449 p = urllib.parse.urlsplit(url)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000450 self.assertEqual(p.scheme, "http")
451 self.assertEqual(p.netloc, "WWW.PYTHON.ORG")
452 self.assertEqual(p.path, "/doc/")
453 self.assertEqual(p.query, "")
454 self.assertEqual(p.fragment, "frag")
455 self.assertEqual(p.username, None)
456 self.assertEqual(p.password, None)
457 self.assertEqual(p.hostname, "www.python.org")
458 self.assertEqual(p.port, None)
459 # geturl() won't return exactly the original URL in this case
460 # since the scheme is always case-normalized
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000461 # We handle this by ignoring the first 4 characters of the URL
462 self.assertEqual(p.geturl()[4:], url[4:])
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000463
464 url = "http://User:Pass@www.python.org:080/doc/?query=yes#frag"
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000465 p = urllib.parse.urlsplit(url)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000466 self.assertEqual(p.scheme, "http")
467 self.assertEqual(p.netloc, "User:Pass@www.python.org:080")
468 self.assertEqual(p.path, "/doc/")
469 self.assertEqual(p.query, "query=yes")
470 self.assertEqual(p.fragment, "frag")
471 self.assertEqual(p.username, "User")
472 self.assertEqual(p.password, "Pass")
473 self.assertEqual(p.hostname, "www.python.org")
474 self.assertEqual(p.port, 80)
475 self.assertEqual(p.geturl(), url)
476
Christian Heimesfaf2f632008-01-06 16:59:19 +0000477 # Addressing issue1698, which suggests Username can contain
478 # "@" characters. Though not RFC compliant, many ftp sites allow
479 # and request email addresses as usernames.
480
481 url = "http://User@example.com:Pass@www.python.org:080/doc/?query=yes#frag"
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000482 p = urllib.parse.urlsplit(url)
Christian Heimesfaf2f632008-01-06 16:59:19 +0000483 self.assertEqual(p.scheme, "http")
484 self.assertEqual(p.netloc, "User@example.com:Pass@www.python.org:080")
485 self.assertEqual(p.path, "/doc/")
486 self.assertEqual(p.query, "query=yes")
487 self.assertEqual(p.fragment, "frag")
488 self.assertEqual(p.username, "User@example.com")
489 self.assertEqual(p.password, "Pass")
490 self.assertEqual(p.hostname, "www.python.org")
491 self.assertEqual(p.port, 80)
492 self.assertEqual(p.geturl(), url)
493
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000494 # And check them all again, only with bytes this time
495 url = b"HTTP://WWW.PYTHON.ORG/doc/#frag"
496 p = urllib.parse.urlsplit(url)
497 self.assertEqual(p.scheme, b"http")
498 self.assertEqual(p.netloc, b"WWW.PYTHON.ORG")
499 self.assertEqual(p.path, b"/doc/")
500 self.assertEqual(p.query, b"")
501 self.assertEqual(p.fragment, b"frag")
502 self.assertEqual(p.username, None)
503 self.assertEqual(p.password, None)
504 self.assertEqual(p.hostname, b"www.python.org")
505 self.assertEqual(p.port, None)
506 self.assertEqual(p.geturl()[4:], url[4:])
507
508 url = b"http://User:Pass@www.python.org:080/doc/?query=yes#frag"
509 p = urllib.parse.urlsplit(url)
510 self.assertEqual(p.scheme, b"http")
511 self.assertEqual(p.netloc, b"User:Pass@www.python.org:080")
512 self.assertEqual(p.path, b"/doc/")
513 self.assertEqual(p.query, b"query=yes")
514 self.assertEqual(p.fragment, b"frag")
515 self.assertEqual(p.username, b"User")
516 self.assertEqual(p.password, b"Pass")
517 self.assertEqual(p.hostname, b"www.python.org")
518 self.assertEqual(p.port, 80)
519 self.assertEqual(p.geturl(), url)
520
521 url = b"http://User@example.com:Pass@www.python.org:080/doc/?query=yes#frag"
522 p = urllib.parse.urlsplit(url)
523 self.assertEqual(p.scheme, b"http")
524 self.assertEqual(p.netloc, b"User@example.com:Pass@www.python.org:080")
525 self.assertEqual(p.path, b"/doc/")
526 self.assertEqual(p.query, b"query=yes")
527 self.assertEqual(p.fragment, b"frag")
528 self.assertEqual(p.username, b"User@example.com")
529 self.assertEqual(p.password, b"Pass")
530 self.assertEqual(p.hostname, b"www.python.org")
531 self.assertEqual(p.port, 80)
532 self.assertEqual(p.geturl(), url)
Christian Heimesfaf2f632008-01-06 16:59:19 +0000533
Senthil Kumaran2fc5a502012-05-24 21:56:17 +0800534 # Verify an illegal port is returned as None
535 url = b"HTTP://WWW.PYTHON.ORG:65536/doc/#frag"
536 p = urllib.parse.urlsplit(url)
537 self.assertEqual(p.port, None)
538
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000539 def test_attributes_bad_port(self):
540 """Check handling of non-integer ports."""
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000541 p = urllib.parse.urlsplit("http://www.example.net:foo")
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000542 self.assertEqual(p.netloc, "www.example.net:foo")
543 self.assertRaises(ValueError, lambda: p.port)
544
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000545 p = urllib.parse.urlparse("http://www.example.net:foo")
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000546 self.assertEqual(p.netloc, "www.example.net:foo")
547 self.assertRaises(ValueError, lambda: p.port)
548
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000549 # Once again, repeat ourselves to test bytes
550 p = urllib.parse.urlsplit(b"http://www.example.net:foo")
551 self.assertEqual(p.netloc, b"www.example.net:foo")
552 self.assertRaises(ValueError, lambda: p.port)
553
554 p = urllib.parse.urlparse(b"http://www.example.net:foo")
555 self.assertEqual(p.netloc, b"www.example.net:foo")
556 self.assertRaises(ValueError, lambda: p.port)
557
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000558 def test_attributes_without_netloc(self):
559 # This example is straight from RFC 3261. It looks like it
560 # should allow the username, hostname, and port to be filled
561 # in, but doesn't. Since it's a URI and doesn't use the
562 # scheme://netloc syntax, the netloc and related attributes
563 # should be left empty.
564 uri = "sip:alice@atlanta.com;maddr=239.255.255.1;ttl=15"
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000565 p = urllib.parse.urlsplit(uri)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000566 self.assertEqual(p.netloc, "")
567 self.assertEqual(p.username, None)
568 self.assertEqual(p.password, None)
569 self.assertEqual(p.hostname, None)
570 self.assertEqual(p.port, None)
571 self.assertEqual(p.geturl(), uri)
572
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000573 p = urllib.parse.urlparse(uri)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000574 self.assertEqual(p.netloc, "")
575 self.assertEqual(p.username, None)
576 self.assertEqual(p.password, None)
577 self.assertEqual(p.hostname, None)
578 self.assertEqual(p.port, None)
579 self.assertEqual(p.geturl(), uri)
580
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000581 # You guessed it, repeating the test with bytes input
582 uri = b"sip:alice@atlanta.com;maddr=239.255.255.1;ttl=15"
583 p = urllib.parse.urlsplit(uri)
584 self.assertEqual(p.netloc, b"")
585 self.assertEqual(p.username, None)
586 self.assertEqual(p.password, None)
587 self.assertEqual(p.hostname, None)
588 self.assertEqual(p.port, None)
589 self.assertEqual(p.geturl(), uri)
590
591 p = urllib.parse.urlparse(uri)
592 self.assertEqual(p.netloc, b"")
593 self.assertEqual(p.username, None)
594 self.assertEqual(p.password, None)
595 self.assertEqual(p.hostname, None)
596 self.assertEqual(p.port, None)
597 self.assertEqual(p.geturl(), uri)
598
Christian Heimesfaf2f632008-01-06 16:59:19 +0000599 def test_noslash(self):
600 # Issue 1637: http://foo.com?query is legal
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000601 self.assertEqual(urllib.parse.urlparse("http://example.com?blahblah=/foo"),
Christian Heimesfaf2f632008-01-06 16:59:19 +0000602 ('http', 'example.com', '', '', 'blahblah=/foo', ''))
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000603 self.assertEqual(urllib.parse.urlparse(b"http://example.com?blahblah=/foo"),
604 (b'http', b'example.com', b'', b'', b'blahblah=/foo', b''))
Christian Heimesfaf2f632008-01-06 16:59:19 +0000605
Senthil Kumaran84c7d9f2010-08-04 04:50:44 +0000606 def test_withoutscheme(self):
607 # Test urlparse without scheme
608 # Issue 754016: urlparse goes wrong with IP:port without scheme
609 # RFC 1808 specifies that netloc should start with //, urlparse expects
610 # the same, otherwise it classifies the portion of url as path.
611 self.assertEqual(urllib.parse.urlparse("path"),
612 ('','','path','','',''))
613 self.assertEqual(urllib.parse.urlparse("//www.python.org:80"),
614 ('','www.python.org:80','','','',''))
615 self.assertEqual(urllib.parse.urlparse("http://www.python.org:80"),
616 ('http','www.python.org:80','','','',''))
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000617 # Repeat for bytes input
618 self.assertEqual(urllib.parse.urlparse(b"path"),
619 (b'',b'',b'path',b'',b'',b''))
620 self.assertEqual(urllib.parse.urlparse(b"//www.python.org:80"),
621 (b'',b'www.python.org:80',b'',b'',b'',b''))
622 self.assertEqual(urllib.parse.urlparse(b"http://www.python.org:80"),
623 (b'http',b'www.python.org:80',b'',b'',b'',b''))
Senthil Kumaran84c7d9f2010-08-04 04:50:44 +0000624
625 def test_portseparator(self):
626 # Issue 754016 makes changes for port separator ':' from scheme separator
627 self.assertEqual(urllib.parse.urlparse("path:80"),
628 ('','','path:80','','',''))
629 self.assertEqual(urllib.parse.urlparse("http:"),('http','','','','',''))
630 self.assertEqual(urllib.parse.urlparse("https:"),('https','','','','',''))
631 self.assertEqual(urllib.parse.urlparse("http://www.python.org:80"),
632 ('http','www.python.org:80','','','',''))
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000633 # As usual, need to check bytes input as well
634 self.assertEqual(urllib.parse.urlparse(b"path:80"),
635 (b'',b'',b'path:80',b'',b'',b''))
636 self.assertEqual(urllib.parse.urlparse(b"http:"),(b'http',b'',b'',b'',b'',b''))
637 self.assertEqual(urllib.parse.urlparse(b"https:"),(b'https',b'',b'',b'',b'',b''))
638 self.assertEqual(urllib.parse.urlparse(b"http://www.python.org:80"),
639 (b'http',b'www.python.org:80',b'',b'',b'',b''))
Senthil Kumaran84c7d9f2010-08-04 04:50:44 +0000640
Facundo Batista2ac5de22008-07-07 18:24:11 +0000641 def test_usingsys(self):
642 # Issue 3314: sys module is used in the error
643 self.assertRaises(TypeError, urllib.parse.urlencode, "foo")
644
Senthil Kumaran6be85c52010-02-19 07:42:50 +0000645 def test_anyscheme(self):
646 # Issue 7904: s3://foo.com/stuff has netloc "foo.com".
Ezio Melotti5e15efa2010-02-19 14:49:02 +0000647 self.assertEqual(urllib.parse.urlparse("s3://foo.com/stuff"),
648 ('s3', 'foo.com', '/stuff', '', '', ''))
649 self.assertEqual(urllib.parse.urlparse("x-newscheme://foo.com/stuff"),
650 ('x-newscheme', 'foo.com', '/stuff', '', '', ''))
Senthil Kumaran1be320e2012-05-19 08:12:00 +0800651 self.assertEqual(urllib.parse.urlparse("x-newscheme://foo.com/stuff?query#fragment"),
652 ('x-newscheme', 'foo.com', '/stuff', '', 'query', 'fragment'))
653 self.assertEqual(urllib.parse.urlparse("x-newscheme://foo.com/stuff?query"),
654 ('x-newscheme', 'foo.com', '/stuff', '', 'query', ''))
655
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000656 # And for bytes...
657 self.assertEqual(urllib.parse.urlparse(b"s3://foo.com/stuff"),
658 (b's3', b'foo.com', b'/stuff', b'', b'', b''))
659 self.assertEqual(urllib.parse.urlparse(b"x-newscheme://foo.com/stuff"),
660 (b'x-newscheme', b'foo.com', b'/stuff', b'', b'', b''))
Senthil Kumaran1be320e2012-05-19 08:12:00 +0800661 self.assertEqual(urllib.parse.urlparse(b"x-newscheme://foo.com/stuff?query#fragment"),
662 (b'x-newscheme', b'foo.com', b'/stuff', b'', b'query', b'fragment'))
663 self.assertEqual(urllib.parse.urlparse(b"x-newscheme://foo.com/stuff?query"),
664 (b'x-newscheme', b'foo.com', b'/stuff', b'', b'query', b''))
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000665
666 def test_mixed_types_rejected(self):
667 # Several functions that process either strings or ASCII encoded bytes
668 # accept multiple arguments. Check they reject mixed type input
Ezio Melottied3a7d22010-12-01 02:32:32 +0000669 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000670 urllib.parse.urlparse("www.python.org", b"http")
Ezio Melottied3a7d22010-12-01 02:32:32 +0000671 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000672 urllib.parse.urlparse(b"www.python.org", "http")
Ezio Melottied3a7d22010-12-01 02:32:32 +0000673 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000674 urllib.parse.urlsplit("www.python.org", b"http")
Ezio Melottied3a7d22010-12-01 02:32:32 +0000675 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000676 urllib.parse.urlsplit(b"www.python.org", "http")
Ezio Melottied3a7d22010-12-01 02:32:32 +0000677 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000678 urllib.parse.urlunparse(( b"http", "www.python.org","","","",""))
Ezio Melottied3a7d22010-12-01 02:32:32 +0000679 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000680 urllib.parse.urlunparse(("http", b"www.python.org","","","",""))
Ezio Melottied3a7d22010-12-01 02:32:32 +0000681 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000682 urllib.parse.urlunsplit((b"http", "www.python.org","","",""))
Ezio Melottied3a7d22010-12-01 02:32:32 +0000683 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000684 urllib.parse.urlunsplit(("http", b"www.python.org","","",""))
Ezio Melottied3a7d22010-12-01 02:32:32 +0000685 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000686 urllib.parse.urljoin("http://python.org", b"http://python.org")
Ezio Melottied3a7d22010-12-01 02:32:32 +0000687 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000688 urllib.parse.urljoin(b"http://python.org", "http://python.org")
689
690 def _check_result_type(self, str_type):
691 num_args = len(str_type._fields)
692 bytes_type = str_type._encoded_counterpart
693 self.assertIs(bytes_type._decoded_counterpart, str_type)
694 str_args = ('',) * num_args
695 bytes_args = (b'',) * num_args
696 str_result = str_type(*str_args)
697 bytes_result = bytes_type(*bytes_args)
698 encoding = 'ascii'
699 errors = 'strict'
700 self.assertEqual(str_result, str_args)
701 self.assertEqual(bytes_result.decode(), str_args)
702 self.assertEqual(bytes_result.decode(), str_result)
703 self.assertEqual(bytes_result.decode(encoding), str_args)
704 self.assertEqual(bytes_result.decode(encoding), str_result)
705 self.assertEqual(bytes_result.decode(encoding, errors), str_args)
706 self.assertEqual(bytes_result.decode(encoding, errors), str_result)
707 self.assertEqual(bytes_result, bytes_args)
708 self.assertEqual(str_result.encode(), bytes_args)
709 self.assertEqual(str_result.encode(), bytes_result)
710 self.assertEqual(str_result.encode(encoding), bytes_args)
711 self.assertEqual(str_result.encode(encoding), bytes_result)
712 self.assertEqual(str_result.encode(encoding, errors), bytes_args)
713 self.assertEqual(str_result.encode(encoding, errors), bytes_result)
714
715 def test_result_pairs(self):
716 # Check encoding and decoding between result pairs
717 result_types = [
718 urllib.parse.DefragResult,
719 urllib.parse.SplitResult,
720 urllib.parse.ParseResult,
721 ]
722 for result_type in result_types:
723 self._check_result_type(result_type)
724
Victor Stinner1d87deb2011-01-14 13:05:19 +0000725 def test_parse_qs_encoding(self):
726 result = urllib.parse.parse_qs("key=\u0141%E9", encoding="latin-1")
727 self.assertEqual(result, {'key': ['\u0141\xE9']})
728 result = urllib.parse.parse_qs("key=\u0141%C3%A9", encoding="utf-8")
729 self.assertEqual(result, {'key': ['\u0141\xE9']})
730 result = urllib.parse.parse_qs("key=\u0141%C3%A9", encoding="ascii")
731 self.assertEqual(result, {'key': ['\u0141\ufffd\ufffd']})
732 result = urllib.parse.parse_qs("key=\u0141%E9-", encoding="ascii")
733 self.assertEqual(result, {'key': ['\u0141\ufffd-']})
734 result = urllib.parse.parse_qs("key=\u0141%E9-", encoding="ascii",
735 errors="ignore")
736 self.assertEqual(result, {'key': ['\u0141-']})
737
738 def test_parse_qsl_encoding(self):
739 result = urllib.parse.parse_qsl("key=\u0141%E9", encoding="latin-1")
740 self.assertEqual(result, [('key', '\u0141\xE9')])
741 result = urllib.parse.parse_qsl("key=\u0141%C3%A9", encoding="utf-8")
742 self.assertEqual(result, [('key', '\u0141\xE9')])
743 result = urllib.parse.parse_qsl("key=\u0141%C3%A9", encoding="ascii")
744 self.assertEqual(result, [('key', '\u0141\ufffd\ufffd')])
745 result = urllib.parse.parse_qsl("key=\u0141%E9-", encoding="ascii")
746 self.assertEqual(result, [('key', '\u0141\ufffd-')])
747 result = urllib.parse.parse_qsl("key=\u0141%E9-", encoding="ascii",
748 errors="ignore")
749 self.assertEqual(result, [('key', '\u0141-')])
750
Senthil Kumarande02a712011-07-23 18:27:45 +0800751 def test_urlencode_sequences(self):
752 # Other tests incidentally urlencode things; test non-covered cases:
753 # Sequence and object values.
754 result = urllib.parse.urlencode({'a': [1, 2], 'b': (3, 4, 5)}, True)
Georg Brandl09a7c722012-02-20 21:31:46 +0100755 # we cannot rely on ordering here
756 assert set(result.split('&')) == {'a=1', 'a=2', 'b=3', 'b=4', 'b=5'}
Senthil Kumarande02a712011-07-23 18:27:45 +0800757
758 class Trivial:
759 def __str__(self):
760 return 'trivial'
761
762 result = urllib.parse.urlencode({'a': Trivial()}, True)
763 self.assertEqual(result, 'a=trivial')
764
765 def test_quote_from_bytes(self):
766 self.assertRaises(TypeError, urllib.parse.quote_from_bytes, 'foo')
767 result = urllib.parse.quote_from_bytes(b'archaeological arcana')
768 self.assertEqual(result, 'archaeological%20arcana')
769 result = urllib.parse.quote_from_bytes(b'')
770 self.assertEqual(result, '')
771
772 def test_unquote_to_bytes(self):
773 result = urllib.parse.unquote_to_bytes('abc%20def')
774 self.assertEqual(result, b'abc def')
775 result = urllib.parse.unquote_to_bytes('')
776 self.assertEqual(result, b'')
777
778 def test_quote_errors(self):
779 self.assertRaises(TypeError, urllib.parse.quote, b'foo',
780 encoding='utf-8')
781 self.assertRaises(TypeError, urllib.parse.quote, b'foo', errors='strict')
Victor Stinner1d87deb2011-01-14 13:05:19 +0000782
Ezio Melotti6709b7d2012-05-19 17:15:19 +0300783 def test_issue14072(self):
784 p1 = urllib.parse.urlsplit('tel:+31-641044153')
785 self.assertEqual(p1.scheme, 'tel')
786 self.assertEqual(p1.path, '+31-641044153')
787 p2 = urllib.parse.urlsplit('tel:+31641044153')
788 self.assertEqual(p2.scheme, 'tel')
789 self.assertEqual(p2.path, '+31641044153')
Senthil Kumaraned301992012-12-24 14:00:20 -0800790 # assert the behavior for urlparse
791 p1 = urllib.parse.urlparse('tel:+31-641044153')
792 self.assertEqual(p1.scheme, 'tel')
793 self.assertEqual(p1.path, '+31-641044153')
794 p2 = urllib.parse.urlparse('tel:+31641044153')
795 self.assertEqual(p2.scheme, 'tel')
796 self.assertEqual(p2.path, '+31641044153')
797
798 def test_telurl_params(self):
799 p1 = urllib.parse.urlparse('tel:123-4;phone-context=+1-650-516')
800 self.assertEqual(p1.scheme, 'tel')
801 self.assertEqual(p1.path, '123-4')
802 self.assertEqual(p1.params, 'phone-context=+1-650-516')
803
804 p1 = urllib.parse.urlparse('tel:+1-201-555-0123')
805 self.assertEqual(p1.scheme, 'tel')
806 self.assertEqual(p1.path, '+1-201-555-0123')
807 self.assertEqual(p1.params, '')
808
809 p1 = urllib.parse.urlparse('tel:7042;phone-context=example.com')
810 self.assertEqual(p1.scheme, 'tel')
811 self.assertEqual(p1.path, '7042')
812 self.assertEqual(p1.params, 'phone-context=example.com')
813
814 p1 = urllib.parse.urlparse('tel:863-1234;phone-context=+1-914-555')
815 self.assertEqual(p1.scheme, 'tel')
816 self.assertEqual(p1.path, '863-1234')
817 self.assertEqual(p1.params, 'phone-context=+1-914-555')
818
R David Murrayf5163882013-03-21 20:56:51 -0400819 def test_Quoter_repr(self):
820 quoter = urllib.parse.Quoter(urllib.parse._ALWAYS_SAFE)
821 self.assertIn('Quoter', repr(quoter))
822
Senthil Kumaran6be85c52010-02-19 07:42:50 +0000823
class Utility_Tests(unittest.TestCase):
    """Tests for the small split*/helper utility functions of urllib.parse."""
    # In Python 2 this test class was in test_urllib.

    def test_splittype(self):
        splittype = urllib.parse.splittype
        cases = [
            ('type:opaquestring', ('type', 'opaquestring')),
            ('opaquestring', (None, 'opaquestring')),
            (':opaquestring', (None, ':opaquestring')),
            ('type:', ('type', '')),
            ('type:opaque:string', ('type', 'opaque:string')),
        ]
        for url, expected in cases:
            self.assertEqual(splittype(url), expected)

    def test_splithost(self):
        splithost = urllib.parse.splithost
        cases = [
            ('//www.example.org:80/foo/bar/baz.html',
             ('www.example.org:80', '/foo/bar/baz.html')),
            ('//www.example.org:80',
             ('www.example.org:80', '')),
            ('/foo/bar/baz.html',
             (None, '/foo/bar/baz.html')),
        ]
        for url, expected in cases:
            self.assertEqual(splithost(url), expected)

    def test_splituser(self):
        splituser = urllib.parse.splituser
        cases = [
            ('User:Pass@www.python.org:080',
             ('User:Pass', 'www.python.org:080')),
            ('@www.python.org:080',
             ('', 'www.python.org:080')),
            ('www.python.org:080',
             (None, 'www.python.org:080')),
            ('User:Pass@',
             ('User:Pass', '')),
            ('User@example.com:Pass@www.python.org:080',
             ('User@example.com:Pass', 'www.python.org:080')),
        ]
        for host, expected in cases:
            self.assertEqual(splituser(host), expected)

    def test_splitpasswd(self):
        # Some of the password examples are not sensible, but they are
        # included to conform to RFC2617 and to address issue4675.
        splitpasswd = urllib.parse.splitpasswd
        cases = [
            ('user:ab', ('user', 'ab')),
            ('user:a\nb', ('user', 'a\nb')),
            ('user:a\tb', ('user', 'a\tb')),
            ('user:a\rb', ('user', 'a\rb')),
            ('user:a\fb', ('user', 'a\fb')),
            ('user:a\vb', ('user', 'a\vb')),
            ('user:a:b', ('user', 'a:b')),
            ('user:a b', ('user', 'a b')),
            ('user 2:ab', ('user 2', 'ab')),
            ('user+1:a+b', ('user+1', 'a+b')),
            ('user:', ('user', '')),
            ('user', ('user', None)),
            (':ab', ('', 'ab')),
        ]
        for userinfo, expected in cases:
            self.assertEqual(splitpasswd(userinfo), expected)

    def test_splitport(self):
        splitport = urllib.parse.splitport
        cases = [
            ('parrot:88', ('parrot', '88')),
            ('parrot', ('parrot', None)),
            ('parrot:', ('parrot', None)),
            ('127.0.0.1', ('127.0.0.1', None)),
            ('parrot:cheese', ('parrot:cheese', None)),
            ('[::1]:88', ('[::1]', '88')),
            ('[::1]', ('[::1]', None)),
            (':88', ('', '88')),
        ]
        for host, expected in cases:
            self.assertEqual(splitport(host), expected)

    def test_splitnport(self):
        splitnport = urllib.parse.splitnport
        # Each row holds the positional arguments (host, plus an optional
        # default port) and the expected result.
        cases = [
            (('parrot:88',), ('parrot', 88)),
            (('parrot',), ('parrot', -1)),
            (('parrot', 55), ('parrot', 55)),
            (('parrot:',), ('parrot', -1)),
            (('parrot:', 55), ('parrot', 55)),
            (('127.0.0.1',), ('127.0.0.1', -1)),
            (('127.0.0.1', 55), ('127.0.0.1', 55)),
            (('parrot:cheese',), ('parrot', None)),
            (('parrot:cheese', 55), ('parrot', None)),
        ]
        for args, expected in cases:
            self.assertEqual(splitnport(*args), expected)

    def test_splitquery(self):
        # Normal cases are exercised by other tests; also cover a
        # trailing '?', a URL without any query, and an empty path
        # (testcase ensuring coverage).
        splitquery = urllib.parse.splitquery
        cases = [
            ('http://python.org/fake?foo=bar',
             ('http://python.org/fake', 'foo=bar')),
            ('http://python.org/fake?foo=bar?',
             ('http://python.org/fake?foo=bar', '')),
            ('http://python.org/fake',
             ('http://python.org/fake', None)),
            ('?foo=bar',
             ('', 'foo=bar')),
        ]
        for url, expected in cases:
            self.assertEqual(splitquery(url), expected)

    def test_splittag(self):
        splittag = urllib.parse.splittag
        cases = [
            ('http://example.com?foo=bar#baz',
             ('http://example.com?foo=bar', 'baz')),
            ('http://example.com?foo=bar#',
             ('http://example.com?foo=bar', '')),
            ('#baz',
             ('', 'baz')),
            ('http://example.com?foo=bar',
             ('http://example.com?foo=bar', None)),
            ('http://example.com?foo=bar#baz#boo',
             ('http://example.com?foo=bar#baz', 'boo')),
        ]
        for url, expected in cases:
            self.assertEqual(splittag(url), expected)

    def test_splitattr(self):
        splitattr = urllib.parse.splitattr
        cases = [
            ('/path;attr1=value1;attr2=value2',
             ('/path', ['attr1=value1', 'attr2=value2'])),
            ('/path;',
             ('/path', [''])),
            (';attr1=value1;attr2=value2',
             ('', ['attr1=value1', 'attr2=value2'])),
            ('/path',
             ('/path', [])),
        ]
        for url, expected in cases:
            self.assertEqual(splitattr(url), expected)

    def test_splitvalue(self):
        # Normal cases are exercised by other tests; test pathological
        # cases with no key/value pairs (testcase ensuring coverage).
        splitvalue = urllib.parse.splitvalue
        cases = [
            ('foo=bar', ('foo', 'bar')),
            ('foo=', ('foo', '')),
            ('=bar', ('', 'bar')),
            ('foobar', ('foobar', None)),
            ('foo=bar=baz', ('foo', 'bar=baz')),
        ]
        for attr, expected in cases:
            self.assertEqual(splitvalue(attr), expected)

    def test_to_bytes(self):
        # An ASCII-only URL comes back equal to the input string; a URL
        # containing a non-ASCII character raises UnicodeError.
        converted = urllib.parse.to_bytes('http://www.python.org')
        self.assertEqual(converted, 'http://www.python.org')
        with self.assertRaises(UnicodeError):
            urllib.parse.to_bytes('http://www.python.org/medi\u00e6val')

    def test_unwrap(self):
        # The '<URL:...>' wrapper must be stripped from the URL.
        unwrapped = urllib.parse.unwrap('<URL:type://host/path>')
        self.assertEqual(unwrapped, 'type://host/path')
951
Skip Montanaro6ec967d2002-03-23 05:32:10 +0000952
if __name__ == "__main__":
    # Executed as a script: discover and run every test case in this module.
    unittest.main()