blob: af200d55e32f1607d0de6382d5d26add581dce3e [file] [log] [blame]
Benjamin Peterson90f5ba52010-03-11 22:53:45 +00001#! /usr/bin/env python3
Skip Montanaro6ec967d2002-03-23 05:32:10 +00002
Benjamin Petersonee8712c2008-05-20 21:35:26 +00003from test import support
Skip Montanaro6ec967d2002-03-23 05:32:10 +00004import unittest
Jeremy Hylton1afc1692008-06-18 20:49:58 +00005import urllib.parse
Fred Drakea4d18a02001-01-05 05:57:04 +00006
# Base URLs that the urljoin() tests resolve relative references against.
RFC1808_BASE = 'http://a/b/c/d;p?q#f'
RFC2396_BASE = 'http://a/b/c/d;p?q'
RFC3986_BASE = 'http://a/b/c/d;p?q'
SIMPLE_BASE = 'http://a/b/c/d'
Fred Drakea4d18a02001-01-05 05:57:04 +000011
# A list of test cases.  Each test case is a two-tuple that contains
# a string with the query and a list of (name, value) pairs with the
# expected result.
14
# Each entry pairs a query string with the (name, value) list that
# parse_qsl() returns for it when blank values are kept.  The second half
# repeats the first half with bytes input, for which bytes pairs come back.
parse_qsl_test_cases = [
    ("", []),
    ("&", []),
    ("&&", []),
    ("=", [('', '')]),
    ("=a", [('', 'a')]),
    ("a", [('a', '')]),
    ("a=", [('a', '')]),
    ("&a=b", [('a', 'b')]),
    ("a=a+b&b=b+c", [('a', 'a b'), ('b', 'b c')]),
    ("a=1&a=2", [('a', '1'), ('a', '2')]),
    (b"", []),
    (b"&", []),
    (b"&&", []),
    (b"=", [(b'', b'')]),
    (b"=a", [(b'', b'a')]),
    (b"a", [(b'a', b'')]),
    (b"a=", [(b'a', b'')]),
    (b"&a=b", [(b'a', b'b')]),
    (b"a=a+b&b=b+c", [(b'a', b'a b'), (b'b', b'b c')]),
    (b"a=1&a=2", [(b'a', b'1'), (b'a', b'2')]),
]
39
Skip Montanaro6ec967d2002-03-23 05:32:10 +000040class UrlParseTestCase(unittest.TestCase):
Johannes Gijsbers41e4faa2005-01-09 15:29:10 +000041
42 def checkRoundtrips(self, url, parsed, split):
Jeremy Hylton1afc1692008-06-18 20:49:58 +000043 result = urllib.parse.urlparse(url)
Johannes Gijsbers41e4faa2005-01-09 15:29:10 +000044 self.assertEqual(result, parsed)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +000045 t = (result.scheme, result.netloc, result.path,
46 result.params, result.query, result.fragment)
47 self.assertEqual(t, parsed)
Johannes Gijsbers41e4faa2005-01-09 15:29:10 +000048 # put it back together and it should be the same
Jeremy Hylton1afc1692008-06-18 20:49:58 +000049 result2 = urllib.parse.urlunparse(result)
Johannes Gijsbers41e4faa2005-01-09 15:29:10 +000050 self.assertEqual(result2, url)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +000051 self.assertEqual(result2, result.geturl())
52
53 # the result of geturl() is a fixpoint; we can always parse it
54 # again to get the same result:
Jeremy Hylton1afc1692008-06-18 20:49:58 +000055 result3 = urllib.parse.urlparse(result.geturl())
Thomas Wouters49fd7fa2006-04-21 10:40:58 +000056 self.assertEqual(result3.geturl(), result.geturl())
57 self.assertEqual(result3, result)
58 self.assertEqual(result3.scheme, result.scheme)
59 self.assertEqual(result3.netloc, result.netloc)
60 self.assertEqual(result3.path, result.path)
61 self.assertEqual(result3.params, result.params)
62 self.assertEqual(result3.query, result.query)
63 self.assertEqual(result3.fragment, result.fragment)
64 self.assertEqual(result3.username, result.username)
65 self.assertEqual(result3.password, result.password)
66 self.assertEqual(result3.hostname, result.hostname)
67 self.assertEqual(result3.port, result.port)
Johannes Gijsbers41e4faa2005-01-09 15:29:10 +000068
69 # check the roundtrip using urlsplit() as well
Jeremy Hylton1afc1692008-06-18 20:49:58 +000070 result = urllib.parse.urlsplit(url)
Johannes Gijsbers41e4faa2005-01-09 15:29:10 +000071 self.assertEqual(result, split)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +000072 t = (result.scheme, result.netloc, result.path,
73 result.query, result.fragment)
74 self.assertEqual(t, split)
Jeremy Hylton1afc1692008-06-18 20:49:58 +000075 result2 = urllib.parse.urlunsplit(result)
Johannes Gijsbers41e4faa2005-01-09 15:29:10 +000076 self.assertEqual(result2, url)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +000077 self.assertEqual(result2, result.geturl())
78
79 # check the fixpoint property of re-parsing the result of geturl()
Jeremy Hylton1afc1692008-06-18 20:49:58 +000080 result3 = urllib.parse.urlsplit(result.geturl())
Thomas Wouters49fd7fa2006-04-21 10:40:58 +000081 self.assertEqual(result3.geturl(), result.geturl())
82 self.assertEqual(result3, result)
83 self.assertEqual(result3.scheme, result.scheme)
84 self.assertEqual(result3.netloc, result.netloc)
85 self.assertEqual(result3.path, result.path)
86 self.assertEqual(result3.query, result.query)
87 self.assertEqual(result3.fragment, result.fragment)
88 self.assertEqual(result3.username, result.username)
89 self.assertEqual(result3.password, result.password)
90 self.assertEqual(result3.hostname, result.hostname)
91 self.assertEqual(result3.port, result.port)
Johannes Gijsbers41e4faa2005-01-09 15:29:10 +000092
Facundo Batistac469d4c2008-09-03 22:49:01 +000093 def test_qsl(self):
94 for orig, expect in parse_qsl_test_cases:
95 result = urllib.parse.parse_qsl(orig, keep_blank_values=True)
Senthil Kumarande02a712011-07-23 18:27:45 +080096 self.assertEqual(result, expect, "Error parsing %r" % orig)
97 expect_without_blanks = [v for v in expect if len(v[1])]
98 result = urllib.parse.parse_qsl(orig, keep_blank_values=False)
99 self.assertEqual(result, expect_without_blanks,
100 "Error parsing %r" % orig)
Facundo Batistac469d4c2008-09-03 22:49:01 +0000101
Johannes Gijsbers41e4faa2005-01-09 15:29:10 +0000102 def test_roundtrips(self):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000103 str_cases = [
Fred Drake70705652002-10-16 21:02:36 +0000104 ('file:///tmp/junk.txt',
105 ('file', '', '/tmp/junk.txt', '', '', ''),
106 ('file', '', '/tmp/junk.txt', '', '')),
Neal Norwitz68b539e2003-01-06 06:58:31 +0000107 ('imap://mail.python.org/mbox1',
108 ('imap', 'mail.python.org', '/mbox1', '', '', ''),
109 ('imap', 'mail.python.org', '/mbox1', '', '')),
Skip Montanarof09b88e2003-01-06 20:27:03 +0000110 ('mms://wms.sys.hinet.net/cts/Drama/09006251100.asf',
Johannes Gijsbers41e4faa2005-01-09 15:29:10 +0000111 ('mms', 'wms.sys.hinet.net', '/cts/Drama/09006251100.asf',
112 '', '', ''),
113 ('mms', 'wms.sys.hinet.net', '/cts/Drama/09006251100.asf',
114 '', '')),
Senthil Kumaraneaaec272009-03-30 21:54:41 +0000115 ('nfs://server/path/to/file.txt',
116 ('nfs', 'server', '/path/to/file.txt', '', '', ''),
117 ('nfs', 'server', '/path/to/file.txt', '', '')),
Fred Drake50747fc2005-07-29 15:56:32 +0000118 ('svn+ssh://svn.zope.org/repos/main/ZConfig/trunk/',
119 ('svn+ssh', 'svn.zope.org', '/repos/main/ZConfig/trunk/',
120 '', '', ''),
121 ('svn+ssh', 'svn.zope.org', '/repos/main/ZConfig/trunk/',
Senthil Kumaranead169d2010-05-13 03:37:23 +0000122 '', '')),
123 ('git+ssh://git@github.com/user/project.git',
124 ('git+ssh', 'git@github.com','/user/project.git',
125 '','',''),
126 ('git+ssh', 'git@github.com','/user/project.git',
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000127 '', '')),
Johannes Gijsbers41e4faa2005-01-09 15:29:10 +0000128 ]
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000129 def _encode(t):
130 return (t[0].encode('ascii'),
131 tuple(x.encode('ascii') for x in t[1]),
132 tuple(x.encode('ascii') for x in t[2]))
133 bytes_cases = [_encode(x) for x in str_cases]
134 for url, parsed, split in str_cases + bytes_cases:
Johannes Gijsbers41e4faa2005-01-09 15:29:10 +0000135 self.checkRoundtrips(url, parsed, split)
Michael W. Hudsonbd3e7712002-03-18 13:06:00 +0000136
Johannes Gijsbers41e4faa2005-01-09 15:29:10 +0000137 def test_http_roundtrips(self):
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000138 # urllib.parse.urlsplit treats 'http:' as an optimized special case,
Johannes Gijsbers41e4faa2005-01-09 15:29:10 +0000139 # so we test both 'http:' and 'https:' in all the following.
140 # Three cheers for white box knowledge!
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000141 str_cases = [
Johannes Gijsbers41e4faa2005-01-09 15:29:10 +0000142 ('://www.python.org',
143 ('www.python.org', '', '', '', ''),
144 ('www.python.org', '', '', '')),
145 ('://www.python.org#abc',
146 ('www.python.org', '', '', '', 'abc'),
147 ('www.python.org', '', '', 'abc')),
148 ('://www.python.org?q=abc',
149 ('www.python.org', '', '', 'q=abc', ''),
150 ('www.python.org', '', 'q=abc', '')),
151 ('://www.python.org/#abc',
152 ('www.python.org', '/', '', '', 'abc'),
153 ('www.python.org', '/', '', 'abc')),
154 ('://a/b/c/d;p?q#f',
155 ('a', '/b/c/d', 'p', 'q', 'f'),
156 ('a', '/b/c/d;p', 'q', 'f')),
157 ]
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000158 def _encode(t):
159 return (t[0].encode('ascii'),
160 tuple(x.encode('ascii') for x in t[1]),
161 tuple(x.encode('ascii') for x in t[2]))
162 bytes_cases = [_encode(x) for x in str_cases]
163 str_schemes = ('http', 'https')
164 bytes_schemes = (b'http', b'https')
165 str_tests = str_schemes, str_cases
166 bytes_tests = bytes_schemes, bytes_cases
167 for schemes, test_cases in (str_tests, bytes_tests):
168 for scheme in schemes:
169 for url, parsed, split in test_cases:
170 url = scheme + url
171 parsed = (scheme,) + parsed
172 split = (scheme,) + split
173 self.checkRoundtrips(url, parsed, split)
Fred Drake70705652002-10-16 21:02:36 +0000174
Skip Montanaro6ec967d2002-03-23 05:32:10 +0000175 def checkJoin(self, base, relurl, expected):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000176 str_components = (base, relurl, expected)
177 self.assertEqual(urllib.parse.urljoin(base, relurl), expected)
178 bytes_components = baseb, relurlb, expectedb = [
179 x.encode('ascii') for x in str_components]
180 self.assertEqual(urllib.parse.urljoin(baseb, relurlb), expectedb)
Guido van Rossumbbc05682002-10-14 19:59:54 +0000181
182 def test_unparse_parse(self):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000183 str_cases = ['Python', './Python','x-newscheme://foo.com/stuff','x://y','x:/y','x:/','/',]
184 bytes_cases = [x.encode('ascii') for x in str_cases]
185 for u in str_cases + bytes_cases:
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000186 self.assertEqual(urllib.parse.urlunsplit(urllib.parse.urlsplit(u)), u)
187 self.assertEqual(urllib.parse.urlunparse(urllib.parse.urlparse(u)), u)
Fred Drakea4d18a02001-01-05 05:57:04 +0000188
Skip Montanaro6ec967d2002-03-23 05:32:10 +0000189 def test_RFC1808(self):
190 # "normal" cases from RFC 1808:
191 self.checkJoin(RFC1808_BASE, 'g:h', 'g:h')
192 self.checkJoin(RFC1808_BASE, 'g', 'http://a/b/c/g')
193 self.checkJoin(RFC1808_BASE, './g', 'http://a/b/c/g')
194 self.checkJoin(RFC1808_BASE, 'g/', 'http://a/b/c/g/')
195 self.checkJoin(RFC1808_BASE, '/g', 'http://a/g')
196 self.checkJoin(RFC1808_BASE, '//g', 'http://g')
Skip Montanaro6ec967d2002-03-23 05:32:10 +0000197 self.checkJoin(RFC1808_BASE, 'g?y', 'http://a/b/c/g?y')
198 self.checkJoin(RFC1808_BASE, 'g?y/./x', 'http://a/b/c/g?y/./x')
199 self.checkJoin(RFC1808_BASE, '#s', 'http://a/b/c/d;p?q#s')
200 self.checkJoin(RFC1808_BASE, 'g#s', 'http://a/b/c/g#s')
201 self.checkJoin(RFC1808_BASE, 'g#s/./x', 'http://a/b/c/g#s/./x')
202 self.checkJoin(RFC1808_BASE, 'g?y#s', 'http://a/b/c/g?y#s')
Skip Montanaro6ec967d2002-03-23 05:32:10 +0000203 self.checkJoin(RFC1808_BASE, 'g;x', 'http://a/b/c/g;x')
204 self.checkJoin(RFC1808_BASE, 'g;x?y#s', 'http://a/b/c/g;x?y#s')
205 self.checkJoin(RFC1808_BASE, '.', 'http://a/b/c/')
206 self.checkJoin(RFC1808_BASE, './', 'http://a/b/c/')
207 self.checkJoin(RFC1808_BASE, '..', 'http://a/b/')
208 self.checkJoin(RFC1808_BASE, '../', 'http://a/b/')
209 self.checkJoin(RFC1808_BASE, '../g', 'http://a/b/g')
210 self.checkJoin(RFC1808_BASE, '../..', 'http://a/')
211 self.checkJoin(RFC1808_BASE, '../../', 'http://a/')
212 self.checkJoin(RFC1808_BASE, '../../g', 'http://a/g')
Fred Drakea4d18a02001-01-05 05:57:04 +0000213
Skip Montanaro6ec967d2002-03-23 05:32:10 +0000214 # "abnormal" cases from RFC 1808:
215 self.checkJoin(RFC1808_BASE, '', 'http://a/b/c/d;p?q#f')
216 self.checkJoin(RFC1808_BASE, '../../../g', 'http://a/../g')
217 self.checkJoin(RFC1808_BASE, '../../../../g', 'http://a/../../g')
218 self.checkJoin(RFC1808_BASE, '/./g', 'http://a/./g')
219 self.checkJoin(RFC1808_BASE, '/../g', 'http://a/../g')
220 self.checkJoin(RFC1808_BASE, 'g.', 'http://a/b/c/g.')
221 self.checkJoin(RFC1808_BASE, '.g', 'http://a/b/c/.g')
222 self.checkJoin(RFC1808_BASE, 'g..', 'http://a/b/c/g..')
223 self.checkJoin(RFC1808_BASE, '..g', 'http://a/b/c/..g')
224 self.checkJoin(RFC1808_BASE, './../g', 'http://a/b/g')
225 self.checkJoin(RFC1808_BASE, './g/.', 'http://a/b/c/g/')
226 self.checkJoin(RFC1808_BASE, 'g/./h', 'http://a/b/c/g/h')
227 self.checkJoin(RFC1808_BASE, 'g/../h', 'http://a/b/c/h')
Fred Drakea4d18a02001-01-05 05:57:04 +0000228
Skip Montanaro6ec967d2002-03-23 05:32:10 +0000229 # RFC 1808 and RFC 1630 disagree on these (according to RFC 1808),
230 # so we'll not actually run these tests (which expect 1808 behavior).
231 #self.checkJoin(RFC1808_BASE, 'http:g', 'http:g')
232 #self.checkJoin(RFC1808_BASE, 'http:', 'http:')
Fred Drakea4d18a02001-01-05 05:57:04 +0000233
Senthil Kumaran397eb442011-04-15 18:20:24 +0800234 def test_RFC2368(self):
235 # Issue 11467: path that starts with a number is not parsed correctly
236 self.assertEqual(urllib.parse.urlparse('mailto:1337@example.org'),
237 ('mailto', '', '1337@example.org', '', '', ''))
238
Skip Montanaro6ec967d2002-03-23 05:32:10 +0000239 def test_RFC2396(self):
240 # cases from RFC 2396
Fred Drakea4d18a02001-01-05 05:57:04 +0000241
Skip Montanaro6ec967d2002-03-23 05:32:10 +0000242
243 self.checkJoin(RFC2396_BASE, 'g:h', 'g:h')
244 self.checkJoin(RFC2396_BASE, 'g', 'http://a/b/c/g')
245 self.checkJoin(RFC2396_BASE, './g', 'http://a/b/c/g')
246 self.checkJoin(RFC2396_BASE, 'g/', 'http://a/b/c/g/')
247 self.checkJoin(RFC2396_BASE, '/g', 'http://a/g')
248 self.checkJoin(RFC2396_BASE, '//g', 'http://g')
249 self.checkJoin(RFC2396_BASE, 'g?y', 'http://a/b/c/g?y')
250 self.checkJoin(RFC2396_BASE, '#s', 'http://a/b/c/d;p?q#s')
251 self.checkJoin(RFC2396_BASE, 'g#s', 'http://a/b/c/g#s')
252 self.checkJoin(RFC2396_BASE, 'g?y#s', 'http://a/b/c/g?y#s')
253 self.checkJoin(RFC2396_BASE, 'g;x', 'http://a/b/c/g;x')
254 self.checkJoin(RFC2396_BASE, 'g;x?y#s', 'http://a/b/c/g;x?y#s')
255 self.checkJoin(RFC2396_BASE, '.', 'http://a/b/c/')
256 self.checkJoin(RFC2396_BASE, './', 'http://a/b/c/')
257 self.checkJoin(RFC2396_BASE, '..', 'http://a/b/')
258 self.checkJoin(RFC2396_BASE, '../', 'http://a/b/')
259 self.checkJoin(RFC2396_BASE, '../g', 'http://a/b/g')
260 self.checkJoin(RFC2396_BASE, '../..', 'http://a/')
261 self.checkJoin(RFC2396_BASE, '../../', 'http://a/')
262 self.checkJoin(RFC2396_BASE, '../../g', 'http://a/g')
263 self.checkJoin(RFC2396_BASE, '', RFC2396_BASE)
264 self.checkJoin(RFC2396_BASE, '../../../g', 'http://a/../g')
265 self.checkJoin(RFC2396_BASE, '../../../../g', 'http://a/../../g')
266 self.checkJoin(RFC2396_BASE, '/./g', 'http://a/./g')
267 self.checkJoin(RFC2396_BASE, '/../g', 'http://a/../g')
268 self.checkJoin(RFC2396_BASE, 'g.', 'http://a/b/c/g.')
269 self.checkJoin(RFC2396_BASE, '.g', 'http://a/b/c/.g')
270 self.checkJoin(RFC2396_BASE, 'g..', 'http://a/b/c/g..')
271 self.checkJoin(RFC2396_BASE, '..g', 'http://a/b/c/..g')
272 self.checkJoin(RFC2396_BASE, './../g', 'http://a/b/g')
273 self.checkJoin(RFC2396_BASE, './g/.', 'http://a/b/c/g/')
274 self.checkJoin(RFC2396_BASE, 'g/./h', 'http://a/b/c/g/h')
275 self.checkJoin(RFC2396_BASE, 'g/../h', 'http://a/b/c/h')
276 self.checkJoin(RFC2396_BASE, 'g;x=1/./y', 'http://a/b/c/g;x=1/y')
277 self.checkJoin(RFC2396_BASE, 'g;x=1/../y', 'http://a/b/c/y')
278 self.checkJoin(RFC2396_BASE, 'g?y/./x', 'http://a/b/c/g?y/./x')
279 self.checkJoin(RFC2396_BASE, 'g?y/../x', 'http://a/b/c/g?y/../x')
280 self.checkJoin(RFC2396_BASE, 'g#s/./x', 'http://a/b/c/g#s/./x')
281 self.checkJoin(RFC2396_BASE, 'g#s/../x', 'http://a/b/c/g#s/../x')
282
Facundo Batista23e38562008-08-14 16:55:14 +0000283 def test_RFC3986(self):
Senthil Kumarandd3820f2010-05-07 04:19:23 +0000284 # Test cases from RFC3986
Facundo Batista23e38562008-08-14 16:55:14 +0000285 self.checkJoin(RFC3986_BASE, '?y','http://a/b/c/d;p?y')
286 self.checkJoin(RFC2396_BASE, ';x', 'http://a/b/c/;x')
Senthil Kumarandd3820f2010-05-07 04:19:23 +0000287 self.checkJoin(RFC3986_BASE, 'g:h','g:h')
288 self.checkJoin(RFC3986_BASE, 'g','http://a/b/c/g')
289 self.checkJoin(RFC3986_BASE, './g','http://a/b/c/g')
290 self.checkJoin(RFC3986_BASE, 'g/','http://a/b/c/g/')
291 self.checkJoin(RFC3986_BASE, '/g','http://a/g')
292 self.checkJoin(RFC3986_BASE, '//g','http://g')
293 self.checkJoin(RFC3986_BASE, '?y','http://a/b/c/d;p?y')
294 self.checkJoin(RFC3986_BASE, 'g?y','http://a/b/c/g?y')
295 self.checkJoin(RFC3986_BASE, '#s','http://a/b/c/d;p?q#s')
296 self.checkJoin(RFC3986_BASE, 'g#s','http://a/b/c/g#s')
297 self.checkJoin(RFC3986_BASE, 'g?y#s','http://a/b/c/g?y#s')
298 self.checkJoin(RFC3986_BASE, ';x','http://a/b/c/;x')
299 self.checkJoin(RFC3986_BASE, 'g;x','http://a/b/c/g;x')
300 self.checkJoin(RFC3986_BASE, 'g;x?y#s','http://a/b/c/g;x?y#s')
301 self.checkJoin(RFC3986_BASE, '','http://a/b/c/d;p?q')
302 self.checkJoin(RFC3986_BASE, '.','http://a/b/c/')
303 self.checkJoin(RFC3986_BASE, './','http://a/b/c/')
304 self.checkJoin(RFC3986_BASE, '..','http://a/b/')
305 self.checkJoin(RFC3986_BASE, '../','http://a/b/')
306 self.checkJoin(RFC3986_BASE, '../g','http://a/b/g')
307 self.checkJoin(RFC3986_BASE, '../..','http://a/')
308 self.checkJoin(RFC3986_BASE, '../../','http://a/')
309 self.checkJoin(RFC3986_BASE, '../../g','http://a/g')
310
311 #Abnormal Examples
312
313 # The 'abnormal scenarios' are incompatible with RFC2986 parsing
314 # Tests are here for reference.
315
316 #self.checkJoin(RFC3986_BASE, '../../../g','http://a/g')
317 #self.checkJoin(RFC3986_BASE, '../../../../g','http://a/g')
318 #self.checkJoin(RFC3986_BASE, '/./g','http://a/g')
319 #self.checkJoin(RFC3986_BASE, '/../g','http://a/g')
320
321 self.checkJoin(RFC3986_BASE, 'g.','http://a/b/c/g.')
322 self.checkJoin(RFC3986_BASE, '.g','http://a/b/c/.g')
323 self.checkJoin(RFC3986_BASE, 'g..','http://a/b/c/g..')
324 self.checkJoin(RFC3986_BASE, '..g','http://a/b/c/..g')
325 self.checkJoin(RFC3986_BASE, './../g','http://a/b/g')
326 self.checkJoin(RFC3986_BASE, './g/.','http://a/b/c/g/')
327 self.checkJoin(RFC3986_BASE, 'g/./h','http://a/b/c/g/h')
328 self.checkJoin(RFC3986_BASE, 'g/../h','http://a/b/c/h')
329 self.checkJoin(RFC3986_BASE, 'g;x=1/./y','http://a/b/c/g;x=1/y')
330 self.checkJoin(RFC3986_BASE, 'g;x=1/../y','http://a/b/c/y')
331 self.checkJoin(RFC3986_BASE, 'g?y/./x','http://a/b/c/g?y/./x')
332 self.checkJoin(RFC3986_BASE, 'g?y/../x','http://a/b/c/g?y/../x')
333 self.checkJoin(RFC3986_BASE, 'g#s/./x','http://a/b/c/g#s/./x')
334 self.checkJoin(RFC3986_BASE, 'g#s/../x','http://a/b/c/g#s/../x')
335 #self.checkJoin(RFC3986_BASE, 'http:g','http:g') # strict parser
336 self.checkJoin(RFC3986_BASE, 'http:g','http://a/b/c/g') #relaxed parser
Facundo Batista23e38562008-08-14 16:55:14 +0000337
Senthil Kumarandca5b862010-12-17 04:48:45 +0000338 # Test for issue9721
339 self.checkJoin('http://a/b/c/de', ';x','http://a/b/c/;x')
340
Senthil Kumaranaa69d4d2010-07-14 10:21:22 +0000341 def test_urljoins(self):
342 self.checkJoin(SIMPLE_BASE, 'g:h','g:h')
343 self.checkJoin(SIMPLE_BASE, 'http:g','http://a/b/c/g')
344 self.checkJoin(SIMPLE_BASE, 'http:','http://a/b/c/d')
345 self.checkJoin(SIMPLE_BASE, 'g','http://a/b/c/g')
346 self.checkJoin(SIMPLE_BASE, './g','http://a/b/c/g')
347 self.checkJoin(SIMPLE_BASE, 'g/','http://a/b/c/g/')
348 self.checkJoin(SIMPLE_BASE, '/g','http://a/g')
349 self.checkJoin(SIMPLE_BASE, '//g','http://g')
350 self.checkJoin(SIMPLE_BASE, '?y','http://a/b/c/d?y')
351 self.checkJoin(SIMPLE_BASE, 'g?y','http://a/b/c/g?y')
352 self.checkJoin(SIMPLE_BASE, 'g?y/./x','http://a/b/c/g?y/./x')
353 self.checkJoin(SIMPLE_BASE, '.','http://a/b/c/')
354 self.checkJoin(SIMPLE_BASE, './','http://a/b/c/')
355 self.checkJoin(SIMPLE_BASE, '..','http://a/b/')
356 self.checkJoin(SIMPLE_BASE, '../','http://a/b/')
357 self.checkJoin(SIMPLE_BASE, '../g','http://a/b/g')
358 self.checkJoin(SIMPLE_BASE, '../..','http://a/')
359 self.checkJoin(SIMPLE_BASE, '../../g','http://a/g')
360 self.checkJoin(SIMPLE_BASE, '../../../g','http://a/../g')
361 self.checkJoin(SIMPLE_BASE, './../g','http://a/b/g')
362 self.checkJoin(SIMPLE_BASE, './g/.','http://a/b/c/g/')
363 self.checkJoin(SIMPLE_BASE, '/./g','http://a/./g')
364 self.checkJoin(SIMPLE_BASE, 'g/./h','http://a/b/c/g/h')
365 self.checkJoin(SIMPLE_BASE, 'g/../h','http://a/b/c/h')
366 self.checkJoin(SIMPLE_BASE, 'http:g','http://a/b/c/g')
367 self.checkJoin(SIMPLE_BASE, 'http:','http://a/b/c/d')
368 self.checkJoin(SIMPLE_BASE, 'http:?y','http://a/b/c/d?y')
369 self.checkJoin(SIMPLE_BASE, 'http:g?y','http://a/b/c/g?y')
370 self.checkJoin(SIMPLE_BASE, 'http:g?y/./x','http://a/b/c/g?y/./x')
Senthil Kumarande02a712011-07-23 18:27:45 +0800371 self.checkJoin('http:///', '..','http:///')
372 self.checkJoin('', 'http://a/b/c/g?y/./x','http://a/b/c/g?y/./x')
373 self.checkJoin('', 'http://a/./g', 'http://a/./g')
Senthil Kumaran2a157d22011-08-03 18:37:22 +0800374 self.checkJoin('svn://pathtorepo/dir1', 'dir2', 'svn://pathtorepo/dir2')
375 self.checkJoin('svn://pathtorepo/dir1', 'dir2', 'svn://pathtorepo/dir2')
Senthil Kumaranaa69d4d2010-07-14 10:21:22 +0000376
Senthil Kumaranad02d232010-04-16 03:02:13 +0000377 def test_RFC2732(self):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000378 str_cases = [
Senthil Kumaranad02d232010-04-16 03:02:13 +0000379 ('http://Test.python.org:5432/foo/', 'test.python.org', 5432),
380 ('http://12.34.56.78:5432/foo/', '12.34.56.78', 5432),
381 ('http://[::1]:5432/foo/', '::1', 5432),
382 ('http://[dead:beef::1]:5432/foo/', 'dead:beef::1', 5432),
383 ('http://[dead:beef::]:5432/foo/', 'dead:beef::', 5432),
384 ('http://[dead:beef:cafe:5417:affe:8FA3:deaf:feed]:5432/foo/',
385 'dead:beef:cafe:5417:affe:8fa3:deaf:feed', 5432),
386 ('http://[::12.34.56.78]:5432/foo/', '::12.34.56.78', 5432),
387 ('http://[::ffff:12.34.56.78]:5432/foo/',
388 '::ffff:12.34.56.78', 5432),
389 ('http://Test.python.org/foo/', 'test.python.org', None),
390 ('http://12.34.56.78/foo/', '12.34.56.78', None),
391 ('http://[::1]/foo/', '::1', None),
392 ('http://[dead:beef::1]/foo/', 'dead:beef::1', None),
393 ('http://[dead:beef::]/foo/', 'dead:beef::', None),
394 ('http://[dead:beef:cafe:5417:affe:8FA3:deaf:feed]/foo/',
395 'dead:beef:cafe:5417:affe:8fa3:deaf:feed', None),
396 ('http://[::12.34.56.78]/foo/', '::12.34.56.78', None),
397 ('http://[::ffff:12.34.56.78]/foo/',
398 '::ffff:12.34.56.78', None),
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000399 ]
400 def _encode(t):
401 return t[0].encode('ascii'), t[1].encode('ascii'), t[2]
402 bytes_cases = [_encode(x) for x in str_cases]
403 for url, hostname, port in str_cases + bytes_cases:
Senthil Kumaranad02d232010-04-16 03:02:13 +0000404 urlparsed = urllib.parse.urlparse(url)
405 self.assertEqual((urlparsed.hostname, urlparsed.port) , (hostname, port))
406
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000407 str_cases = [
Senthil Kumaranad02d232010-04-16 03:02:13 +0000408 'http://::12.34.56.78]/',
409 'http://[::1/foo/',
Senthil Kumaran7a1e09f2010-04-22 12:19:46 +0000410 'ftp://[::1/foo/bad]/bad',
Senthil Kumaran2eaef052010-04-20 20:42:50 +0000411 'http://[::1/foo/bad]/bad',
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000412 'http://[::ffff:12.34.56.78']
413 bytes_cases = [x.encode('ascii') for x in str_cases]
414 for invalid_url in str_cases + bytes_cases:
Senthil Kumaran7a1e09f2010-04-22 12:19:46 +0000415 self.assertRaises(ValueError, urllib.parse.urlparse, invalid_url)
Senthil Kumaranad02d232010-04-16 03:02:13 +0000416
Fred Drake70705652002-10-16 21:02:36 +0000417 def test_urldefrag(self):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000418 str_cases = [
Fred Drake70705652002-10-16 21:02:36 +0000419 ('http://python.org#frag', 'http://python.org', 'frag'),
420 ('http://python.org', 'http://python.org', ''),
421 ('http://python.org/#frag', 'http://python.org/', 'frag'),
422 ('http://python.org/', 'http://python.org/', ''),
423 ('http://python.org/?q#frag', 'http://python.org/?q', 'frag'),
424 ('http://python.org/?q', 'http://python.org/?q', ''),
425 ('http://python.org/p#frag', 'http://python.org/p', 'frag'),
426 ('http://python.org/p?q', 'http://python.org/p?q', ''),
427 (RFC1808_BASE, 'http://a/b/c/d;p?q', 'f'),
428 (RFC2396_BASE, 'http://a/b/c/d;p?q', ''),
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000429 ]
430 def _encode(t):
431 return type(t)(x.encode('ascii') for x in t)
432 bytes_cases = [_encode(x) for x in str_cases]
433 for url, defrag, frag in str_cases + bytes_cases:
434 result = urllib.parse.urldefrag(url)
435 self.assertEqual(result.geturl(), url)
436 self.assertEqual(result, (defrag, frag))
437 self.assertEqual(result.url, defrag)
438 self.assertEqual(result.fragment, frag)
Fred Drake70705652002-10-16 21:02:36 +0000439
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000440 def test_urlsplit_attributes(self):
441 url = "HTTP://WWW.PYTHON.ORG/doc/#frag"
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000442 p = urllib.parse.urlsplit(url)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000443 self.assertEqual(p.scheme, "http")
444 self.assertEqual(p.netloc, "WWW.PYTHON.ORG")
445 self.assertEqual(p.path, "/doc/")
446 self.assertEqual(p.query, "")
447 self.assertEqual(p.fragment, "frag")
448 self.assertEqual(p.username, None)
449 self.assertEqual(p.password, None)
450 self.assertEqual(p.hostname, "www.python.org")
451 self.assertEqual(p.port, None)
452 # geturl() won't return exactly the original URL in this case
453 # since the scheme is always case-normalized
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000454 # We handle this by ignoring the first 4 characters of the URL
455 self.assertEqual(p.geturl()[4:], url[4:])
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000456
457 url = "http://User:Pass@www.python.org:080/doc/?query=yes#frag"
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000458 p = urllib.parse.urlsplit(url)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000459 self.assertEqual(p.scheme, "http")
460 self.assertEqual(p.netloc, "User:Pass@www.python.org:080")
461 self.assertEqual(p.path, "/doc/")
462 self.assertEqual(p.query, "query=yes")
463 self.assertEqual(p.fragment, "frag")
464 self.assertEqual(p.username, "User")
465 self.assertEqual(p.password, "Pass")
466 self.assertEqual(p.hostname, "www.python.org")
467 self.assertEqual(p.port, 80)
468 self.assertEqual(p.geturl(), url)
469
Christian Heimesfaf2f632008-01-06 16:59:19 +0000470 # Addressing issue1698, which suggests Username can contain
471 # "@" characters. Though not RFC compliant, many ftp sites allow
472 # and request email addresses as usernames.
473
474 url = "http://User@example.com:Pass@www.python.org:080/doc/?query=yes#frag"
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000475 p = urllib.parse.urlsplit(url)
Christian Heimesfaf2f632008-01-06 16:59:19 +0000476 self.assertEqual(p.scheme, "http")
477 self.assertEqual(p.netloc, "User@example.com:Pass@www.python.org:080")
478 self.assertEqual(p.path, "/doc/")
479 self.assertEqual(p.query, "query=yes")
480 self.assertEqual(p.fragment, "frag")
481 self.assertEqual(p.username, "User@example.com")
482 self.assertEqual(p.password, "Pass")
483 self.assertEqual(p.hostname, "www.python.org")
484 self.assertEqual(p.port, 80)
485 self.assertEqual(p.geturl(), url)
486
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000487 # And check them all again, only with bytes this time
488 url = b"HTTP://WWW.PYTHON.ORG/doc/#frag"
489 p = urllib.parse.urlsplit(url)
490 self.assertEqual(p.scheme, b"http")
491 self.assertEqual(p.netloc, b"WWW.PYTHON.ORG")
492 self.assertEqual(p.path, b"/doc/")
493 self.assertEqual(p.query, b"")
494 self.assertEqual(p.fragment, b"frag")
495 self.assertEqual(p.username, None)
496 self.assertEqual(p.password, None)
497 self.assertEqual(p.hostname, b"www.python.org")
498 self.assertEqual(p.port, None)
499 self.assertEqual(p.geturl()[4:], url[4:])
500
501 url = b"http://User:Pass@www.python.org:080/doc/?query=yes#frag"
502 p = urllib.parse.urlsplit(url)
503 self.assertEqual(p.scheme, b"http")
504 self.assertEqual(p.netloc, b"User:Pass@www.python.org:080")
505 self.assertEqual(p.path, b"/doc/")
506 self.assertEqual(p.query, b"query=yes")
507 self.assertEqual(p.fragment, b"frag")
508 self.assertEqual(p.username, b"User")
509 self.assertEqual(p.password, b"Pass")
510 self.assertEqual(p.hostname, b"www.python.org")
511 self.assertEqual(p.port, 80)
512 self.assertEqual(p.geturl(), url)
513
514 url = b"http://User@example.com:Pass@www.python.org:080/doc/?query=yes#frag"
515 p = urllib.parse.urlsplit(url)
516 self.assertEqual(p.scheme, b"http")
517 self.assertEqual(p.netloc, b"User@example.com:Pass@www.python.org:080")
518 self.assertEqual(p.path, b"/doc/")
519 self.assertEqual(p.query, b"query=yes")
520 self.assertEqual(p.fragment, b"frag")
521 self.assertEqual(p.username, b"User@example.com")
522 self.assertEqual(p.password, b"Pass")
523 self.assertEqual(p.hostname, b"www.python.org")
524 self.assertEqual(p.port, 80)
525 self.assertEqual(p.geturl(), url)
Christian Heimesfaf2f632008-01-06 16:59:19 +0000526
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000527 def test_attributes_bad_port(self):
528 """Check handling of non-integer ports."""
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000529 p = urllib.parse.urlsplit("http://www.example.net:foo")
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000530 self.assertEqual(p.netloc, "www.example.net:foo")
531 self.assertRaises(ValueError, lambda: p.port)
532
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000533 p = urllib.parse.urlparse("http://www.example.net:foo")
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000534 self.assertEqual(p.netloc, "www.example.net:foo")
535 self.assertRaises(ValueError, lambda: p.port)
536
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000537 # Once again, repeat ourselves to test bytes
538 p = urllib.parse.urlsplit(b"http://www.example.net:foo")
539 self.assertEqual(p.netloc, b"www.example.net:foo")
540 self.assertRaises(ValueError, lambda: p.port)
541
542 p = urllib.parse.urlparse(b"http://www.example.net:foo")
543 self.assertEqual(p.netloc, b"www.example.net:foo")
544 self.assertRaises(ValueError, lambda: p.port)
545
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000546 def test_attributes_without_netloc(self):
547 # This example is straight from RFC 3261. It looks like it
548 # should allow the username, hostname, and port to be filled
549 # in, but doesn't. Since it's a URI and doesn't use the
550 # scheme://netloc syntax, the netloc and related attributes
551 # should be left empty.
552 uri = "sip:alice@atlanta.com;maddr=239.255.255.1;ttl=15"
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000553 p = urllib.parse.urlsplit(uri)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000554 self.assertEqual(p.netloc, "")
555 self.assertEqual(p.username, None)
556 self.assertEqual(p.password, None)
557 self.assertEqual(p.hostname, None)
558 self.assertEqual(p.port, None)
559 self.assertEqual(p.geturl(), uri)
560
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000561 p = urllib.parse.urlparse(uri)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000562 self.assertEqual(p.netloc, "")
563 self.assertEqual(p.username, None)
564 self.assertEqual(p.password, None)
565 self.assertEqual(p.hostname, None)
566 self.assertEqual(p.port, None)
567 self.assertEqual(p.geturl(), uri)
568
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000569 # You guessed it, repeating the test with bytes input
570 uri = b"sip:alice@atlanta.com;maddr=239.255.255.1;ttl=15"
571 p = urllib.parse.urlsplit(uri)
572 self.assertEqual(p.netloc, b"")
573 self.assertEqual(p.username, None)
574 self.assertEqual(p.password, None)
575 self.assertEqual(p.hostname, None)
576 self.assertEqual(p.port, None)
577 self.assertEqual(p.geturl(), uri)
578
579 p = urllib.parse.urlparse(uri)
580 self.assertEqual(p.netloc, b"")
581 self.assertEqual(p.username, None)
582 self.assertEqual(p.password, None)
583 self.assertEqual(p.hostname, None)
584 self.assertEqual(p.port, None)
585 self.assertEqual(p.geturl(), uri)
586
Christian Heimesfaf2f632008-01-06 16:59:19 +0000587 def test_noslash(self):
588 # Issue 1637: http://foo.com?query is legal
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000589 self.assertEqual(urllib.parse.urlparse("http://example.com?blahblah=/foo"),
Christian Heimesfaf2f632008-01-06 16:59:19 +0000590 ('http', 'example.com', '', '', 'blahblah=/foo', ''))
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000591 self.assertEqual(urllib.parse.urlparse(b"http://example.com?blahblah=/foo"),
592 (b'http', b'example.com', b'', b'', b'blahblah=/foo', b''))
Christian Heimesfaf2f632008-01-06 16:59:19 +0000593
Senthil Kumaran84c7d9f2010-08-04 04:50:44 +0000594 def test_withoutscheme(self):
595 # Test urlparse without scheme
596 # Issue 754016: urlparse goes wrong with IP:port without scheme
597 # RFC 1808 specifies that netloc should start with //, urlparse expects
598 # the same, otherwise it classifies the portion of url as path.
599 self.assertEqual(urllib.parse.urlparse("path"),
600 ('','','path','','',''))
601 self.assertEqual(urllib.parse.urlparse("//www.python.org:80"),
602 ('','www.python.org:80','','','',''))
603 self.assertEqual(urllib.parse.urlparse("http://www.python.org:80"),
604 ('http','www.python.org:80','','','',''))
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000605 # Repeat for bytes input
606 self.assertEqual(urllib.parse.urlparse(b"path"),
607 (b'',b'',b'path',b'',b'',b''))
608 self.assertEqual(urllib.parse.urlparse(b"//www.python.org:80"),
609 (b'',b'www.python.org:80',b'',b'',b'',b''))
610 self.assertEqual(urllib.parse.urlparse(b"http://www.python.org:80"),
611 (b'http',b'www.python.org:80',b'',b'',b'',b''))
Senthil Kumaran84c7d9f2010-08-04 04:50:44 +0000612
613 def test_portseparator(self):
614 # Issue 754016 makes changes for port separator ':' from scheme separator
615 self.assertEqual(urllib.parse.urlparse("path:80"),
616 ('','','path:80','','',''))
617 self.assertEqual(urllib.parse.urlparse("http:"),('http','','','','',''))
618 self.assertEqual(urllib.parse.urlparse("https:"),('https','','','','',''))
619 self.assertEqual(urllib.parse.urlparse("http://www.python.org:80"),
620 ('http','www.python.org:80','','','',''))
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000621 # As usual, need to check bytes input as well
622 self.assertEqual(urllib.parse.urlparse(b"path:80"),
623 (b'',b'',b'path:80',b'',b'',b''))
624 self.assertEqual(urllib.parse.urlparse(b"http:"),(b'http',b'',b'',b'',b'',b''))
625 self.assertEqual(urllib.parse.urlparse(b"https:"),(b'https',b'',b'',b'',b'',b''))
626 self.assertEqual(urllib.parse.urlparse(b"http://www.python.org:80"),
627 (b'http',b'www.python.org:80',b'',b'',b'',b''))
Senthil Kumaran84c7d9f2010-08-04 04:50:44 +0000628
Facundo Batista2ac5de22008-07-07 18:24:11 +0000629 def test_usingsys(self):
630 # Issue 3314: sys module is used in the error
631 self.assertRaises(TypeError, urllib.parse.urlencode, "foo")
632
Senthil Kumaran6be85c52010-02-19 07:42:50 +0000633 def test_anyscheme(self):
634 # Issue 7904: s3://foo.com/stuff has netloc "foo.com".
Ezio Melotti5e15efa2010-02-19 14:49:02 +0000635 self.assertEqual(urllib.parse.urlparse("s3://foo.com/stuff"),
636 ('s3', 'foo.com', '/stuff', '', '', ''))
637 self.assertEqual(urllib.parse.urlparse("x-newscheme://foo.com/stuff"),
638 ('x-newscheme', 'foo.com', '/stuff', '', '', ''))
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000639 # And for bytes...
640 self.assertEqual(urllib.parse.urlparse(b"s3://foo.com/stuff"),
641 (b's3', b'foo.com', b'/stuff', b'', b'', b''))
642 self.assertEqual(urllib.parse.urlparse(b"x-newscheme://foo.com/stuff"),
643 (b'x-newscheme', b'foo.com', b'/stuff', b'', b'', b''))
644
645 def test_mixed_types_rejected(self):
646 # Several functions that process either strings or ASCII encoded bytes
647 # accept multiple arguments. Check they reject mixed type input
Ezio Melottied3a7d22010-12-01 02:32:32 +0000648 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000649 urllib.parse.urlparse("www.python.org", b"http")
Ezio Melottied3a7d22010-12-01 02:32:32 +0000650 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000651 urllib.parse.urlparse(b"www.python.org", "http")
Ezio Melottied3a7d22010-12-01 02:32:32 +0000652 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000653 urllib.parse.urlsplit("www.python.org", b"http")
Ezio Melottied3a7d22010-12-01 02:32:32 +0000654 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000655 urllib.parse.urlsplit(b"www.python.org", "http")
Ezio Melottied3a7d22010-12-01 02:32:32 +0000656 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000657 urllib.parse.urlunparse(( b"http", "www.python.org","","","",""))
Ezio Melottied3a7d22010-12-01 02:32:32 +0000658 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000659 urllib.parse.urlunparse(("http", b"www.python.org","","","",""))
Ezio Melottied3a7d22010-12-01 02:32:32 +0000660 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000661 urllib.parse.urlunsplit((b"http", "www.python.org","","",""))
Ezio Melottied3a7d22010-12-01 02:32:32 +0000662 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000663 urllib.parse.urlunsplit(("http", b"www.python.org","","",""))
Ezio Melottied3a7d22010-12-01 02:32:32 +0000664 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000665 urllib.parse.urljoin("http://python.org", b"http://python.org")
Ezio Melottied3a7d22010-12-01 02:32:32 +0000666 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000667 urllib.parse.urljoin(b"http://python.org", "http://python.org")
668
def _check_result_type(self, str_type):
    # Helper: verify that the str result class ``str_type`` and its bytes
    # counterpart point at each other and convert into one another via
    # encode()/decode(), using instances whose fields are all empty.
    field_count = len(str_type._fields)
    bytes_type = str_type._encoded_counterpart
    self.assertIs(bytes_type._decoded_counterpart, str_type)

    text_fields = ('',) * field_count
    byte_fields = (b'',) * field_count
    text_result = str_type(*text_fields)
    bytes_result = bytes_type(*byte_fields)

    # Conversion is exercised with no arguments, with the encoding only,
    # and with both the encoding and the error handler.
    codec_variants = ((), ('ascii',), ('ascii', 'strict'))

    self.assertEqual(text_result, text_fields)
    for codec_args in codec_variants:
        self.assertEqual(bytes_result.decode(*codec_args), text_fields)
        self.assertEqual(bytes_result.decode(*codec_args), text_result)

    self.assertEqual(bytes_result, byte_fields)
    for codec_args in codec_variants:
        self.assertEqual(text_result.encode(*codec_args), byte_fields)
        self.assertEqual(text_result.encode(*codec_args), bytes_result)
693
def test_result_pairs(self):
    # Check encoding and decoding between result pairs: every str result
    # class listed here is run through the shared _check_result_type
    # helper, in this order.
    paired_types = (
        urllib.parse.DefragResult,
        urllib.parse.SplitResult,
        urllib.parse.ParseResult,
    )
    for paired_type in paired_types:
        self._check_result_type(paired_type)
703
Victor Stinner1d87deb2011-01-14 13:05:19 +0000704 def test_parse_qs_encoding(self):
705 result = urllib.parse.parse_qs("key=\u0141%E9", encoding="latin-1")
706 self.assertEqual(result, {'key': ['\u0141\xE9']})
707 result = urllib.parse.parse_qs("key=\u0141%C3%A9", encoding="utf-8")
708 self.assertEqual(result, {'key': ['\u0141\xE9']})
709 result = urllib.parse.parse_qs("key=\u0141%C3%A9", encoding="ascii")
710 self.assertEqual(result, {'key': ['\u0141\ufffd\ufffd']})
711 result = urllib.parse.parse_qs("key=\u0141%E9-", encoding="ascii")
712 self.assertEqual(result, {'key': ['\u0141\ufffd-']})
713 result = urllib.parse.parse_qs("key=\u0141%E9-", encoding="ascii",
714 errors="ignore")
715 self.assertEqual(result, {'key': ['\u0141-']})
716
717 def test_parse_qsl_encoding(self):
718 result = urllib.parse.parse_qsl("key=\u0141%E9", encoding="latin-1")
719 self.assertEqual(result, [('key', '\u0141\xE9')])
720 result = urllib.parse.parse_qsl("key=\u0141%C3%A9", encoding="utf-8")
721 self.assertEqual(result, [('key', '\u0141\xE9')])
722 result = urllib.parse.parse_qsl("key=\u0141%C3%A9", encoding="ascii")
723 self.assertEqual(result, [('key', '\u0141\ufffd\ufffd')])
724 result = urllib.parse.parse_qsl("key=\u0141%E9-", encoding="ascii")
725 self.assertEqual(result, [('key', '\u0141\ufffd-')])
726 result = urllib.parse.parse_qsl("key=\u0141%E9-", encoding="ascii",
727 errors="ignore")
728 self.assertEqual(result, [('key', '\u0141-')])
729
Senthil Kumarande02a712011-07-23 18:27:45 +0800730 def test_splitnport(self):
731 # Normal cases are exercised by other tests; ensure that we also
732 # catch cases with no port specified. (testcase ensuring coverage)
733 result = urllib.parse.splitnport('parrot:88')
734 self.assertEqual(result, ('parrot', 88))
735 result = urllib.parse.splitnport('parrot')
736 self.assertEqual(result, ('parrot', -1))
737 result = urllib.parse.splitnport('parrot', 55)
738 self.assertEqual(result, ('parrot', 55))
739 result = urllib.parse.splitnport('parrot:')
740 self.assertEqual(result, ('parrot', None))
741
742 def test_splitquery(self):
743 # Normal cases are exercised by other tests; ensure that we also
744 # catch cases with no port specified (testcase ensuring coverage)
745 result = urllib.parse.splitquery('http://python.org/fake?foo=bar')
746 self.assertEqual(result, ('http://python.org/fake', 'foo=bar'))
747 result = urllib.parse.splitquery('http://python.org/fake?foo=bar?')
748 self.assertEqual(result, ('http://python.org/fake?foo=bar', ''))
749 result = urllib.parse.splitquery('http://python.org/fake')
750 self.assertEqual(result, ('http://python.org/fake', None))
751
752 def test_splitvalue(self):
753 # Normal cases are exercised by other tests; test pathological cases
754 # with no key/value pairs. (testcase ensuring coverage)
755 result = urllib.parse.splitvalue('foo=bar')
756 self.assertEqual(result, ('foo', 'bar'))
757 result = urllib.parse.splitvalue('foo=')
758 self.assertEqual(result, ('foo', ''))
759 result = urllib.parse.splitvalue('foobar')
760 self.assertEqual(result, ('foobar', None))
761
762 def test_to_bytes(self):
763 result = urllib.parse.to_bytes('http://www.python.org')
764 self.assertEqual(result, 'http://www.python.org')
765 self.assertRaises(UnicodeError, urllib.parse.to_bytes,
766 'http://www.python.org/medi\u00e6val')
767
768 def test_urlencode_sequences(self):
769 # Other tests incidentally urlencode things; test non-covered cases:
770 # Sequence and object values.
771 result = urllib.parse.urlencode({'a': [1, 2], 'b': (3, 4, 5)}, True)
772 self.assertEqual(result, 'a=1&a=2&b=3&b=4&b=5')
773
774 class Trivial:
775 def __str__(self):
776 return 'trivial'
777
778 result = urllib.parse.urlencode({'a': Trivial()}, True)
779 self.assertEqual(result, 'a=trivial')
780
781 def test_quote_from_bytes(self):
782 self.assertRaises(TypeError, urllib.parse.quote_from_bytes, 'foo')
783 result = urllib.parse.quote_from_bytes(b'archaeological arcana')
784 self.assertEqual(result, 'archaeological%20arcana')
785 result = urllib.parse.quote_from_bytes(b'')
786 self.assertEqual(result, '')
787
788 def test_unquote_to_bytes(self):
789 result = urllib.parse.unquote_to_bytes('abc%20def')
790 self.assertEqual(result, b'abc def')
791 result = urllib.parse.unquote_to_bytes('')
792 self.assertEqual(result, b'')
793
794 def test_quote_errors(self):
795 self.assertRaises(TypeError, urllib.parse.quote, b'foo',
796 encoding='utf-8')
797 self.assertRaises(TypeError, urllib.parse.quote, b'foo', errors='strict')
Victor Stinner1d87deb2011-01-14 13:05:19 +0000798
Senthil Kumaran6be85c52010-02-19 07:42:50 +0000799
def test_main():
    # Entry point for this module's tests: hand the test case class(es)
    # to the test.support runner.
    case_classes = (UrlParseTestCase,)
    support.run_unittest(*case_classes)
Skip Montanaro6ec967d2002-03-23 05:32:10 +0000802
# Run the tests when this file is executed directly as a script.
if __name__ == "__main__":
    test_main()