blob: c8d572cda16317331053e6787618278c5a400758 [file] [log] [blame]
Benjamin Peterson90f5ba52010-03-11 22:53:45 +00001#! /usr/bin/env python3
Skip Montanaro6ec967d2002-03-23 05:32:10 +00002
Benjamin Petersonee8712c2008-05-20 21:35:26 +00003from test import support
Skip Montanaro6ec967d2002-03-23 05:32:10 +00004import unittest
Jeremy Hylton1afc1692008-06-18 20:49:58 +00005import urllib.parse
Fred Drakea4d18a02001-01-05 05:57:04 +00006
# Base URLs that the urljoin() tests below resolve relative references against.
RFC1808_BASE = "http://a/b/c/d;p?q#f"
RFC2396_BASE = "http://a/b/c/d;p?q"
RFC3986_BASE = "http://a/b/c/d;p?q"
SIMPLE_BASE = "http://a/b/c/d"
Fred Drakea4d18a02001-01-05 05:57:04 +000011
# A list of test cases.  Each test case is a two-tuple that contains
# the query (as str or bytes) and the list of (name, value) pairs that
# parse_qsl() is expected to return for it.  test_qsl() parses every
# query with keep_blank_values=True, so empty values appear in the results.

parse_qsl_test_cases = [
    ("", []),
    ("&", []),
    ("&&", []),
    ("=", [('', '')]),
    ("=a", [('', 'a')]),
    ("a", [('a', '')]),
    ("a=", [('a', '')]),
    ("&a=b", [('a', 'b')]),
    ("a=a+b&b=b+c", [('a', 'a b'), ('b', 'b c')]),
    ("a=1&a=2", [('a', '1'), ('a', '2')]),
    # The same queries again, as bytes.
    (b"", []),
    (b"&", []),
    (b"&&", []),
    (b"=", [(b'', b'')]),
    (b"=a", [(b'', b'a')]),
    (b"a", [(b'a', b'')]),
    (b"a=", [(b'a', b'')]),
    (b"&a=b", [(b'a', b'b')]),
    (b"a=a+b&b=b+c", [(b'a', b'a b'), (b'b', b'b c')]),
    (b"a=1&a=2", [(b'a', b'1'), (b'a', b'2')]),
]
39
Skip Montanaro6ec967d2002-03-23 05:32:10 +000040class UrlParseTestCase(unittest.TestCase):
Johannes Gijsbers41e4faa2005-01-09 15:29:10 +000041
def checkRoundtrips(self, url, parsed, split):
    """Assert that *url* parses, unparses and re-parses consistently.

    *parsed* is the expected 6-field result of urlparse(url) and *split*
    the expected 5-field result of urlsplit(url).  For both functions the
    result must match the expectation, unparse back to *url*, and parse
    again to an equal result with equal derived attributes.
    """
    derived = ("username", "password", "hostname", "port")

    # --- urlparse() / urlunparse() ---
    parse_fields = ("scheme", "netloc", "path", "params", "query", "fragment")
    first = urllib.parse.urlparse(url)
    self.assertEqual(first, parsed)
    as_tuple = tuple(getattr(first, name) for name in parse_fields)
    self.assertEqual(as_tuple, parsed)
    # put it back together and it should be the same
    rebuilt = urllib.parse.urlunparse(first)
    self.assertEqual(rebuilt, url)
    self.assertEqual(rebuilt, first.geturl())

    # the result of geturl() is a fixpoint; we can always parse it
    # again to get the same result:
    second = urllib.parse.urlparse(first.geturl())
    self.assertEqual(second.geturl(), first.geturl())
    self.assertEqual(second, first)
    for name in parse_fields + derived:
        self.assertEqual(getattr(second, name), getattr(first, name))

    # --- urlsplit() / urlunsplit() ---
    split_fields = ("scheme", "netloc", "path", "query", "fragment")
    first = urllib.parse.urlsplit(url)
    self.assertEqual(first, split)
    as_tuple = tuple(getattr(first, name) for name in split_fields)
    self.assertEqual(as_tuple, split)
    rebuilt = urllib.parse.urlunsplit(first)
    self.assertEqual(rebuilt, url)
    self.assertEqual(rebuilt, first.geturl())

    # check the fixpoint property of re-parsing the result of geturl()
    second = urllib.parse.urlsplit(first.geturl())
    self.assertEqual(second.geturl(), first.geturl())
    self.assertEqual(second, first)
    for name in split_fields + derived:
        self.assertEqual(getattr(second, name), getattr(first, name))
Johannes Gijsbers41e4faa2005-01-09 15:29:10 +000092
def test_qsl(self):
    # Every query in parse_qsl_test_cases must parse to its expected list
    # of pairs when blank values are kept.
    for query, expected in parse_qsl_test_cases:
        actual = urllib.parse.parse_qsl(query, keep_blank_values=True)
        self.assertEqual(actual, expected, "Error parsing %s" % repr(query))
97
98
def test_roundtrips(self):
    # Round-trip a handful of non-HTTP URLs, first as str and then as the
    # equivalent ASCII-encoded bytes.  Each case is
    # (url, expected urlparse() fields, expected urlsplit() fields).
    text_cases = [
        ('file:///tmp/junk.txt',
         ('file', '', '/tmp/junk.txt', '', '', ''),
         ('file', '', '/tmp/junk.txt', '', '')),
        ('imap://mail.python.org/mbox1',
         ('imap', 'mail.python.org', '/mbox1', '', '', ''),
         ('imap', 'mail.python.org', '/mbox1', '', '')),
        ('mms://wms.sys.hinet.net/cts/Drama/09006251100.asf',
         ('mms', 'wms.sys.hinet.net', '/cts/Drama/09006251100.asf',
          '', '', ''),
         ('mms', 'wms.sys.hinet.net', '/cts/Drama/09006251100.asf',
          '', '')),
        ('nfs://server/path/to/file.txt',
         ('nfs', 'server', '/path/to/file.txt', '', '', ''),
         ('nfs', 'server', '/path/to/file.txt', '', '')),
        ('svn+ssh://svn.zope.org/repos/main/ZConfig/trunk/',
         ('svn+ssh', 'svn.zope.org', '/repos/main/ZConfig/trunk/',
          '', '', ''),
         ('svn+ssh', 'svn.zope.org', '/repos/main/ZConfig/trunk/',
          '', '')),
        ('git+ssh://git@github.com/user/project.git',
         ('git+ssh', 'git@github.com', '/user/project.git',
          '', '', ''),
         ('git+ssh', 'git@github.com', '/user/project.git',
          '', '')),
    ]

    def to_bytes(case):
        url, parsed, split = case
        return (url.encode('ascii'),
                tuple(field.encode('ascii') for field in parsed),
                tuple(field.encode('ascii') for field in split))

    binary_cases = [to_bytes(case) for case in text_cases]
    for case in text_cases + binary_cases:
        url, parsed, split = case
        self.checkRoundtrips(url, parsed, split)
Michael W. Hudsonbd3e7712002-03-18 13:06:00 +0000133
def test_http_roundtrips(self):
    # urllib.parse.urlsplit treats 'http:' as an optimized special case,
    # so we test both 'http:' and 'https:' in all the following.
    # Three cheers for white box knowledge!
    #
    # Each case holds the URL without its scheme, followed by the expected
    # urlparse() and urlsplit() fields without the leading scheme field;
    # the scheme is prepended to all three inside the loop.
    text_cases = [
        ('://www.python.org',
         ('www.python.org', '', '', '', ''),
         ('www.python.org', '', '', '')),
        ('://www.python.org#abc',
         ('www.python.org', '', '', '', 'abc'),
         ('www.python.org', '', '', 'abc')),
        ('://www.python.org?q=abc',
         ('www.python.org', '', '', 'q=abc', ''),
         ('www.python.org', '', 'q=abc', '')),
        ('://www.python.org/#abc',
         ('www.python.org', '/', '', '', 'abc'),
         ('www.python.org', '/', '', 'abc')),
        ('://a/b/c/d;p?q#f',
         ('a', '/b/c/d', 'p', 'q', 'f'),
         ('a', '/b/c/d;p', 'q', 'f')),
    ]

    def to_bytes(case):
        suffix, parsed_tail, split_tail = case
        return (suffix.encode('ascii'),
                tuple(field.encode('ascii') for field in parsed_tail),
                tuple(field.encode('ascii') for field in split_tail))

    binary_cases = [to_bytes(case) for case in text_cases]
    groups = (
        (('http', 'https'), text_cases),
        ((b'http', b'https'), binary_cases),
    )
    for schemes, cases in groups:
        for scheme in schemes:
            for suffix, parsed_tail, split_tail in cases:
                self.checkRoundtrips(scheme + suffix,
                                     (scheme,) + parsed_tail,
                                     (scheme,) + split_tail)
Fred Drake70705652002-10-16 21:02:36 +0000171
def checkJoin(self, base, relurl, expected):
    """Assert that urljoin(base, relurl) gives *expected*, for str and bytes.

    All three arguments are str; the check is then repeated with each of
    them encoded to ASCII bytes.
    """
    self.assertEqual(urllib.parse.urljoin(base, relurl), expected)
    base_bytes = base.encode('ascii')
    rel_bytes = relurl.encode('ascii')
    expected_bytes = expected.encode('ascii')
    self.assertEqual(urllib.parse.urljoin(base_bytes, rel_bytes),
                     expected_bytes)
Guido van Rossumbbc05682002-10-14 19:59:54 +0000178
179 def test_unparse_parse(self):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000180 str_cases = ['Python', './Python','x-newscheme://foo.com/stuff','x://y','x:/y','x:/','/',]
181 bytes_cases = [x.encode('ascii') for x in str_cases]
182 for u in str_cases + bytes_cases:
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000183 self.assertEqual(urllib.parse.urlunsplit(urllib.parse.urlsplit(u)), u)
184 self.assertEqual(urllib.parse.urlunparse(urllib.parse.urlparse(u)), u)
Fred Drakea4d18a02001-01-05 05:57:04 +0000185
def test_RFC1808(self):
    # Each pair is (relative URL, expected result of joining it onto
    # RFC1808_BASE).
    cases = [
        # "normal" cases from RFC 1808:
        ('g:h', 'g:h'),
        ('g', 'http://a/b/c/g'),
        ('./g', 'http://a/b/c/g'),
        ('g/', 'http://a/b/c/g/'),
        ('/g', 'http://a/g'),
        ('//g', 'http://g'),
        ('g?y', 'http://a/b/c/g?y'),
        ('g?y/./x', 'http://a/b/c/g?y/./x'),
        ('#s', 'http://a/b/c/d;p?q#s'),
        ('g#s', 'http://a/b/c/g#s'),
        ('g#s/./x', 'http://a/b/c/g#s/./x'),
        ('g?y#s', 'http://a/b/c/g?y#s'),
        ('g;x', 'http://a/b/c/g;x'),
        ('g;x?y#s', 'http://a/b/c/g;x?y#s'),
        ('.', 'http://a/b/c/'),
        ('./', 'http://a/b/c/'),
        ('..', 'http://a/b/'),
        ('../', 'http://a/b/'),
        ('../g', 'http://a/b/g'),
        ('../..', 'http://a/'),
        ('../../', 'http://a/'),
        ('../../g', 'http://a/g'),

        # "abnormal" cases from RFC 1808:
        ('', 'http://a/b/c/d;p?q#f'),
        ('../../../g', 'http://a/../g'),
        ('../../../../g', 'http://a/../../g'),
        ('/./g', 'http://a/./g'),
        ('/../g', 'http://a/../g'),
        ('g.', 'http://a/b/c/g.'),
        ('.g', 'http://a/b/c/.g'),
        ('g..', 'http://a/b/c/g..'),
        ('..g', 'http://a/b/c/..g'),
        ('./../g', 'http://a/b/g'),
        ('./g/.', 'http://a/b/c/g/'),
        ('g/./h', 'http://a/b/c/g/h'),
        ('g/../h', 'http://a/b/c/h'),

        # RFC 1808 and RFC 1630 disagree on these (according to RFC 1808),
        # so we'll not actually run these tests (which expect 1808 behavior).
        # ('http:g', 'http:g'),
        # ('http:', 'http:'),
    ]
    for relative, joined in cases:
        self.checkJoin(RFC1808_BASE, relative, joined)
Fred Drakea4d18a02001-01-05 05:57:04 +0000230
Senthil Kumaran397eb442011-04-15 18:20:24 +0800231 def test_RFC2368(self):
232 # Issue 11467: path that starts with a number is not parsed correctly
233 self.assertEqual(urllib.parse.urlparse('mailto:1337@example.org'),
234 ('mailto', '', '1337@example.org', '', '', ''))
235
def test_RFC2396(self):
    # cases from RFC 2396
    # Each pair is (relative URL, expected result of joining it onto
    # RFC2396_BASE).
    cases = [
        ('g:h', 'g:h'),
        ('g', 'http://a/b/c/g'),
        ('./g', 'http://a/b/c/g'),
        ('g/', 'http://a/b/c/g/'),
        ('/g', 'http://a/g'),
        ('//g', 'http://g'),
        ('g?y', 'http://a/b/c/g?y'),
        ('#s', 'http://a/b/c/d;p?q#s'),
        ('g#s', 'http://a/b/c/g#s'),
        ('g?y#s', 'http://a/b/c/g?y#s'),
        ('g;x', 'http://a/b/c/g;x'),
        ('g;x?y#s', 'http://a/b/c/g;x?y#s'),
        ('.', 'http://a/b/c/'),
        ('./', 'http://a/b/c/'),
        ('..', 'http://a/b/'),
        ('../', 'http://a/b/'),
        ('../g', 'http://a/b/g'),
        ('../..', 'http://a/'),
        ('../../', 'http://a/'),
        ('../../g', 'http://a/g'),
        ('', RFC2396_BASE),
        ('../../../g', 'http://a/../g'),
        ('../../../../g', 'http://a/../../g'),
        ('/./g', 'http://a/./g'),
        ('/../g', 'http://a/../g'),
        ('g.', 'http://a/b/c/g.'),
        ('.g', 'http://a/b/c/.g'),
        ('g..', 'http://a/b/c/g..'),
        ('..g', 'http://a/b/c/..g'),
        ('./../g', 'http://a/b/g'),
        ('./g/.', 'http://a/b/c/g/'),
        ('g/./h', 'http://a/b/c/g/h'),
        ('g/../h', 'http://a/b/c/h'),
        ('g;x=1/./y', 'http://a/b/c/g;x=1/y'),
        ('g;x=1/../y', 'http://a/b/c/y'),
        ('g?y/./x', 'http://a/b/c/g?y/./x'),
        ('g?y/../x', 'http://a/b/c/g?y/../x'),
        ('g#s/./x', 'http://a/b/c/g#s/./x'),
        ('g#s/../x', 'http://a/b/c/g#s/../x'),
    ]
    for relative, joined in cases:
        self.checkJoin(RFC2396_BASE, relative, joined)
279
def test_RFC3986(self):
    # Test cases from RFC3986
    # Every check here resolves against RFC3986_BASE.  The '?y' and ';x'
    # checks that used to be repeated at the top of this test (one of them
    # against RFC2396_BASE, which has the same value) are each made once,
    # in their place within the list below.
    self.checkJoin(RFC3986_BASE, 'g:h', 'g:h')
    self.checkJoin(RFC3986_BASE, 'g', 'http://a/b/c/g')
    self.checkJoin(RFC3986_BASE, './g', 'http://a/b/c/g')
    self.checkJoin(RFC3986_BASE, 'g/', 'http://a/b/c/g/')
    self.checkJoin(RFC3986_BASE, '/g', 'http://a/g')
    self.checkJoin(RFC3986_BASE, '//g', 'http://g')
    self.checkJoin(RFC3986_BASE, '?y', 'http://a/b/c/d;p?y')
    self.checkJoin(RFC3986_BASE, 'g?y', 'http://a/b/c/g?y')
    self.checkJoin(RFC3986_BASE, '#s', 'http://a/b/c/d;p?q#s')
    self.checkJoin(RFC3986_BASE, 'g#s', 'http://a/b/c/g#s')
    self.checkJoin(RFC3986_BASE, 'g?y#s', 'http://a/b/c/g?y#s')
    self.checkJoin(RFC3986_BASE, ';x', 'http://a/b/c/;x')
    self.checkJoin(RFC3986_BASE, 'g;x', 'http://a/b/c/g;x')
    self.checkJoin(RFC3986_BASE, 'g;x?y#s', 'http://a/b/c/g;x?y#s')
    self.checkJoin(RFC3986_BASE, '', 'http://a/b/c/d;p?q')
    self.checkJoin(RFC3986_BASE, '.', 'http://a/b/c/')
    self.checkJoin(RFC3986_BASE, './', 'http://a/b/c/')
    self.checkJoin(RFC3986_BASE, '..', 'http://a/b/')
    self.checkJoin(RFC3986_BASE, '../', 'http://a/b/')
    self.checkJoin(RFC3986_BASE, '../g', 'http://a/b/g')
    self.checkJoin(RFC3986_BASE, '../..', 'http://a/')
    self.checkJoin(RFC3986_BASE, '../../', 'http://a/')
    self.checkJoin(RFC3986_BASE, '../../g', 'http://a/g')

    # Abnormal Examples

    # The 'abnormal scenarios' are incompatible with RFC3986 parsing
    # Tests are here for reference.

    #self.checkJoin(RFC3986_BASE, '../../../g','http://a/g')
    #self.checkJoin(RFC3986_BASE, '../../../../g','http://a/g')
    #self.checkJoin(RFC3986_BASE, '/./g','http://a/g')
    #self.checkJoin(RFC3986_BASE, '/../g','http://a/g')

    self.checkJoin(RFC3986_BASE, 'g.', 'http://a/b/c/g.')
    self.checkJoin(RFC3986_BASE, '.g', 'http://a/b/c/.g')
    self.checkJoin(RFC3986_BASE, 'g..', 'http://a/b/c/g..')
    self.checkJoin(RFC3986_BASE, '..g', 'http://a/b/c/..g')
    self.checkJoin(RFC3986_BASE, './../g', 'http://a/b/g')
    self.checkJoin(RFC3986_BASE, './g/.', 'http://a/b/c/g/')
    self.checkJoin(RFC3986_BASE, 'g/./h', 'http://a/b/c/g/h')
    self.checkJoin(RFC3986_BASE, 'g/../h', 'http://a/b/c/h')
    self.checkJoin(RFC3986_BASE, 'g;x=1/./y', 'http://a/b/c/g;x=1/y')
    self.checkJoin(RFC3986_BASE, 'g;x=1/../y', 'http://a/b/c/y')
    self.checkJoin(RFC3986_BASE, 'g?y/./x', 'http://a/b/c/g?y/./x')
    self.checkJoin(RFC3986_BASE, 'g?y/../x', 'http://a/b/c/g?y/../x')
    self.checkJoin(RFC3986_BASE, 'g#s/./x', 'http://a/b/c/g#s/./x')
    self.checkJoin(RFC3986_BASE, 'g#s/../x', 'http://a/b/c/g#s/../x')
    #self.checkJoin(RFC3986_BASE, 'http:g','http:g') # strict parser
    self.checkJoin(RFC3986_BASE, 'http:g', 'http://a/b/c/g')  # relaxed parser

    # Test for issue9721
    self.checkJoin('http://a/b/c/de', ';x', 'http://a/b/c/;x')
337
def test_urljoins(self):
    # Each pair is (relative URL, expected result of joining it onto
    # SIMPLE_BASE).
    cases = [
        ('g:h', 'g:h'),
        ('http:g', 'http://a/b/c/g'),
        ('http:', 'http://a/b/c/d'),
        ('g', 'http://a/b/c/g'),
        ('./g', 'http://a/b/c/g'),
        ('g/', 'http://a/b/c/g/'),
        ('/g', 'http://a/g'),
        ('//g', 'http://g'),
        ('?y', 'http://a/b/c/d?y'),
        ('g?y', 'http://a/b/c/g?y'),
        ('g?y/./x', 'http://a/b/c/g?y/./x'),
        ('.', 'http://a/b/c/'),
        ('./', 'http://a/b/c/'),
        ('..', 'http://a/b/'),
        ('../', 'http://a/b/'),
        ('../g', 'http://a/b/g'),
        ('../..', 'http://a/'),
        ('../../g', 'http://a/g'),
        ('../../../g', 'http://a/../g'),
        ('./../g', 'http://a/b/g'),
        ('./g/.', 'http://a/b/c/g/'),
        ('/./g', 'http://a/./g'),
        ('g/./h', 'http://a/b/c/g/h'),
        ('g/../h', 'http://a/b/c/h'),
        ('http:g', 'http://a/b/c/g'),
        ('http:', 'http://a/b/c/d'),
        ('http:?y', 'http://a/b/c/d?y'),
        ('http:g?y', 'http://a/b/c/g?y'),
        ('http:g?y/./x', 'http://a/b/c/g?y/./x'),
    ]
    for relative, joined in cases:
        self.checkJoin(SIMPLE_BASE, relative, joined)
368
Senthil Kumaranad02d232010-04-16 03:02:13 +0000369 def test_RFC2732(self):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000370 str_cases = [
Senthil Kumaranad02d232010-04-16 03:02:13 +0000371 ('http://Test.python.org:5432/foo/', 'test.python.org', 5432),
372 ('http://12.34.56.78:5432/foo/', '12.34.56.78', 5432),
373 ('http://[::1]:5432/foo/', '::1', 5432),
374 ('http://[dead:beef::1]:5432/foo/', 'dead:beef::1', 5432),
375 ('http://[dead:beef::]:5432/foo/', 'dead:beef::', 5432),
376 ('http://[dead:beef:cafe:5417:affe:8FA3:deaf:feed]:5432/foo/',
377 'dead:beef:cafe:5417:affe:8fa3:deaf:feed', 5432),
378 ('http://[::12.34.56.78]:5432/foo/', '::12.34.56.78', 5432),
379 ('http://[::ffff:12.34.56.78]:5432/foo/',
380 '::ffff:12.34.56.78', 5432),
381 ('http://Test.python.org/foo/', 'test.python.org', None),
382 ('http://12.34.56.78/foo/', '12.34.56.78', None),
383 ('http://[::1]/foo/', '::1', None),
384 ('http://[dead:beef::1]/foo/', 'dead:beef::1', None),
385 ('http://[dead:beef::]/foo/', 'dead:beef::', None),
386 ('http://[dead:beef:cafe:5417:affe:8FA3:deaf:feed]/foo/',
387 'dead:beef:cafe:5417:affe:8fa3:deaf:feed', None),
388 ('http://[::12.34.56.78]/foo/', '::12.34.56.78', None),
389 ('http://[::ffff:12.34.56.78]/foo/',
390 '::ffff:12.34.56.78', None),
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000391 ]
392 def _encode(t):
393 return t[0].encode('ascii'), t[1].encode('ascii'), t[2]
394 bytes_cases = [_encode(x) for x in str_cases]
395 for url, hostname, port in str_cases + bytes_cases:
Senthil Kumaranad02d232010-04-16 03:02:13 +0000396 urlparsed = urllib.parse.urlparse(url)
397 self.assertEqual((urlparsed.hostname, urlparsed.port) , (hostname, port))
398
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000399 str_cases = [
Senthil Kumaranad02d232010-04-16 03:02:13 +0000400 'http://::12.34.56.78]/',
401 'http://[::1/foo/',
Senthil Kumaran7a1e09f2010-04-22 12:19:46 +0000402 'ftp://[::1/foo/bad]/bad',
Senthil Kumaran2eaef052010-04-20 20:42:50 +0000403 'http://[::1/foo/bad]/bad',
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000404 'http://[::ffff:12.34.56.78']
405 bytes_cases = [x.encode('ascii') for x in str_cases]
406 for invalid_url in str_cases + bytes_cases:
Senthil Kumaran7a1e09f2010-04-22 12:19:46 +0000407 self.assertRaises(ValueError, urllib.parse.urlparse, invalid_url)
Senthil Kumaranad02d232010-04-16 03:02:13 +0000408
def test_urldefrag(self):
    # Each case is (url, url without its fragment, fragment); the str
    # cases are repeated as ASCII-encoded bytes.
    text_cases = [
        ('http://python.org#frag', 'http://python.org', 'frag'),
        ('http://python.org', 'http://python.org', ''),
        ('http://python.org/#frag', 'http://python.org/', 'frag'),
        ('http://python.org/', 'http://python.org/', ''),
        ('http://python.org/?q#frag', 'http://python.org/?q', 'frag'),
        ('http://python.org/?q', 'http://python.org/?q', ''),
        ('http://python.org/p#frag', 'http://python.org/p', 'frag'),
        ('http://python.org/p?q', 'http://python.org/p?q', ''),
        (RFC1808_BASE, 'http://a/b/c/d;p?q', 'f'),
        (RFC2396_BASE, 'http://a/b/c/d;p?q', ''),
    ]
    binary_cases = [
        tuple(part.encode('ascii') for part in case)
        for case in text_cases
    ]
    for url, defrag, frag in text_cases + binary_cases:
        outcome = urllib.parse.urldefrag(url)
        self.assertEqual(outcome.geturl(), url)
        self.assertEqual(outcome, (defrag, frag))
        self.assertEqual(outcome.url, defrag)
        self.assertEqual(outcome.fragment, frag)
Fred Drake70705652002-10-16 21:02:36 +0000431
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000432 def test_urlsplit_attributes(self):
433 url = "HTTP://WWW.PYTHON.ORG/doc/#frag"
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000434 p = urllib.parse.urlsplit(url)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000435 self.assertEqual(p.scheme, "http")
436 self.assertEqual(p.netloc, "WWW.PYTHON.ORG")
437 self.assertEqual(p.path, "/doc/")
438 self.assertEqual(p.query, "")
439 self.assertEqual(p.fragment, "frag")
440 self.assertEqual(p.username, None)
441 self.assertEqual(p.password, None)
442 self.assertEqual(p.hostname, "www.python.org")
443 self.assertEqual(p.port, None)
444 # geturl() won't return exactly the original URL in this case
445 # since the scheme is always case-normalized
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000446 # We handle this by ignoring the first 4 characters of the URL
447 self.assertEqual(p.geturl()[4:], url[4:])
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000448
449 url = "http://User:Pass@www.python.org:080/doc/?query=yes#frag"
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000450 p = urllib.parse.urlsplit(url)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000451 self.assertEqual(p.scheme, "http")
452 self.assertEqual(p.netloc, "User:Pass@www.python.org:080")
453 self.assertEqual(p.path, "/doc/")
454 self.assertEqual(p.query, "query=yes")
455 self.assertEqual(p.fragment, "frag")
456 self.assertEqual(p.username, "User")
457 self.assertEqual(p.password, "Pass")
458 self.assertEqual(p.hostname, "www.python.org")
459 self.assertEqual(p.port, 80)
460 self.assertEqual(p.geturl(), url)
461
Christian Heimesfaf2f632008-01-06 16:59:19 +0000462 # Addressing issue1698, which suggests Username can contain
463 # "@" characters. Though not RFC compliant, many ftp sites allow
464 # and request email addresses as usernames.
465
466 url = "http://User@example.com:Pass@www.python.org:080/doc/?query=yes#frag"
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000467 p = urllib.parse.urlsplit(url)
Christian Heimesfaf2f632008-01-06 16:59:19 +0000468 self.assertEqual(p.scheme, "http")
469 self.assertEqual(p.netloc, "User@example.com:Pass@www.python.org:080")
470 self.assertEqual(p.path, "/doc/")
471 self.assertEqual(p.query, "query=yes")
472 self.assertEqual(p.fragment, "frag")
473 self.assertEqual(p.username, "User@example.com")
474 self.assertEqual(p.password, "Pass")
475 self.assertEqual(p.hostname, "www.python.org")
476 self.assertEqual(p.port, 80)
477 self.assertEqual(p.geturl(), url)
478
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000479 # And check them all again, only with bytes this time
480 url = b"HTTP://WWW.PYTHON.ORG/doc/#frag"
481 p = urllib.parse.urlsplit(url)
482 self.assertEqual(p.scheme, b"http")
483 self.assertEqual(p.netloc, b"WWW.PYTHON.ORG")
484 self.assertEqual(p.path, b"/doc/")
485 self.assertEqual(p.query, b"")
486 self.assertEqual(p.fragment, b"frag")
487 self.assertEqual(p.username, None)
488 self.assertEqual(p.password, None)
489 self.assertEqual(p.hostname, b"www.python.org")
490 self.assertEqual(p.port, None)
491 self.assertEqual(p.geturl()[4:], url[4:])
492
493 url = b"http://User:Pass@www.python.org:080/doc/?query=yes#frag"
494 p = urllib.parse.urlsplit(url)
495 self.assertEqual(p.scheme, b"http")
496 self.assertEqual(p.netloc, b"User:Pass@www.python.org:080")
497 self.assertEqual(p.path, b"/doc/")
498 self.assertEqual(p.query, b"query=yes")
499 self.assertEqual(p.fragment, b"frag")
500 self.assertEqual(p.username, b"User")
501 self.assertEqual(p.password, b"Pass")
502 self.assertEqual(p.hostname, b"www.python.org")
503 self.assertEqual(p.port, 80)
504 self.assertEqual(p.geturl(), url)
505
506 url = b"http://User@example.com:Pass@www.python.org:080/doc/?query=yes#frag"
507 p = urllib.parse.urlsplit(url)
508 self.assertEqual(p.scheme, b"http")
509 self.assertEqual(p.netloc, b"User@example.com:Pass@www.python.org:080")
510 self.assertEqual(p.path, b"/doc/")
511 self.assertEqual(p.query, b"query=yes")
512 self.assertEqual(p.fragment, b"frag")
513 self.assertEqual(p.username, b"User@example.com")
514 self.assertEqual(p.password, b"Pass")
515 self.assertEqual(p.hostname, b"www.python.org")
516 self.assertEqual(p.port, 80)
517 self.assertEqual(p.geturl(), url)
Christian Heimesfaf2f632008-01-06 16:59:19 +0000518
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000519 def test_attributes_bad_port(self):
520 """Check handling of non-integer ports."""
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000521 p = urllib.parse.urlsplit("http://www.example.net:foo")
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000522 self.assertEqual(p.netloc, "www.example.net:foo")
523 self.assertRaises(ValueError, lambda: p.port)
524
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000525 p = urllib.parse.urlparse("http://www.example.net:foo")
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000526 self.assertEqual(p.netloc, "www.example.net:foo")
527 self.assertRaises(ValueError, lambda: p.port)
528
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000529 # Once again, repeat ourselves to test bytes
530 p = urllib.parse.urlsplit(b"http://www.example.net:foo")
531 self.assertEqual(p.netloc, b"www.example.net:foo")
532 self.assertRaises(ValueError, lambda: p.port)
533
534 p = urllib.parse.urlparse(b"http://www.example.net:foo")
535 self.assertEqual(p.netloc, b"www.example.net:foo")
536 self.assertRaises(ValueError, lambda: p.port)
537
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000538 def test_attributes_without_netloc(self):
539 # This example is straight from RFC 3261. It looks like it
540 # should allow the username, hostname, and port to be filled
541 # in, but doesn't. Since it's a URI and doesn't use the
542 # scheme://netloc syntax, the netloc and related attributes
543 # should be left empty.
544 uri = "sip:alice@atlanta.com;maddr=239.255.255.1;ttl=15"
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000545 p = urllib.parse.urlsplit(uri)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000546 self.assertEqual(p.netloc, "")
547 self.assertEqual(p.username, None)
548 self.assertEqual(p.password, None)
549 self.assertEqual(p.hostname, None)
550 self.assertEqual(p.port, None)
551 self.assertEqual(p.geturl(), uri)
552
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000553 p = urllib.parse.urlparse(uri)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000554 self.assertEqual(p.netloc, "")
555 self.assertEqual(p.username, None)
556 self.assertEqual(p.password, None)
557 self.assertEqual(p.hostname, None)
558 self.assertEqual(p.port, None)
559 self.assertEqual(p.geturl(), uri)
560
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000561 # You guessed it, repeating the test with bytes input
562 uri = b"sip:alice@atlanta.com;maddr=239.255.255.1;ttl=15"
563 p = urllib.parse.urlsplit(uri)
564 self.assertEqual(p.netloc, b"")
565 self.assertEqual(p.username, None)
566 self.assertEqual(p.password, None)
567 self.assertEqual(p.hostname, None)
568 self.assertEqual(p.port, None)
569 self.assertEqual(p.geturl(), uri)
570
571 p = urllib.parse.urlparse(uri)
572 self.assertEqual(p.netloc, b"")
573 self.assertEqual(p.username, None)
574 self.assertEqual(p.password, None)
575 self.assertEqual(p.hostname, None)
576 self.assertEqual(p.port, None)
577 self.assertEqual(p.geturl(), uri)
578
Christian Heimesfaf2f632008-01-06 16:59:19 +0000579 def test_noslash(self):
580 # Issue 1637: http://foo.com?query is legal
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000581 self.assertEqual(urllib.parse.urlparse("http://example.com?blahblah=/foo"),
Christian Heimesfaf2f632008-01-06 16:59:19 +0000582 ('http', 'example.com', '', '', 'blahblah=/foo', ''))
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000583 self.assertEqual(urllib.parse.urlparse(b"http://example.com?blahblah=/foo"),
584 (b'http', b'example.com', b'', b'', b'blahblah=/foo', b''))
Christian Heimesfaf2f632008-01-06 16:59:19 +0000585
Senthil Kumaran84c7d9f2010-08-04 04:50:44 +0000586 def test_withoutscheme(self):
587 # Test urlparse without scheme
588 # Issue 754016: urlparse goes wrong with IP:port without scheme
589 # RFC 1808 specifies that netloc should start with //, urlparse expects
590 # the same, otherwise it classifies the portion of url as path.
591 self.assertEqual(urllib.parse.urlparse("path"),
592 ('','','path','','',''))
593 self.assertEqual(urllib.parse.urlparse("//www.python.org:80"),
594 ('','www.python.org:80','','','',''))
595 self.assertEqual(urllib.parse.urlparse("http://www.python.org:80"),
596 ('http','www.python.org:80','','','',''))
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000597 # Repeat for bytes input
598 self.assertEqual(urllib.parse.urlparse(b"path"),
599 (b'',b'',b'path',b'',b'',b''))
600 self.assertEqual(urllib.parse.urlparse(b"//www.python.org:80"),
601 (b'',b'www.python.org:80',b'',b'',b'',b''))
602 self.assertEqual(urllib.parse.urlparse(b"http://www.python.org:80"),
603 (b'http',b'www.python.org:80',b'',b'',b'',b''))
Senthil Kumaran84c7d9f2010-08-04 04:50:44 +0000604
605 def test_portseparator(self):
606 # Issue 754016 makes changes for port separator ':' from scheme separator
607 self.assertEqual(urllib.parse.urlparse("path:80"),
608 ('','','path:80','','',''))
609 self.assertEqual(urllib.parse.urlparse("http:"),('http','','','','',''))
610 self.assertEqual(urllib.parse.urlparse("https:"),('https','','','','',''))
611 self.assertEqual(urllib.parse.urlparse("http://www.python.org:80"),
612 ('http','www.python.org:80','','','',''))
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000613 # As usual, need to check bytes input as well
614 self.assertEqual(urllib.parse.urlparse(b"path:80"),
615 (b'',b'',b'path:80',b'',b'',b''))
616 self.assertEqual(urllib.parse.urlparse(b"http:"),(b'http',b'',b'',b'',b'',b''))
617 self.assertEqual(urllib.parse.urlparse(b"https:"),(b'https',b'',b'',b'',b'',b''))
618 self.assertEqual(urllib.parse.urlparse(b"http://www.python.org:80"),
619 (b'http',b'www.python.org:80',b'',b'',b'',b''))
Senthil Kumaran84c7d9f2010-08-04 04:50:44 +0000620
Facundo Batista2ac5de22008-07-07 18:24:11 +0000621 def test_usingsys(self):
622 # Issue 3314: sys module is used in the error
623 self.assertRaises(TypeError, urllib.parse.urlencode, "foo")
624
Senthil Kumaran6be85c52010-02-19 07:42:50 +0000625 def test_anyscheme(self):
626 # Issue 7904: s3://foo.com/stuff has netloc "foo.com".
Ezio Melotti5e15efa2010-02-19 14:49:02 +0000627 self.assertEqual(urllib.parse.urlparse("s3://foo.com/stuff"),
628 ('s3', 'foo.com', '/stuff', '', '', ''))
629 self.assertEqual(urllib.parse.urlparse("x-newscheme://foo.com/stuff"),
630 ('x-newscheme', 'foo.com', '/stuff', '', '', ''))
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000631 # And for bytes...
632 self.assertEqual(urllib.parse.urlparse(b"s3://foo.com/stuff"),
633 (b's3', b'foo.com', b'/stuff', b'', b'', b''))
634 self.assertEqual(urllib.parse.urlparse(b"x-newscheme://foo.com/stuff"),
635 (b'x-newscheme', b'foo.com', b'/stuff', b'', b'', b''))
636
637 def test_mixed_types_rejected(self):
638 # Several functions that process either strings or ASCII encoded bytes
639 # accept multiple arguments. Check they reject mixed type input
Ezio Melottied3a7d22010-12-01 02:32:32 +0000640 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000641 urllib.parse.urlparse("www.python.org", b"http")
Ezio Melottied3a7d22010-12-01 02:32:32 +0000642 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000643 urllib.parse.urlparse(b"www.python.org", "http")
Ezio Melottied3a7d22010-12-01 02:32:32 +0000644 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000645 urllib.parse.urlsplit("www.python.org", b"http")
Ezio Melottied3a7d22010-12-01 02:32:32 +0000646 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000647 urllib.parse.urlsplit(b"www.python.org", "http")
Ezio Melottied3a7d22010-12-01 02:32:32 +0000648 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000649 urllib.parse.urlunparse(( b"http", "www.python.org","","","",""))
Ezio Melottied3a7d22010-12-01 02:32:32 +0000650 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000651 urllib.parse.urlunparse(("http", b"www.python.org","","","",""))
Ezio Melottied3a7d22010-12-01 02:32:32 +0000652 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000653 urllib.parse.urlunsplit((b"http", "www.python.org","","",""))
Ezio Melottied3a7d22010-12-01 02:32:32 +0000654 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000655 urllib.parse.urlunsplit(("http", b"www.python.org","","",""))
Ezio Melottied3a7d22010-12-01 02:32:32 +0000656 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000657 urllib.parse.urljoin("http://python.org", b"http://python.org")
Ezio Melottied3a7d22010-12-01 02:32:32 +0000658 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000659 urllib.parse.urljoin(b"http://python.org", "http://python.org")
660
661 def _check_result_type(self, str_type):
662 num_args = len(str_type._fields)
663 bytes_type = str_type._encoded_counterpart
664 self.assertIs(bytes_type._decoded_counterpart, str_type)
665 str_args = ('',) * num_args
666 bytes_args = (b'',) * num_args
667 str_result = str_type(*str_args)
668 bytes_result = bytes_type(*bytes_args)
669 encoding = 'ascii'
670 errors = 'strict'
671 self.assertEqual(str_result, str_args)
672 self.assertEqual(bytes_result.decode(), str_args)
673 self.assertEqual(bytes_result.decode(), str_result)
674 self.assertEqual(bytes_result.decode(encoding), str_args)
675 self.assertEqual(bytes_result.decode(encoding), str_result)
676 self.assertEqual(bytes_result.decode(encoding, errors), str_args)
677 self.assertEqual(bytes_result.decode(encoding, errors), str_result)
678 self.assertEqual(bytes_result, bytes_args)
679 self.assertEqual(str_result.encode(), bytes_args)
680 self.assertEqual(str_result.encode(), bytes_result)
681 self.assertEqual(str_result.encode(encoding), bytes_args)
682 self.assertEqual(str_result.encode(encoding), bytes_result)
683 self.assertEqual(str_result.encode(encoding, errors), bytes_args)
684 self.assertEqual(str_result.encode(encoding, errors), bytes_result)
685
def test_result_pairs(self):
    # Check encoding and decoding between result pairs
    paired_types = (
        urllib.parse.DefragResult,
        urllib.parse.SplitResult,
        urllib.parse.ParseResult,
    )
    for str_type in paired_types:
        self._check_result_type(str_type)
695
Victor Stinner1d87deb2011-01-14 13:05:19 +0000696 def test_parse_qs_encoding(self):
697 result = urllib.parse.parse_qs("key=\u0141%E9", encoding="latin-1")
698 self.assertEqual(result, {'key': ['\u0141\xE9']})
699 result = urllib.parse.parse_qs("key=\u0141%C3%A9", encoding="utf-8")
700 self.assertEqual(result, {'key': ['\u0141\xE9']})
701 result = urllib.parse.parse_qs("key=\u0141%C3%A9", encoding="ascii")
702 self.assertEqual(result, {'key': ['\u0141\ufffd\ufffd']})
703 result = urllib.parse.parse_qs("key=\u0141%E9-", encoding="ascii")
704 self.assertEqual(result, {'key': ['\u0141\ufffd-']})
705 result = urllib.parse.parse_qs("key=\u0141%E9-", encoding="ascii",
706 errors="ignore")
707 self.assertEqual(result, {'key': ['\u0141-']})
708
709 def test_parse_qsl_encoding(self):
710 result = urllib.parse.parse_qsl("key=\u0141%E9", encoding="latin-1")
711 self.assertEqual(result, [('key', '\u0141\xE9')])
712 result = urllib.parse.parse_qsl("key=\u0141%C3%A9", encoding="utf-8")
713 self.assertEqual(result, [('key', '\u0141\xE9')])
714 result = urllib.parse.parse_qsl("key=\u0141%C3%A9", encoding="ascii")
715 self.assertEqual(result, [('key', '\u0141\ufffd\ufffd')])
716 result = urllib.parse.parse_qsl("key=\u0141%E9-", encoding="ascii")
717 self.assertEqual(result, [('key', '\u0141\ufffd-')])
718 result = urllib.parse.parse_qsl("key=\u0141%E9-", encoding="ascii",
719 errors="ignore")
720 self.assertEqual(result, [('key', '\u0141-')])
721
722
Senthil Kumaran6be85c52010-02-19 07:42:50 +0000723
def test_main():
    """Run UrlParseTestCase through test.support.run_unittest."""
    support.run_unittest(UrlParseTestCase)


if __name__ == "__main__":
    # Executed directly as a script: run the suite.
    test_main()