blob: 628b8ec04aeb0a318e27cbce72049a355a110918 [file] [log] [blame]
Benjamin Peterson90f5ba52010-03-11 22:53:45 +00001#! /usr/bin/env python3
Skip Montanaro6ec967d2002-03-23 05:32:10 +00002
Benjamin Petersonee8712c2008-05-20 21:35:26 +00003from test import support
Skip Montanaro6ec967d2002-03-23 05:32:10 +00004import unittest
Jeremy Hylton1afc1692008-06-18 20:49:58 +00005import urllib.parse
Fred Drakea4d18a02001-01-05 05:57:04 +00006
# Base URLs used by the urljoin() tests below.  The RFC 1808 base carries
# a fragment; the RFC 2396 and RFC 3986 bases are the same URL without it.
RFC1808_BASE = 'http://a/b/c/d;p?q#f'
RFC2396_BASE = 'http://a/b/c/d;p?q'
RFC3986_BASE = 'http://a/b/c/d;p?q'
# A base URL with no params, query or fragment.
SIMPLE_BASE = 'http://a/b/c/d'
Fred Drakea4d18a02001-01-05 05:57:04 +000011
# A list of test cases.  Each test case is a two-tuple that contains
# a string with the query and a list of (name, value) pairs with the
# expected result.
14
# Each entry is (query, expected) where ``expected`` is the list of
# (name, value) pairs parse_qsl() should return with blank values kept.
# The str cases come first, followed by the same cases as bytes.
parse_qsl_test_cases = [
    ("", []),
    ("&", []),
    ("&&", []),
    ("=", [('', '')]),
    ("=a", [('', 'a')]),
    ("a", [('a', '')]),
    ("a=", [('a', '')]),
    ("&a=b", [('a', 'b')]),
    ("a=a+b&b=b+c", [('a', 'a b'), ('b', 'b c')]),
    ("a=1&a=2", [('a', '1'), ('a', '2')]),
    (b"", []),
    (b"&", []),
    (b"&&", []),
    (b"=", [(b'', b'')]),
    (b"=a", [(b'', b'a')]),
    (b"a", [(b'a', b'')]),
    (b"a=", [(b'a', b'')]),
    (b"&a=b", [(b'a', b'b')]),
    (b"a=a+b&b=b+c", [(b'a', b'a b'), (b'b', b'b c')]),
    (b"a=1&a=2", [(b'a', b'1'), (b'a', b'2')]),
]
39
Skip Montanaro6ec967d2002-03-23 05:32:10 +000040class UrlParseTestCase(unittest.TestCase):
Johannes Gijsbers41e4faa2005-01-09 15:29:10 +000041
42 def checkRoundtrips(self, url, parsed, split):
Jeremy Hylton1afc1692008-06-18 20:49:58 +000043 result = urllib.parse.urlparse(url)
Johannes Gijsbers41e4faa2005-01-09 15:29:10 +000044 self.assertEqual(result, parsed)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +000045 t = (result.scheme, result.netloc, result.path,
46 result.params, result.query, result.fragment)
47 self.assertEqual(t, parsed)
Johannes Gijsbers41e4faa2005-01-09 15:29:10 +000048 # put it back together and it should be the same
Jeremy Hylton1afc1692008-06-18 20:49:58 +000049 result2 = urllib.parse.urlunparse(result)
Johannes Gijsbers41e4faa2005-01-09 15:29:10 +000050 self.assertEqual(result2, url)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +000051 self.assertEqual(result2, result.geturl())
52
53 # the result of geturl() is a fixpoint; we can always parse it
54 # again to get the same result:
Jeremy Hylton1afc1692008-06-18 20:49:58 +000055 result3 = urllib.parse.urlparse(result.geturl())
Thomas Wouters49fd7fa2006-04-21 10:40:58 +000056 self.assertEqual(result3.geturl(), result.geturl())
57 self.assertEqual(result3, result)
58 self.assertEqual(result3.scheme, result.scheme)
59 self.assertEqual(result3.netloc, result.netloc)
60 self.assertEqual(result3.path, result.path)
61 self.assertEqual(result3.params, result.params)
62 self.assertEqual(result3.query, result.query)
63 self.assertEqual(result3.fragment, result.fragment)
64 self.assertEqual(result3.username, result.username)
65 self.assertEqual(result3.password, result.password)
66 self.assertEqual(result3.hostname, result.hostname)
67 self.assertEqual(result3.port, result.port)
Johannes Gijsbers41e4faa2005-01-09 15:29:10 +000068
69 # check the roundtrip using urlsplit() as well
Jeremy Hylton1afc1692008-06-18 20:49:58 +000070 result = urllib.parse.urlsplit(url)
Johannes Gijsbers41e4faa2005-01-09 15:29:10 +000071 self.assertEqual(result, split)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +000072 t = (result.scheme, result.netloc, result.path,
73 result.query, result.fragment)
74 self.assertEqual(t, split)
Jeremy Hylton1afc1692008-06-18 20:49:58 +000075 result2 = urllib.parse.urlunsplit(result)
Johannes Gijsbers41e4faa2005-01-09 15:29:10 +000076 self.assertEqual(result2, url)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +000077 self.assertEqual(result2, result.geturl())
78
79 # check the fixpoint property of re-parsing the result of geturl()
Jeremy Hylton1afc1692008-06-18 20:49:58 +000080 result3 = urllib.parse.urlsplit(result.geturl())
Thomas Wouters49fd7fa2006-04-21 10:40:58 +000081 self.assertEqual(result3.geturl(), result.geturl())
82 self.assertEqual(result3, result)
83 self.assertEqual(result3.scheme, result.scheme)
84 self.assertEqual(result3.netloc, result.netloc)
85 self.assertEqual(result3.path, result.path)
86 self.assertEqual(result3.query, result.query)
87 self.assertEqual(result3.fragment, result.fragment)
88 self.assertEqual(result3.username, result.username)
89 self.assertEqual(result3.password, result.password)
90 self.assertEqual(result3.hostname, result.hostname)
91 self.assertEqual(result3.port, result.port)
Johannes Gijsbers41e4faa2005-01-09 15:29:10 +000092
Facundo Batistac469d4c2008-09-03 22:49:01 +000093 def test_qsl(self):
94 for orig, expect in parse_qsl_test_cases:
95 result = urllib.parse.parse_qsl(orig, keep_blank_values=True)
96 self.assertEqual(result, expect, "Error parsing %s" % repr(orig))
97
98
Johannes Gijsbers41e4faa2005-01-09 15:29:10 +000099 def test_roundtrips(self):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000100 str_cases = [
Fred Drake70705652002-10-16 21:02:36 +0000101 ('file:///tmp/junk.txt',
102 ('file', '', '/tmp/junk.txt', '', '', ''),
103 ('file', '', '/tmp/junk.txt', '', '')),
Neal Norwitz68b539e2003-01-06 06:58:31 +0000104 ('imap://mail.python.org/mbox1',
105 ('imap', 'mail.python.org', '/mbox1', '', '', ''),
106 ('imap', 'mail.python.org', '/mbox1', '', '')),
Skip Montanarof09b88e2003-01-06 20:27:03 +0000107 ('mms://wms.sys.hinet.net/cts/Drama/09006251100.asf',
Johannes Gijsbers41e4faa2005-01-09 15:29:10 +0000108 ('mms', 'wms.sys.hinet.net', '/cts/Drama/09006251100.asf',
109 '', '', ''),
110 ('mms', 'wms.sys.hinet.net', '/cts/Drama/09006251100.asf',
111 '', '')),
Senthil Kumaraneaaec272009-03-30 21:54:41 +0000112 ('nfs://server/path/to/file.txt',
113 ('nfs', 'server', '/path/to/file.txt', '', '', ''),
114 ('nfs', 'server', '/path/to/file.txt', '', '')),
Fred Drake50747fc2005-07-29 15:56:32 +0000115 ('svn+ssh://svn.zope.org/repos/main/ZConfig/trunk/',
116 ('svn+ssh', 'svn.zope.org', '/repos/main/ZConfig/trunk/',
117 '', '', ''),
118 ('svn+ssh', 'svn.zope.org', '/repos/main/ZConfig/trunk/',
Senthil Kumaranead169d2010-05-13 03:37:23 +0000119 '', '')),
120 ('git+ssh://git@github.com/user/project.git',
121 ('git+ssh', 'git@github.com','/user/project.git',
122 '','',''),
123 ('git+ssh', 'git@github.com','/user/project.git',
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000124 '', '')),
Johannes Gijsbers41e4faa2005-01-09 15:29:10 +0000125 ]
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000126 def _encode(t):
127 return (t[0].encode('ascii'),
128 tuple(x.encode('ascii') for x in t[1]),
129 tuple(x.encode('ascii') for x in t[2]))
130 bytes_cases = [_encode(x) for x in str_cases]
131 for url, parsed, split in str_cases + bytes_cases:
Johannes Gijsbers41e4faa2005-01-09 15:29:10 +0000132 self.checkRoundtrips(url, parsed, split)
Michael W. Hudsonbd3e7712002-03-18 13:06:00 +0000133
Johannes Gijsbers41e4faa2005-01-09 15:29:10 +0000134 def test_http_roundtrips(self):
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000135 # urllib.parse.urlsplit treats 'http:' as an optimized special case,
Johannes Gijsbers41e4faa2005-01-09 15:29:10 +0000136 # so we test both 'http:' and 'https:' in all the following.
137 # Three cheers for white box knowledge!
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000138 str_cases = [
Johannes Gijsbers41e4faa2005-01-09 15:29:10 +0000139 ('://www.python.org',
140 ('www.python.org', '', '', '', ''),
141 ('www.python.org', '', '', '')),
142 ('://www.python.org#abc',
143 ('www.python.org', '', '', '', 'abc'),
144 ('www.python.org', '', '', 'abc')),
145 ('://www.python.org?q=abc',
146 ('www.python.org', '', '', 'q=abc', ''),
147 ('www.python.org', '', 'q=abc', '')),
148 ('://www.python.org/#abc',
149 ('www.python.org', '/', '', '', 'abc'),
150 ('www.python.org', '/', '', 'abc')),
151 ('://a/b/c/d;p?q#f',
152 ('a', '/b/c/d', 'p', 'q', 'f'),
153 ('a', '/b/c/d;p', 'q', 'f')),
154 ]
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000155 def _encode(t):
156 return (t[0].encode('ascii'),
157 tuple(x.encode('ascii') for x in t[1]),
158 tuple(x.encode('ascii') for x in t[2]))
159 bytes_cases = [_encode(x) for x in str_cases]
160 str_schemes = ('http', 'https')
161 bytes_schemes = (b'http', b'https')
162 str_tests = str_schemes, str_cases
163 bytes_tests = bytes_schemes, bytes_cases
164 for schemes, test_cases in (str_tests, bytes_tests):
165 for scheme in schemes:
166 for url, parsed, split in test_cases:
167 url = scheme + url
168 parsed = (scheme,) + parsed
169 split = (scheme,) + split
170 self.checkRoundtrips(url, parsed, split)
Fred Drake70705652002-10-16 21:02:36 +0000171
Skip Montanaro6ec967d2002-03-23 05:32:10 +0000172 def checkJoin(self, base, relurl, expected):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000173 str_components = (base, relurl, expected)
174 self.assertEqual(urllib.parse.urljoin(base, relurl), expected)
175 bytes_components = baseb, relurlb, expectedb = [
176 x.encode('ascii') for x in str_components]
177 self.assertEqual(urllib.parse.urljoin(baseb, relurlb), expectedb)
Guido van Rossumbbc05682002-10-14 19:59:54 +0000178
179 def test_unparse_parse(self):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000180 str_cases = ['Python', './Python','x-newscheme://foo.com/stuff','x://y','x:/y','x:/','/',]
181 bytes_cases = [x.encode('ascii') for x in str_cases]
182 for u in str_cases + bytes_cases:
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000183 self.assertEqual(urllib.parse.urlunsplit(urllib.parse.urlsplit(u)), u)
184 self.assertEqual(urllib.parse.urlunparse(urllib.parse.urlparse(u)), u)
Fred Drakea4d18a02001-01-05 05:57:04 +0000185
Skip Montanaro6ec967d2002-03-23 05:32:10 +0000186 def test_RFC1808(self):
187 # "normal" cases from RFC 1808:
188 self.checkJoin(RFC1808_BASE, 'g:h', 'g:h')
189 self.checkJoin(RFC1808_BASE, 'g', 'http://a/b/c/g')
190 self.checkJoin(RFC1808_BASE, './g', 'http://a/b/c/g')
191 self.checkJoin(RFC1808_BASE, 'g/', 'http://a/b/c/g/')
192 self.checkJoin(RFC1808_BASE, '/g', 'http://a/g')
193 self.checkJoin(RFC1808_BASE, '//g', 'http://g')
Skip Montanaro6ec967d2002-03-23 05:32:10 +0000194 self.checkJoin(RFC1808_BASE, 'g?y', 'http://a/b/c/g?y')
195 self.checkJoin(RFC1808_BASE, 'g?y/./x', 'http://a/b/c/g?y/./x')
196 self.checkJoin(RFC1808_BASE, '#s', 'http://a/b/c/d;p?q#s')
197 self.checkJoin(RFC1808_BASE, 'g#s', 'http://a/b/c/g#s')
198 self.checkJoin(RFC1808_BASE, 'g#s/./x', 'http://a/b/c/g#s/./x')
199 self.checkJoin(RFC1808_BASE, 'g?y#s', 'http://a/b/c/g?y#s')
Skip Montanaro6ec967d2002-03-23 05:32:10 +0000200 self.checkJoin(RFC1808_BASE, 'g;x', 'http://a/b/c/g;x')
201 self.checkJoin(RFC1808_BASE, 'g;x?y#s', 'http://a/b/c/g;x?y#s')
202 self.checkJoin(RFC1808_BASE, '.', 'http://a/b/c/')
203 self.checkJoin(RFC1808_BASE, './', 'http://a/b/c/')
204 self.checkJoin(RFC1808_BASE, '..', 'http://a/b/')
205 self.checkJoin(RFC1808_BASE, '../', 'http://a/b/')
206 self.checkJoin(RFC1808_BASE, '../g', 'http://a/b/g')
207 self.checkJoin(RFC1808_BASE, '../..', 'http://a/')
208 self.checkJoin(RFC1808_BASE, '../../', 'http://a/')
209 self.checkJoin(RFC1808_BASE, '../../g', 'http://a/g')
Fred Drakea4d18a02001-01-05 05:57:04 +0000210
Skip Montanaro6ec967d2002-03-23 05:32:10 +0000211 # "abnormal" cases from RFC 1808:
212 self.checkJoin(RFC1808_BASE, '', 'http://a/b/c/d;p?q#f')
213 self.checkJoin(RFC1808_BASE, '../../../g', 'http://a/../g')
214 self.checkJoin(RFC1808_BASE, '../../../../g', 'http://a/../../g')
215 self.checkJoin(RFC1808_BASE, '/./g', 'http://a/./g')
216 self.checkJoin(RFC1808_BASE, '/../g', 'http://a/../g')
217 self.checkJoin(RFC1808_BASE, 'g.', 'http://a/b/c/g.')
218 self.checkJoin(RFC1808_BASE, '.g', 'http://a/b/c/.g')
219 self.checkJoin(RFC1808_BASE, 'g..', 'http://a/b/c/g..')
220 self.checkJoin(RFC1808_BASE, '..g', 'http://a/b/c/..g')
221 self.checkJoin(RFC1808_BASE, './../g', 'http://a/b/g')
222 self.checkJoin(RFC1808_BASE, './g/.', 'http://a/b/c/g/')
223 self.checkJoin(RFC1808_BASE, 'g/./h', 'http://a/b/c/g/h')
224 self.checkJoin(RFC1808_BASE, 'g/../h', 'http://a/b/c/h')
Fred Drakea4d18a02001-01-05 05:57:04 +0000225
Skip Montanaro6ec967d2002-03-23 05:32:10 +0000226 # RFC 1808 and RFC 1630 disagree on these (according to RFC 1808),
227 # so we'll not actually run these tests (which expect 1808 behavior).
228 #self.checkJoin(RFC1808_BASE, 'http:g', 'http:g')
229 #self.checkJoin(RFC1808_BASE, 'http:', 'http:')
Fred Drakea4d18a02001-01-05 05:57:04 +0000230
Skip Montanaro6ec967d2002-03-23 05:32:10 +0000231 def test_RFC2396(self):
232 # cases from RFC 2396
Fred Drakea4d18a02001-01-05 05:57:04 +0000233
Skip Montanaro6ec967d2002-03-23 05:32:10 +0000234
235 self.checkJoin(RFC2396_BASE, 'g:h', 'g:h')
236 self.checkJoin(RFC2396_BASE, 'g', 'http://a/b/c/g')
237 self.checkJoin(RFC2396_BASE, './g', 'http://a/b/c/g')
238 self.checkJoin(RFC2396_BASE, 'g/', 'http://a/b/c/g/')
239 self.checkJoin(RFC2396_BASE, '/g', 'http://a/g')
240 self.checkJoin(RFC2396_BASE, '//g', 'http://g')
241 self.checkJoin(RFC2396_BASE, 'g?y', 'http://a/b/c/g?y')
242 self.checkJoin(RFC2396_BASE, '#s', 'http://a/b/c/d;p?q#s')
243 self.checkJoin(RFC2396_BASE, 'g#s', 'http://a/b/c/g#s')
244 self.checkJoin(RFC2396_BASE, 'g?y#s', 'http://a/b/c/g?y#s')
245 self.checkJoin(RFC2396_BASE, 'g;x', 'http://a/b/c/g;x')
246 self.checkJoin(RFC2396_BASE, 'g;x?y#s', 'http://a/b/c/g;x?y#s')
247 self.checkJoin(RFC2396_BASE, '.', 'http://a/b/c/')
248 self.checkJoin(RFC2396_BASE, './', 'http://a/b/c/')
249 self.checkJoin(RFC2396_BASE, '..', 'http://a/b/')
250 self.checkJoin(RFC2396_BASE, '../', 'http://a/b/')
251 self.checkJoin(RFC2396_BASE, '../g', 'http://a/b/g')
252 self.checkJoin(RFC2396_BASE, '../..', 'http://a/')
253 self.checkJoin(RFC2396_BASE, '../../', 'http://a/')
254 self.checkJoin(RFC2396_BASE, '../../g', 'http://a/g')
255 self.checkJoin(RFC2396_BASE, '', RFC2396_BASE)
256 self.checkJoin(RFC2396_BASE, '../../../g', 'http://a/../g')
257 self.checkJoin(RFC2396_BASE, '../../../../g', 'http://a/../../g')
258 self.checkJoin(RFC2396_BASE, '/./g', 'http://a/./g')
259 self.checkJoin(RFC2396_BASE, '/../g', 'http://a/../g')
260 self.checkJoin(RFC2396_BASE, 'g.', 'http://a/b/c/g.')
261 self.checkJoin(RFC2396_BASE, '.g', 'http://a/b/c/.g')
262 self.checkJoin(RFC2396_BASE, 'g..', 'http://a/b/c/g..')
263 self.checkJoin(RFC2396_BASE, '..g', 'http://a/b/c/..g')
264 self.checkJoin(RFC2396_BASE, './../g', 'http://a/b/g')
265 self.checkJoin(RFC2396_BASE, './g/.', 'http://a/b/c/g/')
266 self.checkJoin(RFC2396_BASE, 'g/./h', 'http://a/b/c/g/h')
267 self.checkJoin(RFC2396_BASE, 'g/../h', 'http://a/b/c/h')
268 self.checkJoin(RFC2396_BASE, 'g;x=1/./y', 'http://a/b/c/g;x=1/y')
269 self.checkJoin(RFC2396_BASE, 'g;x=1/../y', 'http://a/b/c/y')
270 self.checkJoin(RFC2396_BASE, 'g?y/./x', 'http://a/b/c/g?y/./x')
271 self.checkJoin(RFC2396_BASE, 'g?y/../x', 'http://a/b/c/g?y/../x')
272 self.checkJoin(RFC2396_BASE, 'g#s/./x', 'http://a/b/c/g#s/./x')
273 self.checkJoin(RFC2396_BASE, 'g#s/../x', 'http://a/b/c/g#s/../x')
274
Facundo Batista23e38562008-08-14 16:55:14 +0000275 def test_RFC3986(self):
Senthil Kumarandd3820f2010-05-07 04:19:23 +0000276 # Test cases from RFC3986
Facundo Batista23e38562008-08-14 16:55:14 +0000277 self.checkJoin(RFC3986_BASE, '?y','http://a/b/c/d;p?y')
278 self.checkJoin(RFC2396_BASE, ';x', 'http://a/b/c/;x')
Senthil Kumarandd3820f2010-05-07 04:19:23 +0000279 self.checkJoin(RFC3986_BASE, 'g:h','g:h')
280 self.checkJoin(RFC3986_BASE, 'g','http://a/b/c/g')
281 self.checkJoin(RFC3986_BASE, './g','http://a/b/c/g')
282 self.checkJoin(RFC3986_BASE, 'g/','http://a/b/c/g/')
283 self.checkJoin(RFC3986_BASE, '/g','http://a/g')
284 self.checkJoin(RFC3986_BASE, '//g','http://g')
285 self.checkJoin(RFC3986_BASE, '?y','http://a/b/c/d;p?y')
286 self.checkJoin(RFC3986_BASE, 'g?y','http://a/b/c/g?y')
287 self.checkJoin(RFC3986_BASE, '#s','http://a/b/c/d;p?q#s')
288 self.checkJoin(RFC3986_BASE, 'g#s','http://a/b/c/g#s')
289 self.checkJoin(RFC3986_BASE, 'g?y#s','http://a/b/c/g?y#s')
290 self.checkJoin(RFC3986_BASE, ';x','http://a/b/c/;x')
291 self.checkJoin(RFC3986_BASE, 'g;x','http://a/b/c/g;x')
292 self.checkJoin(RFC3986_BASE, 'g;x?y#s','http://a/b/c/g;x?y#s')
293 self.checkJoin(RFC3986_BASE, '','http://a/b/c/d;p?q')
294 self.checkJoin(RFC3986_BASE, '.','http://a/b/c/')
295 self.checkJoin(RFC3986_BASE, './','http://a/b/c/')
296 self.checkJoin(RFC3986_BASE, '..','http://a/b/')
297 self.checkJoin(RFC3986_BASE, '../','http://a/b/')
298 self.checkJoin(RFC3986_BASE, '../g','http://a/b/g')
299 self.checkJoin(RFC3986_BASE, '../..','http://a/')
300 self.checkJoin(RFC3986_BASE, '../../','http://a/')
301 self.checkJoin(RFC3986_BASE, '../../g','http://a/g')
302
303 #Abnormal Examples
304
305 # The 'abnormal scenarios' are incompatible with RFC2986 parsing
306 # Tests are here for reference.
307
308 #self.checkJoin(RFC3986_BASE, '../../../g','http://a/g')
309 #self.checkJoin(RFC3986_BASE, '../../../../g','http://a/g')
310 #self.checkJoin(RFC3986_BASE, '/./g','http://a/g')
311 #self.checkJoin(RFC3986_BASE, '/../g','http://a/g')
312
313 self.checkJoin(RFC3986_BASE, 'g.','http://a/b/c/g.')
314 self.checkJoin(RFC3986_BASE, '.g','http://a/b/c/.g')
315 self.checkJoin(RFC3986_BASE, 'g..','http://a/b/c/g..')
316 self.checkJoin(RFC3986_BASE, '..g','http://a/b/c/..g')
317 self.checkJoin(RFC3986_BASE, './../g','http://a/b/g')
318 self.checkJoin(RFC3986_BASE, './g/.','http://a/b/c/g/')
319 self.checkJoin(RFC3986_BASE, 'g/./h','http://a/b/c/g/h')
320 self.checkJoin(RFC3986_BASE, 'g/../h','http://a/b/c/h')
321 self.checkJoin(RFC3986_BASE, 'g;x=1/./y','http://a/b/c/g;x=1/y')
322 self.checkJoin(RFC3986_BASE, 'g;x=1/../y','http://a/b/c/y')
323 self.checkJoin(RFC3986_BASE, 'g?y/./x','http://a/b/c/g?y/./x')
324 self.checkJoin(RFC3986_BASE, 'g?y/../x','http://a/b/c/g?y/../x')
325 self.checkJoin(RFC3986_BASE, 'g#s/./x','http://a/b/c/g#s/./x')
326 self.checkJoin(RFC3986_BASE, 'g#s/../x','http://a/b/c/g#s/../x')
327 #self.checkJoin(RFC3986_BASE, 'http:g','http:g') # strict parser
328 self.checkJoin(RFC3986_BASE, 'http:g','http://a/b/c/g') #relaxed parser
Facundo Batista23e38562008-08-14 16:55:14 +0000329
Senthil Kumarandca5b862010-12-17 04:48:45 +0000330 # Test for issue9721
331 self.checkJoin('http://a/b/c/de', ';x','http://a/b/c/;x')
332
Senthil Kumaranaa69d4d2010-07-14 10:21:22 +0000333 def test_urljoins(self):
334 self.checkJoin(SIMPLE_BASE, 'g:h','g:h')
335 self.checkJoin(SIMPLE_BASE, 'http:g','http://a/b/c/g')
336 self.checkJoin(SIMPLE_BASE, 'http:','http://a/b/c/d')
337 self.checkJoin(SIMPLE_BASE, 'g','http://a/b/c/g')
338 self.checkJoin(SIMPLE_BASE, './g','http://a/b/c/g')
339 self.checkJoin(SIMPLE_BASE, 'g/','http://a/b/c/g/')
340 self.checkJoin(SIMPLE_BASE, '/g','http://a/g')
341 self.checkJoin(SIMPLE_BASE, '//g','http://g')
342 self.checkJoin(SIMPLE_BASE, '?y','http://a/b/c/d?y')
343 self.checkJoin(SIMPLE_BASE, 'g?y','http://a/b/c/g?y')
344 self.checkJoin(SIMPLE_BASE, 'g?y/./x','http://a/b/c/g?y/./x')
345 self.checkJoin(SIMPLE_BASE, '.','http://a/b/c/')
346 self.checkJoin(SIMPLE_BASE, './','http://a/b/c/')
347 self.checkJoin(SIMPLE_BASE, '..','http://a/b/')
348 self.checkJoin(SIMPLE_BASE, '../','http://a/b/')
349 self.checkJoin(SIMPLE_BASE, '../g','http://a/b/g')
350 self.checkJoin(SIMPLE_BASE, '../..','http://a/')
351 self.checkJoin(SIMPLE_BASE, '../../g','http://a/g')
352 self.checkJoin(SIMPLE_BASE, '../../../g','http://a/../g')
353 self.checkJoin(SIMPLE_BASE, './../g','http://a/b/g')
354 self.checkJoin(SIMPLE_BASE, './g/.','http://a/b/c/g/')
355 self.checkJoin(SIMPLE_BASE, '/./g','http://a/./g')
356 self.checkJoin(SIMPLE_BASE, 'g/./h','http://a/b/c/g/h')
357 self.checkJoin(SIMPLE_BASE, 'g/../h','http://a/b/c/h')
358 self.checkJoin(SIMPLE_BASE, 'http:g','http://a/b/c/g')
359 self.checkJoin(SIMPLE_BASE, 'http:','http://a/b/c/d')
360 self.checkJoin(SIMPLE_BASE, 'http:?y','http://a/b/c/d?y')
361 self.checkJoin(SIMPLE_BASE, 'http:g?y','http://a/b/c/g?y')
362 self.checkJoin(SIMPLE_BASE, 'http:g?y/./x','http://a/b/c/g?y/./x')
363
Senthil Kumaranad02d232010-04-16 03:02:13 +0000364 def test_RFC2732(self):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000365 str_cases = [
Senthil Kumaranad02d232010-04-16 03:02:13 +0000366 ('http://Test.python.org:5432/foo/', 'test.python.org', 5432),
367 ('http://12.34.56.78:5432/foo/', '12.34.56.78', 5432),
368 ('http://[::1]:5432/foo/', '::1', 5432),
369 ('http://[dead:beef::1]:5432/foo/', 'dead:beef::1', 5432),
370 ('http://[dead:beef::]:5432/foo/', 'dead:beef::', 5432),
371 ('http://[dead:beef:cafe:5417:affe:8FA3:deaf:feed]:5432/foo/',
372 'dead:beef:cafe:5417:affe:8fa3:deaf:feed', 5432),
373 ('http://[::12.34.56.78]:5432/foo/', '::12.34.56.78', 5432),
374 ('http://[::ffff:12.34.56.78]:5432/foo/',
375 '::ffff:12.34.56.78', 5432),
376 ('http://Test.python.org/foo/', 'test.python.org', None),
377 ('http://12.34.56.78/foo/', '12.34.56.78', None),
378 ('http://[::1]/foo/', '::1', None),
379 ('http://[dead:beef::1]/foo/', 'dead:beef::1', None),
380 ('http://[dead:beef::]/foo/', 'dead:beef::', None),
381 ('http://[dead:beef:cafe:5417:affe:8FA3:deaf:feed]/foo/',
382 'dead:beef:cafe:5417:affe:8fa3:deaf:feed', None),
383 ('http://[::12.34.56.78]/foo/', '::12.34.56.78', None),
384 ('http://[::ffff:12.34.56.78]/foo/',
385 '::ffff:12.34.56.78', None),
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000386 ]
387 def _encode(t):
388 return t[0].encode('ascii'), t[1].encode('ascii'), t[2]
389 bytes_cases = [_encode(x) for x in str_cases]
390 for url, hostname, port in str_cases + bytes_cases:
Senthil Kumaranad02d232010-04-16 03:02:13 +0000391 urlparsed = urllib.parse.urlparse(url)
392 self.assertEqual((urlparsed.hostname, urlparsed.port) , (hostname, port))
393
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000394 str_cases = [
Senthil Kumaranad02d232010-04-16 03:02:13 +0000395 'http://::12.34.56.78]/',
396 'http://[::1/foo/',
Senthil Kumaran7a1e09f2010-04-22 12:19:46 +0000397 'ftp://[::1/foo/bad]/bad',
Senthil Kumaran2eaef052010-04-20 20:42:50 +0000398 'http://[::1/foo/bad]/bad',
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000399 'http://[::ffff:12.34.56.78']
400 bytes_cases = [x.encode('ascii') for x in str_cases]
401 for invalid_url in str_cases + bytes_cases:
Senthil Kumaran7a1e09f2010-04-22 12:19:46 +0000402 self.assertRaises(ValueError, urllib.parse.urlparse, invalid_url)
Senthil Kumaranad02d232010-04-16 03:02:13 +0000403
Fred Drake70705652002-10-16 21:02:36 +0000404 def test_urldefrag(self):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000405 str_cases = [
Fred Drake70705652002-10-16 21:02:36 +0000406 ('http://python.org#frag', 'http://python.org', 'frag'),
407 ('http://python.org', 'http://python.org', ''),
408 ('http://python.org/#frag', 'http://python.org/', 'frag'),
409 ('http://python.org/', 'http://python.org/', ''),
410 ('http://python.org/?q#frag', 'http://python.org/?q', 'frag'),
411 ('http://python.org/?q', 'http://python.org/?q', ''),
412 ('http://python.org/p#frag', 'http://python.org/p', 'frag'),
413 ('http://python.org/p?q', 'http://python.org/p?q', ''),
414 (RFC1808_BASE, 'http://a/b/c/d;p?q', 'f'),
415 (RFC2396_BASE, 'http://a/b/c/d;p?q', ''),
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000416 ]
417 def _encode(t):
418 return type(t)(x.encode('ascii') for x in t)
419 bytes_cases = [_encode(x) for x in str_cases]
420 for url, defrag, frag in str_cases + bytes_cases:
421 result = urllib.parse.urldefrag(url)
422 self.assertEqual(result.geturl(), url)
423 self.assertEqual(result, (defrag, frag))
424 self.assertEqual(result.url, defrag)
425 self.assertEqual(result.fragment, frag)
Fred Drake70705652002-10-16 21:02:36 +0000426
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000427 def test_urlsplit_attributes(self):
428 url = "HTTP://WWW.PYTHON.ORG/doc/#frag"
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000429 p = urllib.parse.urlsplit(url)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000430 self.assertEqual(p.scheme, "http")
431 self.assertEqual(p.netloc, "WWW.PYTHON.ORG")
432 self.assertEqual(p.path, "/doc/")
433 self.assertEqual(p.query, "")
434 self.assertEqual(p.fragment, "frag")
435 self.assertEqual(p.username, None)
436 self.assertEqual(p.password, None)
437 self.assertEqual(p.hostname, "www.python.org")
438 self.assertEqual(p.port, None)
439 # geturl() won't return exactly the original URL in this case
440 # since the scheme is always case-normalized
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000441 # We handle this by ignoring the first 4 characters of the URL
442 self.assertEqual(p.geturl()[4:], url[4:])
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000443
444 url = "http://User:Pass@www.python.org:080/doc/?query=yes#frag"
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000445 p = urllib.parse.urlsplit(url)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000446 self.assertEqual(p.scheme, "http")
447 self.assertEqual(p.netloc, "User:Pass@www.python.org:080")
448 self.assertEqual(p.path, "/doc/")
449 self.assertEqual(p.query, "query=yes")
450 self.assertEqual(p.fragment, "frag")
451 self.assertEqual(p.username, "User")
452 self.assertEqual(p.password, "Pass")
453 self.assertEqual(p.hostname, "www.python.org")
454 self.assertEqual(p.port, 80)
455 self.assertEqual(p.geturl(), url)
456
Christian Heimesfaf2f632008-01-06 16:59:19 +0000457 # Addressing issue1698, which suggests Username can contain
458 # "@" characters. Though not RFC compliant, many ftp sites allow
459 # and request email addresses as usernames.
460
461 url = "http://User@example.com:Pass@www.python.org:080/doc/?query=yes#frag"
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000462 p = urllib.parse.urlsplit(url)
Christian Heimesfaf2f632008-01-06 16:59:19 +0000463 self.assertEqual(p.scheme, "http")
464 self.assertEqual(p.netloc, "User@example.com:Pass@www.python.org:080")
465 self.assertEqual(p.path, "/doc/")
466 self.assertEqual(p.query, "query=yes")
467 self.assertEqual(p.fragment, "frag")
468 self.assertEqual(p.username, "User@example.com")
469 self.assertEqual(p.password, "Pass")
470 self.assertEqual(p.hostname, "www.python.org")
471 self.assertEqual(p.port, 80)
472 self.assertEqual(p.geturl(), url)
473
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000474 # And check them all again, only with bytes this time
475 url = b"HTTP://WWW.PYTHON.ORG/doc/#frag"
476 p = urllib.parse.urlsplit(url)
477 self.assertEqual(p.scheme, b"http")
478 self.assertEqual(p.netloc, b"WWW.PYTHON.ORG")
479 self.assertEqual(p.path, b"/doc/")
480 self.assertEqual(p.query, b"")
481 self.assertEqual(p.fragment, b"frag")
482 self.assertEqual(p.username, None)
483 self.assertEqual(p.password, None)
484 self.assertEqual(p.hostname, b"www.python.org")
485 self.assertEqual(p.port, None)
486 self.assertEqual(p.geturl()[4:], url[4:])
487
488 url = b"http://User:Pass@www.python.org:080/doc/?query=yes#frag"
489 p = urllib.parse.urlsplit(url)
490 self.assertEqual(p.scheme, b"http")
491 self.assertEqual(p.netloc, b"User:Pass@www.python.org:080")
492 self.assertEqual(p.path, b"/doc/")
493 self.assertEqual(p.query, b"query=yes")
494 self.assertEqual(p.fragment, b"frag")
495 self.assertEqual(p.username, b"User")
496 self.assertEqual(p.password, b"Pass")
497 self.assertEqual(p.hostname, b"www.python.org")
498 self.assertEqual(p.port, 80)
499 self.assertEqual(p.geturl(), url)
500
501 url = b"http://User@example.com:Pass@www.python.org:080/doc/?query=yes#frag"
502 p = urllib.parse.urlsplit(url)
503 self.assertEqual(p.scheme, b"http")
504 self.assertEqual(p.netloc, b"User@example.com:Pass@www.python.org:080")
505 self.assertEqual(p.path, b"/doc/")
506 self.assertEqual(p.query, b"query=yes")
507 self.assertEqual(p.fragment, b"frag")
508 self.assertEqual(p.username, b"User@example.com")
509 self.assertEqual(p.password, b"Pass")
510 self.assertEqual(p.hostname, b"www.python.org")
511 self.assertEqual(p.port, 80)
512 self.assertEqual(p.geturl(), url)
Christian Heimesfaf2f632008-01-06 16:59:19 +0000513
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000514 def test_attributes_bad_port(self):
515 """Check handling of non-integer ports."""
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000516 p = urllib.parse.urlsplit("http://www.example.net:foo")
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000517 self.assertEqual(p.netloc, "www.example.net:foo")
518 self.assertRaises(ValueError, lambda: p.port)
519
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000520 p = urllib.parse.urlparse("http://www.example.net:foo")
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000521 self.assertEqual(p.netloc, "www.example.net:foo")
522 self.assertRaises(ValueError, lambda: p.port)
523
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000524 # Once again, repeat ourselves to test bytes
525 p = urllib.parse.urlsplit(b"http://www.example.net:foo")
526 self.assertEqual(p.netloc, b"www.example.net:foo")
527 self.assertRaises(ValueError, lambda: p.port)
528
529 p = urllib.parse.urlparse(b"http://www.example.net:foo")
530 self.assertEqual(p.netloc, b"www.example.net:foo")
531 self.assertRaises(ValueError, lambda: p.port)
532
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000533 def test_attributes_without_netloc(self):
534 # This example is straight from RFC 3261. It looks like it
535 # should allow the username, hostname, and port to be filled
536 # in, but doesn't. Since it's a URI and doesn't use the
537 # scheme://netloc syntax, the netloc and related attributes
538 # should be left empty.
539 uri = "sip:alice@atlanta.com;maddr=239.255.255.1;ttl=15"
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000540 p = urllib.parse.urlsplit(uri)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000541 self.assertEqual(p.netloc, "")
542 self.assertEqual(p.username, None)
543 self.assertEqual(p.password, None)
544 self.assertEqual(p.hostname, None)
545 self.assertEqual(p.port, None)
546 self.assertEqual(p.geturl(), uri)
547
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000548 p = urllib.parse.urlparse(uri)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000549 self.assertEqual(p.netloc, "")
550 self.assertEqual(p.username, None)
551 self.assertEqual(p.password, None)
552 self.assertEqual(p.hostname, None)
553 self.assertEqual(p.port, None)
554 self.assertEqual(p.geturl(), uri)
555
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000556 # You guessed it, repeating the test with bytes input
557 uri = b"sip:alice@atlanta.com;maddr=239.255.255.1;ttl=15"
558 p = urllib.parse.urlsplit(uri)
559 self.assertEqual(p.netloc, b"")
560 self.assertEqual(p.username, None)
561 self.assertEqual(p.password, None)
562 self.assertEqual(p.hostname, None)
563 self.assertEqual(p.port, None)
564 self.assertEqual(p.geturl(), uri)
565
566 p = urllib.parse.urlparse(uri)
567 self.assertEqual(p.netloc, b"")
568 self.assertEqual(p.username, None)
569 self.assertEqual(p.password, None)
570 self.assertEqual(p.hostname, None)
571 self.assertEqual(p.port, None)
572 self.assertEqual(p.geturl(), uri)
573
Christian Heimesfaf2f632008-01-06 16:59:19 +0000574 def test_noslash(self):
575 # Issue 1637: http://foo.com?query is legal
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000576 self.assertEqual(urllib.parse.urlparse("http://example.com?blahblah=/foo"),
Christian Heimesfaf2f632008-01-06 16:59:19 +0000577 ('http', 'example.com', '', '', 'blahblah=/foo', ''))
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000578 self.assertEqual(urllib.parse.urlparse(b"http://example.com?blahblah=/foo"),
579 (b'http', b'example.com', b'', b'', b'blahblah=/foo', b''))
Christian Heimesfaf2f632008-01-06 16:59:19 +0000580
Senthil Kumaran84c7d9f2010-08-04 04:50:44 +0000581 def test_withoutscheme(self):
582 # Test urlparse without scheme
583 # Issue 754016: urlparse goes wrong with IP:port without scheme
584 # RFC 1808 specifies that netloc should start with //, urlparse expects
585 # the same, otherwise it classifies the portion of url as path.
586 self.assertEqual(urllib.parse.urlparse("path"),
587 ('','','path','','',''))
588 self.assertEqual(urllib.parse.urlparse("//www.python.org:80"),
589 ('','www.python.org:80','','','',''))
590 self.assertEqual(urllib.parse.urlparse("http://www.python.org:80"),
591 ('http','www.python.org:80','','','',''))
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000592 # Repeat for bytes input
593 self.assertEqual(urllib.parse.urlparse(b"path"),
594 (b'',b'',b'path',b'',b'',b''))
595 self.assertEqual(urllib.parse.urlparse(b"//www.python.org:80"),
596 (b'',b'www.python.org:80',b'',b'',b'',b''))
597 self.assertEqual(urllib.parse.urlparse(b"http://www.python.org:80"),
598 (b'http',b'www.python.org:80',b'',b'',b'',b''))
Senthil Kumaran84c7d9f2010-08-04 04:50:44 +0000599
600 def test_portseparator(self):
601 # Issue 754016 makes changes for port separator ':' from scheme separator
602 self.assertEqual(urllib.parse.urlparse("path:80"),
603 ('','','path:80','','',''))
604 self.assertEqual(urllib.parse.urlparse("http:"),('http','','','','',''))
605 self.assertEqual(urllib.parse.urlparse("https:"),('https','','','','',''))
606 self.assertEqual(urllib.parse.urlparse("http://www.python.org:80"),
607 ('http','www.python.org:80','','','',''))
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000608 # As usual, need to check bytes input as well
609 self.assertEqual(urllib.parse.urlparse(b"path:80"),
610 (b'',b'',b'path:80',b'',b'',b''))
611 self.assertEqual(urllib.parse.urlparse(b"http:"),(b'http',b'',b'',b'',b'',b''))
612 self.assertEqual(urllib.parse.urlparse(b"https:"),(b'https',b'',b'',b'',b'',b''))
613 self.assertEqual(urllib.parse.urlparse(b"http://www.python.org:80"),
614 (b'http',b'www.python.org:80',b'',b'',b'',b''))
Senthil Kumaran84c7d9f2010-08-04 04:50:44 +0000615
Facundo Batista2ac5de22008-07-07 18:24:11 +0000616 def test_usingsys(self):
617 # Issue 3314: sys module is used in the error
618 self.assertRaises(TypeError, urllib.parse.urlencode, "foo")
619
Senthil Kumaran6be85c52010-02-19 07:42:50 +0000620 def test_anyscheme(self):
621 # Issue 7904: s3://foo.com/stuff has netloc "foo.com".
Ezio Melotti5e15efa2010-02-19 14:49:02 +0000622 self.assertEqual(urllib.parse.urlparse("s3://foo.com/stuff"),
623 ('s3', 'foo.com', '/stuff', '', '', ''))
624 self.assertEqual(urllib.parse.urlparse("x-newscheme://foo.com/stuff"),
625 ('x-newscheme', 'foo.com', '/stuff', '', '', ''))
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000626 # And for bytes...
627 self.assertEqual(urllib.parse.urlparse(b"s3://foo.com/stuff"),
628 (b's3', b'foo.com', b'/stuff', b'', b'', b''))
629 self.assertEqual(urllib.parse.urlparse(b"x-newscheme://foo.com/stuff"),
630 (b'x-newscheme', b'foo.com', b'/stuff', b'', b'', b''))
631
632 def test_mixed_types_rejected(self):
633 # Several functions that process either strings or ASCII encoded bytes
634 # accept multiple arguments. Check they reject mixed type input
Ezio Melottied3a7d22010-12-01 02:32:32 +0000635 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000636 urllib.parse.urlparse("www.python.org", b"http")
Ezio Melottied3a7d22010-12-01 02:32:32 +0000637 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000638 urllib.parse.urlparse(b"www.python.org", "http")
Ezio Melottied3a7d22010-12-01 02:32:32 +0000639 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000640 urllib.parse.urlsplit("www.python.org", b"http")
Ezio Melottied3a7d22010-12-01 02:32:32 +0000641 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000642 urllib.parse.urlsplit(b"www.python.org", "http")
Ezio Melottied3a7d22010-12-01 02:32:32 +0000643 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000644 urllib.parse.urlunparse(( b"http", "www.python.org","","","",""))
Ezio Melottied3a7d22010-12-01 02:32:32 +0000645 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000646 urllib.parse.urlunparse(("http", b"www.python.org","","","",""))
Ezio Melottied3a7d22010-12-01 02:32:32 +0000647 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000648 urllib.parse.urlunsplit((b"http", "www.python.org","","",""))
Ezio Melottied3a7d22010-12-01 02:32:32 +0000649 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000650 urllib.parse.urlunsplit(("http", b"www.python.org","","",""))
Ezio Melottied3a7d22010-12-01 02:32:32 +0000651 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000652 urllib.parse.urljoin("http://python.org", b"http://python.org")
Ezio Melottied3a7d22010-12-01 02:32:32 +0000653 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000654 urllib.parse.urljoin(b"http://python.org", "http://python.org")
655
def _check_result_type(self, str_type):
    # Helper: check that the str result class `str_type` and its bytes
    # counterpart are linked to each other and convert into one another
    # through encode()/decode(), using all-empty field values.
    bytes_type = str_type._encoded_counterpart
    self.assertIs(bytes_type._decoded_counterpart, str_type)

    num_args = len(str_type._fields)
    str_args = ('',) * num_args
    bytes_args = (b'',) * num_args
    str_result = str_type(*str_args)
    bytes_result = bytes_type(*bytes_args)

    # Exercise the conversion with default arguments, with an explicit
    # encoding, and with an explicit encoding plus error handler.
    encoding = 'ascii'
    errors = 'strict'
    codec_arg_sets = ((), (encoding,), (encoding, errors))

    self.assertEqual(str_result, str_args)
    for codec_args in codec_arg_sets:
        self.assertEqual(bytes_result.decode(*codec_args), str_args)
        self.assertEqual(bytes_result.decode(*codec_args), str_result)

    self.assertEqual(bytes_result, bytes_args)
    for codec_args in codec_arg_sets:
        self.assertEqual(str_result.encode(*codec_args), bytes_args)
        self.assertEqual(str_result.encode(*codec_args), bytes_result)
680
681 def test_result_pairs(self):
682 # Check encoding and decoding between result pairs
683 result_types = [
684 urllib.parse.DefragResult,
685 urllib.parse.SplitResult,
686 urllib.parse.ParseResult,
687 ]
688 for result_type in result_types:
689 self._check_result_type(result_type)
690
Victor Stinner1d87deb2011-01-14 13:05:19 +0000691 def test_parse_qs_encoding(self):
692 result = urllib.parse.parse_qs("key=\u0141%E9", encoding="latin-1")
693 self.assertEqual(result, {'key': ['\u0141\xE9']})
694 result = urllib.parse.parse_qs("key=\u0141%C3%A9", encoding="utf-8")
695 self.assertEqual(result, {'key': ['\u0141\xE9']})
696 result = urllib.parse.parse_qs("key=\u0141%C3%A9", encoding="ascii")
697 self.assertEqual(result, {'key': ['\u0141\ufffd\ufffd']})
698 result = urllib.parse.parse_qs("key=\u0141%E9-", encoding="ascii")
699 self.assertEqual(result, {'key': ['\u0141\ufffd-']})
700 result = urllib.parse.parse_qs("key=\u0141%E9-", encoding="ascii",
701 errors="ignore")
702 self.assertEqual(result, {'key': ['\u0141-']})
703
704 def test_parse_qsl_encoding(self):
705 result = urllib.parse.parse_qsl("key=\u0141%E9", encoding="latin-1")
706 self.assertEqual(result, [('key', '\u0141\xE9')])
707 result = urllib.parse.parse_qsl("key=\u0141%C3%A9", encoding="utf-8")
708 self.assertEqual(result, [('key', '\u0141\xE9')])
709 result = urllib.parse.parse_qsl("key=\u0141%C3%A9", encoding="ascii")
710 self.assertEqual(result, [('key', '\u0141\ufffd\ufffd')])
711 result = urllib.parse.parse_qsl("key=\u0141%E9-", encoding="ascii")
712 self.assertEqual(result, [('key', '\u0141\ufffd-')])
713 result = urllib.parse.parse_qsl("key=\u0141%E9-", encoding="ascii",
714 errors="ignore")
715 self.assertEqual(result, [('key', '\u0141-')])
716
717
Senthil Kumaran6be85c52010-02-19 07:42:50 +0000718
def test_main():
    # Entry point: hand the URL parsing test case to the test.support
    # runner.
    support.run_unittest(UrlParseTestCase)
Skip Montanaro6ec967d2002-03-23 05:32:10 +0000721
# Run the test suite when this file is executed as a script.
if __name__ == "__main__":
    test_main()