Blame - Lib/test/test_urlparse.py - platform/external/python/cpython3

2002-03-23 05:32:10 +0000

[diff] [blame]

3

import unittest

Jeremy Hylton

2008-06-18 20:49:58 +0000

[diff] [blame]

4

import urllib.parse

Fred Drake

2001-01-05 05:57:04 +0000

[diff] [blame]

5

Fred Drake

2001-01-05 05:57:04 +0000

[diff] [blame]

6

# Base URLs against which the urljoin tests resolve relative references.
RFC1808_BASE = "http://a/b/c/d;p?q#f"
RFC2396_BASE = "http://a/b/c/d;p?q"
RFC3986_BASE = "http://a/b/c/d;p?q"
SIMPLE_BASE = "http://a/b/c/d"

Fred Drake

2001-01-05 05:57:04 +0000

[diff] [blame]

10

Senthil Kumaran

257b980

2017-04-04 21:19:43 -0700

[diff] [blame]

11

# Every parse_qsl test case pairs a query (str or bytes) with the list of
# (name, value) tuples expected when blank values are kept.
parse_qsl_test_cases = [
    # str queries separated by "&"
    ("", []),
    ("&", []),
    ("&&", []),
    ("=", [("", "")]),
    ("=a", [("", "a")]),
    ("a", [("a", "")]),
    ("a=", [("a", "")]),
    ("&a=b", [("a", "b")]),
    ("a=a+b&b=b+c", [("a", "a b"), ("b", "b c")]),
    ("a=1&a=2", [("a", "1"), ("a", "2")]),
    # bytes queries separated by "&"
    (b"", []),
    (b"&", []),
    (b"&&", []),
    (b"=", [(b"", b"")]),
    (b"=a", [(b"", b"a")]),
    (b"a", [(b"a", b"")]),
    (b"a=", [(b"a", b"")]),
    (b"&a=b", [(b"a", b"b")]),
    (b"a=a+b&b=b+c", [(b"a", b"a b"), (b"b", b"b c")]),
    (b"a=1&a=2", [(b"a", b"1"), (b"a", b"2")]),
    # str queries separated by ";"
    (";", []),
    (";;", []),
    (";a=b", [("a", "b")]),
    ("a=a+b;b=b+c", [("a", "a b"), ("b", "b c")]),
    ("a=1;a=2", [("a", "1"), ("a", "2")]),
    # bytes queries separated by ";"
    (b";", []),
    (b";;", []),
    (b";a=b", [(b"a", b"b")]),
    (b"a=a+b;b=b+c", [(b"a", b"a b"), (b"b", b"b c")]),
    (b"a=1;a=2", [(b"a", b"1"), (b"a", b"2")]),
]

46

Senthil Kumaran

257b980

2017-04-04 21:19:43 -0700

[diff] [blame]

47

# Every parse_qs test case pairs a query (str or bytes) with the dictionary
# (name -> list of values) expected when blank values are kept.
parse_qs_test_cases = [
    # str queries separated by "&"
    ("", {}),
    ("&", {}),
    ("&&", {}),
    ("=", {"": [""]}),
    ("=a", {"": ["a"]}),
    ("a", {"a": [""]}),
    ("a=", {"a": [""]}),
    ("&a=b", {"a": ["b"]}),
    ("a=a+b&b=b+c", {"a": ["a b"], "b": ["b c"]}),
    ("a=1&a=2", {"a": ["1", "2"]}),
    # bytes queries separated by "&"
    (b"", {}),
    (b"&", {}),
    (b"&&", {}),
    (b"=", {b"": [b""]}),
    (b"=a", {b"": [b"a"]}),
    (b"a", {b"a": [b""]}),
    (b"a=", {b"a": [b""]}),
    (b"&a=b", {b"a": [b"b"]}),
    (b"a=a+b&b=b+c", {b"a": [b"a b"], b"b": [b"b c"]}),
    (b"a=1&a=2", {b"a": [b"1", b"2"]}),
    # str queries separated by ";"
    (";", {}),
    (";;", {}),
    (";a=b", {"a": ["b"]}),
    ("a=a+b;b=b+c", {"a": ["a b"], "b": ["b c"]}),
    ("a=1;a=2", {"a": ["1", "2"]}),
    # bytes queries separated by ";"
    (b";", {}),
    (b";;", {}),
    (b";a=b", {b"a": [b"b"]}),
    (b"a=a+b;b=b+c", {b"a": [b"a b"], b"b": [b"b c"]}),
    (b"a=1;a=2", {b"a": [b"1", b"2"]}),
]

82

Skip Montanaro

2002-03-23 05:32:10 +0000

[diff] [blame]

83

class UrlParseTestCase(unittest.TestCase):

Johannes Gijsbers

2005-01-09 15:29:10 +0000

[diff] [blame]

84

85

def checkRoundtrips(self, url, parsed, split):
    """Check that *url* survives parse/unparse round trips unchanged.

    *parsed* is the 6-tuple expected from urlparse(url) and *split* the
    5-tuple expected from urlsplit(url).  For each of the two parsers the
    result is compared with the expectation, put back together, and the
    output of geturl() is parsed again to confirm it is a fixpoint.
    """
    parse_fields = ('scheme', 'netloc', 'path', 'params', 'query', 'fragment')
    split_fields = ('scheme', 'netloc', 'path', 'query', 'fragment')
    netloc_parts = ('username', 'password', 'hostname', 'port')
    variants = (
        (urllib.parse.urlparse, urllib.parse.urlunparse, parse_fields, parsed),
        # check the roundtrip using urlsplit() as well
        (urllib.parse.urlsplit, urllib.parse.urlunsplit, split_fields, split),
    )
    for decompose, recompose, fields, wanted in variants:
        first = decompose(url)
        self.assertEqual(first, wanted)
        by_attribute = tuple(getattr(first, field) for field in fields)
        self.assertEqual(by_attribute, wanted)

        # put it back together and it should be the same
        rebuilt = recompose(first)
        self.assertEqual(rebuilt, url)
        self.assertEqual(rebuilt, first.geturl())

        # the result of geturl() is a fixpoint; we can always parse it
        # again to get the same result:
        second = decompose(first.geturl())
        self.assertEqual(second.geturl(), first.geturl())
        self.assertEqual(second, first)
        for field in fields + netloc_parts:
            self.assertEqual(getattr(second, field), getattr(first, field))

Johannes Gijsbers

2005-01-09 15:29:10 +0000

[diff] [blame]

135

Facundo Batista

2008-09-03 22:49:01 +0000

[diff] [blame]

136

def test_qsl(self):
    # Run parse_qsl over every shared case, once keeping blank values and
    # once dropping them.
    for query, pairs in parse_qsl_test_cases:
        note = "Error parsing %r" % query

        kept = urllib.parse.parse_qsl(query, keep_blank_values=True)
        self.assertEqual(kept, pairs, note)

        # Without blank values, only pairs with a non-empty value remain.
        non_blank_pairs = [pair for pair in pairs if pair[1]]
        dropped = urllib.parse.parse_qsl(query, keep_blank_values=False)
        self.assertEqual(dropped, non_blank_pairs, note)

Facundo Batista

2008-09-03 22:49:01 +0000

[diff] [blame]

144

Senthil Kumaran

e38415e

2016-04-16 07:33:15 -0700

[diff] [blame]

145

def test_qs(self):
    # Run parse_qs over every shared case, once keeping blank values and
    # once dropping them.
    for query, mapping in parse_qs_test_cases:
        note = "Error parsing %r" % query

        kept = urllib.parse.parse_qs(query, keep_blank_values=True)
        self.assertEqual(kept, mapping, note)

        # Without blank values, only names whose first value is non-empty
        # remain.
        non_blank_mapping = {name: values
                             for name, values in mapping.items()
                             if values[0]}
        dropped = urllib.parse.parse_qs(query, keep_blank_values=False)
        self.assertEqual(dropped, non_blank_mapping, note)

154

Johannes Gijsbers

2005-01-09 15:29:10 +0000

[diff] [blame]

155

def test_roundtrips(self):

Nick Coghlan

2010-11-30 15:48:08 +0000

[diff] [blame]

156

str_cases = [

Fred Drake

2002-10-16 21:02:36 +0000

[diff] [blame]

157

('file:///tmp/junk.txt',

158

('file', '', '/tmp/junk.txt', '', '', ''),

159

('file', '', '/tmp/junk.txt', '', '')),

Neal Norwitz

68b539e

2003-01-06 06:58:31 +0000

[diff] [blame]

160

('imap://mail.python.org/mbox1',

161

('imap', 'mail.python.org', '/mbox1', '', '', ''),

162

('imap', 'mail.python.org', '/mbox1', '', '')),

Skip Montanaro

f09b88e

2003-01-06 20:27:03 +0000

[diff] [blame]

163

('mms://wms.sys.hinet.net/cts/Drama/09006251100.asf',

Johannes Gijsbers

2005-01-09 15:29:10 +0000

[diff] [blame]

164

('mms', 'wms.sys.hinet.net', '/cts/Drama/09006251100.asf',

165

'', '', ''),

166

('mms', 'wms.sys.hinet.net', '/cts/Drama/09006251100.asf',

167

'', '')),

Senthil Kumaran

eaaec27

2009-03-30 21:54:41 +0000

[diff] [blame]

168

('nfs://server/path/to/file.txt',

169

('nfs', 'server', '/path/to/file.txt', '', '', ''),

170

('nfs', 'server', '/path/to/file.txt', '', '')),

Fred Drake

50747fc

2005-07-29 15:56:32 +0000

[diff] [blame]

171

('svn+ssh://svn.zope.org/repos/main/ZConfig/trunk/',

172

('svn+ssh', 'svn.zope.org', '/repos/main/ZConfig/trunk/',

173

'', '', ''),

174

('svn+ssh', 'svn.zope.org', '/repos/main/ZConfig/trunk/',

Senthil Kumaran

ead169d

2010-05-13 03:37:23 +0000

[diff] [blame]

175

'', '')),

176

('git+ssh://git@github.com/user/project.git',

177

('git+ssh', 'git@github.com','/user/project.git',

178

'','',''),

179

('git+ssh', 'git@github.com','/user/project.git',

Nick Coghlan

2010-11-30 15:48:08 +0000

[diff] [blame]

180

'', '')),

Johannes Gijsbers

2005-01-09 15:29:10 +0000

[diff] [blame]

181

]

Nick Coghlan

2010-11-30 15:48:08 +0000

[diff] [blame]

182

def _encode(t):

183

return (t[0].encode('ascii'),

184

tuple(x.encode('ascii') for x in t[1]),

185

tuple(x.encode('ascii') for x in t[2]))

186

bytes_cases = [_encode(x) for x in str_cases]

187

for url, parsed, split in str_cases + bytes_cases:

Johannes Gijsbers

2005-01-09 15:29:10 +0000

[diff] [blame]

188

self.checkRoundtrips(url, parsed, split)

Michael W. Hudson

bd3e771

2002-03-18 13:06:00 +0000

[diff] [blame]

189

Johannes Gijsbers

2005-01-09 15:29:10 +0000

[diff] [blame]

190

def test_http_roundtrips(self):

Jeremy Hylton

2008-06-18 20:49:58 +0000

[diff] [blame]

191

# urllib.parse.urlsplit treats 'http:' as an optimized special case,

Johannes Gijsbers

2005-01-09 15:29:10 +0000

[diff] [blame]

192

# so we test both 'http:' and 'https:' in all the following.

193

# Three cheers for white box knowledge!

Nick Coghlan

2010-11-30 15:48:08 +0000

[diff] [blame]

194

str_cases = [

Johannes Gijsbers

2005-01-09 15:29:10 +0000

[diff] [blame]

195

('://www.python.org',

196

('www.python.org', '', '', '', ''),

197

('www.python.org', '', '', '')),

198

('://www.python.org#abc',

199

('www.python.org', '', '', '', 'abc'),

200

('www.python.org', '', '', 'abc')),

201

('://www.python.org?q=abc',

202

('www.python.org', '', '', 'q=abc', ''),

203

('www.python.org', '', 'q=abc', '')),

204

('://www.python.org/#abc',

205

('www.python.org', '/', '', '', 'abc'),

206

('www.python.org', '/', '', 'abc')),

207

('://a/b/c/d;p?q#f',

208

('a', '/b/c/d', 'p', 'q', 'f'),

209

('a', '/b/c/d;p', 'q', 'f')),

210

]

Nick Coghlan

2010-11-30 15:48:08 +0000

[diff] [blame]

211

def _encode(t):

212

return (t[0].encode('ascii'),

213

tuple(x.encode('ascii') for x in t[1]),

214

tuple(x.encode('ascii') for x in t[2]))

215

bytes_cases = [_encode(x) for x in str_cases]

216

str_schemes = ('http', 'https')

217

bytes_schemes = (b'http', b'https')

218

str_tests = str_schemes, str_cases

219

bytes_tests = bytes_schemes, bytes_cases

220

for schemes, test_cases in (str_tests, bytes_tests):

221

for scheme in schemes:

222

for url, parsed, split in test_cases:

223

url = scheme + url

224

parsed = (scheme,) + parsed

225

split = (scheme,) + split

226

self.checkRoundtrips(url, parsed, split)

Fred Drake

2002-10-16 21:02:36 +0000

[diff] [blame]

227

Skip Montanaro

2002-03-23 05:32:10 +0000

[diff] [blame]

228

def checkJoin(self, base, relurl, expected):
    """Assert that urljoin(base, relurl) equals *expected*.

    All three arguments are str.  The same check is then repeated with
    each of them encoded to bytes as ASCII.
    """
    joined_text = urllib.parse.urljoin(base, relurl)
    self.assertEqual(joined_text, expected)

    base_bytes, rel_bytes, expected_bytes = (
        part.encode('ascii') for part in (base, relurl, expected))
    joined_bytes = urllib.parse.urljoin(base_bytes, rel_bytes)
    self.assertEqual(joined_bytes, expected_bytes)

Guido van Rossum

bbc0568

2002-10-14 19:59:54 +0000

[diff] [blame]

234

235

def test_unparse_parse(self):

Nick Coghlan

2010-11-30 15:48:08 +0000

[diff] [blame]

236

str_cases = ['Python', './Python','x-newscheme://foo.com/stuff','x://y','x:/y','x:/','/',]

237

bytes_cases = [x.encode('ascii') for x in str_cases]

238

for u in str_cases + bytes_cases:

Jeremy Hylton

2008-06-18 20:49:58 +0000

[diff] [blame]

239

self.assertEqual(urllib.parse.urlunsplit(urllib.parse.urlsplit(u)), u)

240

self.assertEqual(urllib.parse.urlunparse(urllib.parse.urlparse(u)), u)

Fred Drake

2001-01-05 05:57:04 +0000

[diff] [blame]

241

Skip Montanaro

2002-03-23 05:32:10 +0000

[diff] [blame]

242

def test_RFC1808(self):
    # Each pair is (relative reference, expected result of joining it
    # onto RFC1808_BASE).

    # "normal" cases from RFC 1808:
    normal_cases = [
        ('g:h', 'g:h'),
        ('g', 'http://a/b/c/g'),
        ('./g', 'http://a/b/c/g'),
        ('g/', 'http://a/b/c/g/'),
        ('/g', 'http://a/g'),
        ('//g', 'http://g'),
        ('g?y', 'http://a/b/c/g?y'),
        ('g?y/./x', 'http://a/b/c/g?y/./x'),
        ('#s', 'http://a/b/c/d;p?q#s'),
        ('g#s', 'http://a/b/c/g#s'),
        ('g#s/./x', 'http://a/b/c/g#s/./x'),
        ('g?y#s', 'http://a/b/c/g?y#s'),
        ('g;x', 'http://a/b/c/g;x'),
        ('g;x?y#s', 'http://a/b/c/g;x?y#s'),
        ('.', 'http://a/b/c/'),
        ('./', 'http://a/b/c/'),
        ('..', 'http://a/b/'),
        ('../', 'http://a/b/'),
        ('../g', 'http://a/b/g'),
        ('../..', 'http://a/'),
        ('../../', 'http://a/'),
        ('../../g', 'http://a/g'),
    ]

    # "abnormal" cases from RFC 1808:
    abnormal_cases = [
        ('', 'http://a/b/c/d;p?q#f'),
        ('g.', 'http://a/b/c/g.'),
        ('.g', 'http://a/b/c/.g'),
        ('g..', 'http://a/b/c/g..'),
        ('..g', 'http://a/b/c/..g'),
        ('./../g', 'http://a/b/g'),
        ('./g/.', 'http://a/b/c/g/'),
        ('g/./h', 'http://a/b/c/g/h'),
        ('g/../h', 'http://a/b/c/h'),
    ]

    for relative, resolved in normal_cases + abnormal_cases:
        self.checkJoin(RFC1808_BASE, relative, resolved)

    # RFC 1808 and RFC 1630 disagree on these (according to RFC 1808),
    # so we'll not actually run these tests (which expect 1808 behavior).
    #self.checkJoin(RFC1808_BASE, 'http:g', 'http:g')
    #self.checkJoin(RFC1808_BASE, 'http:', 'http:')

    # XXX: The following tests are no longer compatible with RFC3986
    # self.checkJoin(RFC1808_BASE, '../../../g', 'http://a/../g')
    # self.checkJoin(RFC1808_BASE, '../../../../g', 'http://a/../../g')
    # self.checkJoin(RFC1808_BASE, '/./g', 'http://a/./g')
    # self.checkJoin(RFC1808_BASE, '/../g', 'http://a/../g')

288

289

Senthil Kumaran

397eb44

2011-04-15 18:20:24 +0800

[diff] [blame]

290

def test_RFC2368(self):

291

# Issue 11467: path that starts with a number is not parsed correctly

292

self.assertEqual(urllib.parse.urlparse('mailto:1337@example.org'),

293

('mailto', '', '1337@example.org', '', '', ''))

294

Skip Montanaro

2002-03-23 05:32:10 +0000

[diff] [blame]

295

def test_RFC2396(self):
    # cases from RFC 2396
    #
    # Each pair is (relative reference, expected result of joining it
    # onto RFC2396_BASE).
    cases = [
        ('g:h', 'g:h'),
        ('g', 'http://a/b/c/g'),
        ('./g', 'http://a/b/c/g'),
        ('g/', 'http://a/b/c/g/'),
        ('/g', 'http://a/g'),
        ('//g', 'http://g'),
        ('g?y', 'http://a/b/c/g?y'),
        ('#s', 'http://a/b/c/d;p?q#s'),
        ('g#s', 'http://a/b/c/g#s'),
        ('g?y#s', 'http://a/b/c/g?y#s'),
        ('g;x', 'http://a/b/c/g;x'),
        ('g;x?y#s', 'http://a/b/c/g;x?y#s'),
        ('.', 'http://a/b/c/'),
        ('./', 'http://a/b/c/'),
        ('..', 'http://a/b/'),
        ('../', 'http://a/b/'),
        ('../g', 'http://a/b/g'),
        ('../..', 'http://a/'),
        ('../../', 'http://a/'),
        ('../../g', 'http://a/g'),
        ('', RFC2396_BASE),
        ('g.', 'http://a/b/c/g.'),
        ('.g', 'http://a/b/c/.g'),
        ('g..', 'http://a/b/c/g..'),
        ('..g', 'http://a/b/c/..g'),
        ('./../g', 'http://a/b/g'),
        ('./g/.', 'http://a/b/c/g/'),
        ('g/./h', 'http://a/b/c/g/h'),
        ('g/../h', 'http://a/b/c/h'),
        ('g;x=1/./y', 'http://a/b/c/g;x=1/y'),
        ('g;x=1/../y', 'http://a/b/c/y'),
        ('g?y/./x', 'http://a/b/c/g?y/./x'),
        ('g?y/../x', 'http://a/b/c/g?y/../x'),
        ('g#s/./x', 'http://a/b/c/g#s/./x'),
        ('g#s/../x', 'http://a/b/c/g#s/../x'),
    ]
    for relative, resolved in cases:
        self.checkJoin(RFC2396_BASE, relative, resolved)

    # XXX: The following tests are no longer compatible with RFC3986
    # self.checkJoin(RFC2396_BASE, '../../../g', 'http://a/../g')
    # self.checkJoin(RFC2396_BASE, '../../../../g', 'http://a/../../g')
    # self.checkJoin(RFC2396_BASE, '/./g', 'http://a/./g')
    # self.checkJoin(RFC2396_BASE, '/../g', 'http://a/../g')

339

Facundo Batista

23e3856

2008-08-14 16:55:14 +0000

[diff] [blame]

340

def test_RFC3986(self):
    # Each pair is (relative reference, expected result of joining it
    # onto RFC3986_BASE).  Every reference is asserted exactly once; the
    # repeated '?y', ';x' and '../../../g' checks were exact duplicates.

    # Normal Examples
    normal_cases = [
        ('g:h', 'g:h'),
        ('g', 'http://a/b/c/g'),
        ('./g', 'http://a/b/c/g'),
        ('g/', 'http://a/b/c/g/'),
        ('/g', 'http://a/g'),
        ('//g', 'http://g'),
        ('?y', 'http://a/b/c/d;p?y'),
        ('g?y', 'http://a/b/c/g?y'),
        ('#s', 'http://a/b/c/d;p?q#s'),
        ('g#s', 'http://a/b/c/g#s'),
        ('g?y#s', 'http://a/b/c/g?y#s'),
        (';x', 'http://a/b/c/;x'),
        ('g;x', 'http://a/b/c/g;x'),
        ('g;x?y#s', 'http://a/b/c/g;x?y#s'),
        ('', 'http://a/b/c/d;p?q'),
        ('.', 'http://a/b/c/'),
        ('./', 'http://a/b/c/'),
        ('..', 'http://a/b/'),
        ('../', 'http://a/b/'),
        ('../g', 'http://a/b/g'),
        ('../..', 'http://a/'),
        ('../../', 'http://a/'),
        ('../../g', 'http://a/g'),
    ]

    # Abnormal Examples
    #
    # These are live assertions: references that climb above the root or
    # contain '.'/'..' right after it resolve to 'http://a/g' here, i.e.
    # the RFC 3986 result rather than the older 'http://a/../g' form.
    abnormal_cases = [
        ('../../../g', 'http://a/g'),
        ('../../../../g', 'http://a/g'),
        ('/./g', 'http://a/g'),
        ('/../g', 'http://a/g'),
        ('g.', 'http://a/b/c/g.'),
        ('.g', 'http://a/b/c/.g'),
        ('g..', 'http://a/b/c/g..'),
        ('..g', 'http://a/b/c/..g'),
        ('./../g', 'http://a/b/g'),
        ('./g/.', 'http://a/b/c/g/'),
        ('g/./h', 'http://a/b/c/g/h'),
        ('g/../h', 'http://a/b/c/h'),
        ('g;x=1/./y', 'http://a/b/c/g;x=1/y'),
        ('g;x=1/../y', 'http://a/b/c/y'),
        ('g?y/./x', 'http://a/b/c/g?y/./x'),
        ('g?y/../x', 'http://a/b/c/g?y/../x'),
        ('g#s/./x', 'http://a/b/c/g#s/./x'),
        ('g#s/../x', 'http://a/b/c/g#s/../x'),
        #('http:g', 'http:g'),  # strict parser
        ('http:g', 'http://a/b/c/g'),  # relaxed parser
    ]

    for relative, resolved in normal_cases + abnormal_cases:
        self.checkJoin(RFC3986_BASE, relative, resolved)

    # Test for issue9721
    self.checkJoin('http://a/b/c/de', ';x', 'http://a/b/c/;x')

396

Senthil Kumaran

2010-07-14 10:21:22 +0000

[diff] [blame]

397

def test_urljoins(self):
    # Each pair is (relative reference, expected result of joining it
    # onto SIMPLE_BASE).  'http:g' and 'http:' used to be asserted twice;
    # they are kept once, next to the other 'http:'-prefixed references.
    simple_base_cases = [
        ('g:h', 'g:h'),
        ('g', 'http://a/b/c/g'),
        ('./g', 'http://a/b/c/g'),
        ('g/', 'http://a/b/c/g/'),
        ('/g', 'http://a/g'),
        ('//g', 'http://g'),
        ('?y', 'http://a/b/c/d?y'),
        ('g?y', 'http://a/b/c/g?y'),
        ('g?y/./x', 'http://a/b/c/g?y/./x'),
        ('.', 'http://a/b/c/'),
        ('./', 'http://a/b/c/'),
        ('..', 'http://a/b/'),
        ('../', 'http://a/b/'),
        ('../g', 'http://a/b/g'),
        ('../..', 'http://a/'),
        ('../../g', 'http://a/g'),
        ('./../g', 'http://a/b/g'),
        ('./g/.', 'http://a/b/c/g/'),
        ('g/./h', 'http://a/b/c/g/h'),
        ('g/../h', 'http://a/b/c/h'),
        # references that repeat the base's scheme
        ('http:g', 'http://a/b/c/g'),
        ('http:', 'http://a/b/c/d'),
        ('http:?y', 'http://a/b/c/d?y'),
        ('http:g?y', 'http://a/b/c/g?y'),
        ('http:g?y/./x', 'http://a/b/c/g?y/./x'),
    ]
    for relative, resolved in simple_base_cases:
        self.checkJoin(SIMPLE_BASE, relative, resolved)

    # Triples of (base, relative reference, expected result) for bases
    # other than SIMPLE_BASE.
    other_base_cases = [
        ('http:///', '..', 'http:///'),
        ('', 'http://a/b/c/g?y/./x', 'http://a/b/c/g?y/./x'),
        ('', 'http://a/./g', 'http://a/./g'),
        ('svn://pathtorepo/dir1', 'dir2', 'svn://pathtorepo/dir2'),
        ('svn+ssh://pathtorepo/dir1', 'dir2', 'svn+ssh://pathtorepo/dir2'),
        ('ws://a/b', 'g', 'ws://a/g'),
        ('wss://a/b', 'g', 'wss://a/g'),
    ]
    for base, relative, resolved in other_base_cases:
        self.checkJoin(base, relative, resolved)

    # XXX: The following tests are no longer compatible with RFC3986
    # self.checkJoin(SIMPLE_BASE, '../../../g','http://a/../g')
    # self.checkJoin(SIMPLE_BASE, '/./g','http://a/./g')

    # test for issue22118 duplicate slashes
    self.checkJoin(SIMPLE_BASE + '/', 'foo', SIMPLE_BASE + '/foo')

    # Non-RFC-defined tests, covering variations of base and trailing
    # slashes
    trailing_slash_cases = [
        ('http://a/b/c/d/e/', '../../f/g/', 'http://a/b/c/f/g/'),
        ('http://a/b/c/d/e', '../../f/g/', 'http://a/b/f/g/'),
        ('http://a/b/c/d/e/', '/../../f/g/', 'http://a/f/g/'),
        ('http://a/b/c/d/e', '/../../f/g/', 'http://a/f/g/'),
        ('http://a/b/c/d/e/', '../../f/g', 'http://a/b/c/f/g'),
        ('http://a/b/', '../../f/g/', 'http://a/f/g/'),
    ]
    for base, relative, resolved in trailing_slash_cases:
        self.checkJoin(base, relative, resolved)

    # issue 23703: don't duplicate filename
    self.checkJoin('a', 'b', 'b')

451

Senthil Kumaran

2010-04-16 03:02:13 +0000

[diff] [blame]

452

def test_RFC2732(self):

Nick Coghlan

2010-11-30 15:48:08 +0000

[diff] [blame]

453

str_cases = [

Senthil Kumaran

2010-04-16 03:02:13 +0000

[diff] [blame]

454

('http://Test.python.org:5432/foo/', 'test.python.org', 5432),

455

('http://12.34.56.78:5432/foo/', '12.34.56.78', 5432),

456

('http://[::1]:5432/foo/', '::1', 5432),

457

('http://[dead:beef::1]:5432/foo/', 'dead:beef::1', 5432),

458

('http://[dead:beef::]:5432/foo/', 'dead:beef::', 5432),

459

('http://[dead:beef:cafe:5417:affe:8FA3:deaf:feed]:5432/foo/',

460

'dead:beef:cafe:5417:affe:8fa3:deaf:feed', 5432),

461

('http://[::12.34.56.78]:5432/foo/', '::12.34.56.78', 5432),

462

('http://[::ffff:12.34.56.78]:5432/foo/',

463

'::ffff:12.34.56.78', 5432),

464

('http://Test.python.org/foo/', 'test.python.org', None),

465

('http://12.34.56.78/foo/', '12.34.56.78', None),

466

('http://[::1]/foo/', '::1', None),

467

('http://[dead:beef::1]/foo/', 'dead:beef::1', None),

468

('http://[dead:beef::]/foo/', 'dead:beef::', None),

469

('http://[dead:beef:cafe:5417:affe:8FA3:deaf:feed]/foo/',

470

'dead:beef:cafe:5417:affe:8fa3:deaf:feed', None),

471

('http://[::12.34.56.78]/foo/', '::12.34.56.78', None),

472

('http://[::ffff:12.34.56.78]/foo/',

473

'::ffff:12.34.56.78', None),

Serhiy Storchaka

ff97b08

2014-01-18 18:30:33 +0200

[diff] [blame]

474

('http://Test.python.org:/foo/', 'test.python.org', None),

475

('http://12.34.56.78:/foo/', '12.34.56.78', None),

476

('http://[::1]:/foo/', '::1', None),

477

('http://[dead:beef::1]:/foo/', 'dead:beef::1', None),

478

('http://[dead:beef::]:/foo/', 'dead:beef::', None),

479

('http://[dead:beef:cafe:5417:affe:8FA3:deaf:feed]:/foo/',

480

'dead:beef:cafe:5417:affe:8fa3:deaf:feed', None),

481

('http://[::12.34.56.78]:/foo/', '::12.34.56.78', None),

482

('http://[::ffff:12.34.56.78]:/foo/',

483

'::ffff:12.34.56.78', None),

Nick Coghlan

2010-11-30 15:48:08 +0000

[diff] [blame]

484

]

485

def _encode(t):

486

return t[0].encode('ascii'), t[1].encode('ascii'), t[2]

487

bytes_cases = [_encode(x) for x in str_cases]

488

for url, hostname, port in str_cases + bytes_cases:

Senthil Kumaran

2010-04-16 03:02:13 +0000

[diff] [blame]

489

urlparsed = urllib.parse.urlparse(url)

490

self.assertEqual((urlparsed.hostname, urlparsed.port) , (hostname, port))

491

Nick Coghlan

2010-11-30 15:48:08 +0000

[diff] [blame]

492

str_cases = [

Senthil Kumaran

2010-04-16 03:02:13 +0000

[diff] [blame]

493

'http://::12.34.56.78]/',

494

'http://[::1/foo/',

Senthil Kumaran

7a1e09f

2010-04-22 12:19:46 +0000

[diff] [blame]

495

'ftp://[::1/foo/bad]/bad',

Senthil Kumaran

2eaef05

2010-04-20 20:42:50 +0000

[diff] [blame]

496

'http://[::1/foo/bad]/bad',

Nick Coghlan

2010-11-30 15:48:08 +0000

[diff] [blame]

497

'http://[::ffff:12.34.56.78']

498

bytes_cases = [x.encode('ascii') for x in str_cases]

499

for invalid_url in str_cases + bytes_cases:

Senthil Kumaran

7a1e09f

2010-04-22 12:19:46 +0000

[diff] [blame]

500

self.assertRaises(ValueError, urllib.parse.urlparse, invalid_url)

Senthil Kumaran

2010-04-16 03:02:13 +0000

[diff] [blame]

501

Fred Drake

2002-10-16 21:02:36 +0000

[diff] [blame]

502

def test_urldefrag(self):
    # Each case is (url, url without its fragment, fragment); every case
    # is checked as str and as ASCII-encoded bytes.
    text_cases = [
        ('http://python.org#frag', 'http://python.org', 'frag'),
        ('http://python.org', 'http://python.org', ''),
        ('http://python.org/#frag', 'http://python.org/', 'frag'),
        ('http://python.org/', 'http://python.org/', ''),
        ('http://python.org/?q#frag', 'http://python.org/?q', 'frag'),
        ('http://python.org/?q', 'http://python.org/?q', ''),
        ('http://python.org/p#frag', 'http://python.org/p', 'frag'),
        ('http://python.org/p?q', 'http://python.org/p?q', ''),
        (RFC1808_BASE, 'http://a/b/c/d;p?q', 'f'),
        (RFC2396_BASE, 'http://a/b/c/d;p?q', ''),
    ]
    binary_cases = [
        tuple(item.encode('ascii') for item in case)
        for case in text_cases
    ]
    for url, defrag, frag in text_cases + binary_cases:
        outcome = urllib.parse.urldefrag(url)
        self.assertEqual(outcome.geturl(), url)
        self.assertEqual(outcome, (defrag, frag))
        self.assertEqual(outcome.url, defrag)
        self.assertEqual(outcome.fragment, frag)

Fred Drake

2002-10-16 21:02:36 +0000

[diff] [blame]

524

Коренберг Марк

fbd6051

2017-12-21 17:16:17 +0500

[diff] [blame]

525

def test_urlsplit_scoped_IPv6(self):

526

p = urllib.parse.urlsplit('http://[FE80::822a:a8ff:fe49:470c%tESt]:1234')

527

self.assertEqual(p.hostname, "fe80::822a:a8ff:fe49:470c%tESt")

528

self.assertEqual(p.netloc, '[FE80::822a:a8ff:fe49:470c%tESt]:1234')

529

530

p = urllib.parse.urlsplit(b'http://[FE80::822a:a8ff:fe49:470c%tESt]:1234')

531

self.assertEqual(p.hostname, b"fe80::822a:a8ff:fe49:470c%tESt")

532

self.assertEqual(p.netloc, b'[FE80::822a:a8ff:fe49:470c%tESt]:1234')

533

Thomas Wouters

2006-04-21 10:40:58 +0000

[diff] [blame]

534

def test_urlsplit_attributes(self):

535

url = "HTTP://WWW.PYTHON.ORG/doc/#frag"

Jeremy Hylton

2008-06-18 20:49:58 +0000

[diff] [blame]

536

p = urllib.parse.urlsplit(url)

Thomas Wouters

2006-04-21 10:40:58 +0000

[diff] [blame]

537

self.assertEqual(p.scheme, "http")

538

self.assertEqual(p.netloc, "WWW.PYTHON.ORG")

539

self.assertEqual(p.path, "/doc/")

540

self.assertEqual(p.query, "")

541

self.assertEqual(p.fragment, "frag")

542

self.assertEqual(p.username, None)

543

self.assertEqual(p.password, None)

544

self.assertEqual(p.hostname, "www.python.org")

545

self.assertEqual(p.port, None)

546

# geturl() won't return exactly the original URL in this case

547

# since the scheme is always case-normalized

Nick Coghlan

2010-11-30 15:48:08 +0000

[diff] [blame]

548

# We handle this by ignoring the first 4 characters of the URL

549

self.assertEqual(p.geturl()[4:], url[4:])

Thomas Wouters

2006-04-21 10:40:58 +0000

[diff] [blame]

550

551

url = "http://User:Pass@www.python.org:080/doc/?query=yes#frag"

Jeremy Hylton

2008-06-18 20:49:58 +0000

[diff] [blame]

552

p = urllib.parse.urlsplit(url)

Thomas Wouters

2006-04-21 10:40:58 +0000

[diff] [blame]

553

self.assertEqual(p.scheme, "http")

554

self.assertEqual(p.netloc, "User:Pass@www.python.org:080")

555

self.assertEqual(p.path, "/doc/")

556

self.assertEqual(p.query, "query=yes")

557

self.assertEqual(p.fragment, "frag")

558

self.assertEqual(p.username, "User")

559

self.assertEqual(p.password, "Pass")

560

self.assertEqual(p.hostname, "www.python.org")

561

self.assertEqual(p.port, 80)

562

self.assertEqual(p.geturl(), url)

563

Christian Heimes

2008-01-06 16:59:19 +0000

[diff] [blame]

564

# Addressing issue1698, which suggests Username can contain

565

# "@" characters. Though not RFC compliant, many ftp sites allow

566

# and request email addresses as usernames.

567

568

url = "http://User@example.com:Pass@www.python.org:080/doc/?query=yes#frag"

Jeremy Hylton

2008-06-18 20:49:58 +0000

[diff] [blame]

569

p = urllib.parse.urlsplit(url)

Christian Heimes

2008-01-06 16:59:19 +0000

[diff] [blame]

570

self.assertEqual(p.scheme, "http")

571

self.assertEqual(p.netloc, "User@example.com:Pass@www.python.org:080")

572

self.assertEqual(p.path, "/doc/")

573

self.assertEqual(p.query, "query=yes")

574

self.assertEqual(p.fragment, "frag")

575

self.assertEqual(p.username, "User@example.com")

576

self.assertEqual(p.password, "Pass")

577

self.assertEqual(p.hostname, "www.python.org")

578

self.assertEqual(p.port, 80)

579

self.assertEqual(p.geturl(), url)

580

Nick Coghlan

2010-11-30 15:48:08 +0000

[diff] [blame]

581

# And check them all again, only with bytes this time

582

url = b"HTTP://WWW.PYTHON.ORG/doc/#frag"

583

p = urllib.parse.urlsplit(url)

584

self.assertEqual(p.scheme, b"http")

585

self.assertEqual(p.netloc, b"WWW.PYTHON.ORG")

586

self.assertEqual(p.path, b"/doc/")

587

self.assertEqual(p.query, b"")

588

self.assertEqual(p.fragment, b"frag")

589

self.assertEqual(p.username, None)

590

self.assertEqual(p.password, None)

591

self.assertEqual(p.hostname, b"www.python.org")

592

self.assertEqual(p.port, None)

593

self.assertEqual(p.geturl()[4:], url[4:])

594

595

url = b"http://User:Pass@www.python.org:080/doc/?query=yes#frag"

596

p = urllib.parse.urlsplit(url)

597

self.assertEqual(p.scheme, b"http")

598

self.assertEqual(p.netloc, b"User:Pass@www.python.org:080")

599

self.assertEqual(p.path, b"/doc/")

600

self.assertEqual(p.query, b"query=yes")

601

self.assertEqual(p.fragment, b"frag")

602

self.assertEqual(p.username, b"User")

603

self.assertEqual(p.password, b"Pass")

604

self.assertEqual(p.hostname, b"www.python.org")

605

self.assertEqual(p.port, 80)

606

self.assertEqual(p.geturl(), url)

607

608

url = b"http://User@example.com:Pass@www.python.org:080/doc/?query=yes#frag"

609

p = urllib.parse.urlsplit(url)

610

self.assertEqual(p.scheme, b"http")

611

self.assertEqual(p.netloc, b"User@example.com:Pass@www.python.org:080")

612

self.assertEqual(p.path, b"/doc/")

613

self.assertEqual(p.query, b"query=yes")

614

self.assertEqual(p.fragment, b"frag")

615

self.assertEqual(p.username, b"User@example.com")

616

self.assertEqual(p.password, b"Pass")

617

self.assertEqual(p.hostname, b"www.python.org")

618

self.assertEqual(p.port, 80)

619

self.assertEqual(p.geturl(), url)

Christian Heimes

2008-01-06 16:59:19 +0000

[diff] [blame]

620

Robert Collins

dfa95c9

2015-08-10 09:53:30 +1200

[diff] [blame]

621

# Verify an illegal port raises ValueError

Senthil Kumaran

2fc5a50

2012-05-24 21:56:17 +0800

[diff] [blame]

622

url = b"HTTP://WWW.PYTHON.ORG:65536/doc/#frag"

623

p = urllib.parse.urlsplit(url)

Robert Collins

dfa95c9

2015-08-10 09:53:30 +1200

[diff] [blame]

624

with self.assertRaisesRegex(ValueError, "out of range"):

625

p.port

Senthil Kumaran

2fc5a50

2012-05-24 21:56:17 +0800

[diff] [blame]

626

Thomas Wouters

2006-04-21 10:40:58 +0000

[diff] [blame]

627

def test_attributes_bad_port(self):

Robert Collins

dfa95c9

2015-08-10 09:53:30 +1200

[diff] [blame]

628

"""Check handling of invalid ports."""

629

for bytes in (False, True):

630

for parse in (urllib.parse.urlsplit, urllib.parse.urlparse):

631

for port in ("foo", "1.5", "-1", "0x10"):

632

with self.subTest(bytes=bytes, parse=parse, port=port):

633

netloc = "www.example.net:" + port

634

url = "http://" + netloc

635

if bytes:

636

netloc = netloc.encode("ascii")

637

url = url.encode("ascii")

638

p = parse(url)

639

self.assertEqual(p.netloc, netloc)

640

with self.assertRaises(ValueError):

641

p.port

Nick Coghlan

2010-11-30 15:48:08 +0000

[diff] [blame]

642

Thomas Wouters

2006-04-21 10:40:58 +0000

[diff] [blame]

643

def test_attributes_without_netloc(self):

644

# This example is straight from RFC 3261. It looks like it

645

# should allow the username, hostname, and port to be filled

646

# in, but doesn't. Since it's a URI and doesn't use the

647

# scheme://netloc syntax, the netloc and related attributes

648

# should be left empty.

649

uri = "sip:alice@atlanta.com;maddr=239.255.255.1;ttl=15"

Jeremy Hylton

2008-06-18 20:49:58 +0000

[diff] [blame]

650

p = urllib.parse.urlsplit(uri)

Thomas Wouters

2006-04-21 10:40:58 +0000

[diff] [blame]

651

self.assertEqual(p.netloc, "")

652

self.assertEqual(p.username, None)

653

self.assertEqual(p.password, None)

654

self.assertEqual(p.hostname, None)

655

self.assertEqual(p.port, None)

656

self.assertEqual(p.geturl(), uri)

657

Jeremy Hylton

2008-06-18 20:49:58 +0000

[diff] [blame]

658

p = urllib.parse.urlparse(uri)

Thomas Wouters

2006-04-21 10:40:58 +0000

[diff] [blame]

659

self.assertEqual(p.netloc, "")

660

self.assertEqual(p.username, None)

661

self.assertEqual(p.password, None)

662

self.assertEqual(p.hostname, None)

663

self.assertEqual(p.port, None)

664

self.assertEqual(p.geturl(), uri)

665

Nick Coghlan

2010-11-30 15:48:08 +0000

[diff] [blame]

666

# You guessed it, repeating the test with bytes input

667

uri = b"sip:alice@atlanta.com;maddr=239.255.255.1;ttl=15"

668

p = urllib.parse.urlsplit(uri)

669

self.assertEqual(p.netloc, b"")

670

self.assertEqual(p.username, None)

671

self.assertEqual(p.password, None)

672

self.assertEqual(p.hostname, None)

673

self.assertEqual(p.port, None)

674

self.assertEqual(p.geturl(), uri)

675

676

p = urllib.parse.urlparse(uri)

677

self.assertEqual(p.netloc, b"")

678

self.assertEqual(p.username, None)

679

self.assertEqual(p.password, None)

680

self.assertEqual(p.hostname, None)

681

self.assertEqual(p.port, None)

682

self.assertEqual(p.geturl(), uri)

683

Christian Heimes

2008-01-06 16:59:19 +0000

[diff] [blame]

684

def test_noslash(self):

685

# Issue 1637: http://foo.com?query is legal

Jeremy Hylton

2008-06-18 20:49:58 +0000

[diff] [blame]

686

self.assertEqual(urllib.parse.urlparse("http://example.com?blahblah=/foo"),

Christian Heimes

2008-01-06 16:59:19 +0000

[diff] [blame]

687

('http', 'example.com', '', '', 'blahblah=/foo', ''))

Nick Coghlan

2010-11-30 15:48:08 +0000

[diff] [blame]

688

self.assertEqual(urllib.parse.urlparse(b"http://example.com?blahblah=/foo"),

689

(b'http', b'example.com', b'', b'', b'blahblah=/foo', b''))

Christian Heimes

2008-01-06 16:59:19 +0000

[diff] [blame]

690

Senthil Kumaran

84c7d9f

2010-08-04 04:50:44 +0000

[diff] [blame]

691

def test_withoutscheme(self):

692

# Test urlparse without scheme

693

# Issue 754016: urlparse goes wrong with IP:port without scheme

694

# RFC 1808 specifies that netloc should start with //, urlparse expects

695

# the same, otherwise it classifies the portion of url as path.

696

self.assertEqual(urllib.parse.urlparse("path"),

697

('','','path','','',''))

698

self.assertEqual(urllib.parse.urlparse("//www.python.org:80"),

699

('','www.python.org:80','','','',''))

700

self.assertEqual(urllib.parse.urlparse("http://www.python.org:80"),

701

('http','www.python.org:80','','','',''))

Nick Coghlan

2010-11-30 15:48:08 +0000

[diff] [blame]

702

# Repeat for bytes input

703

self.assertEqual(urllib.parse.urlparse(b"path"),

704

(b'',b'',b'path',b'',b'',b''))

705

self.assertEqual(urllib.parse.urlparse(b"//www.python.org:80"),

706

(b'',b'www.python.org:80',b'',b'',b'',b''))

707

self.assertEqual(urllib.parse.urlparse(b"http://www.python.org:80"),

708

(b'http',b'www.python.org:80',b'',b'',b'',b''))

Senthil Kumaran

84c7d9f

2010-08-04 04:50:44 +0000

[diff] [blame]

709

710

def test_portseparator(self):

711

# Issue 754016 makes changes for port separator ':' from scheme separator

712

self.assertEqual(urllib.parse.urlparse("path:80"),

713

('','','path:80','','',''))

714

self.assertEqual(urllib.parse.urlparse("http:"),('http','','','','',''))

715

self.assertEqual(urllib.parse.urlparse("https:"),('https','','','','',''))

716

self.assertEqual(urllib.parse.urlparse("http://www.python.org:80"),

717

('http','www.python.org:80','','','',''))

Nick Coghlan

2010-11-30 15:48:08 +0000

[diff] [blame]

718

# As usual, need to check bytes input as well

719

self.assertEqual(urllib.parse.urlparse(b"path:80"),

720

(b'',b'',b'path:80',b'',b'',b''))

721

self.assertEqual(urllib.parse.urlparse(b"http:"),(b'http',b'',b'',b'',b'',b''))

722

self.assertEqual(urllib.parse.urlparse(b"https:"),(b'https',b'',b'',b'',b'',b''))

723

self.assertEqual(urllib.parse.urlparse(b"http://www.python.org:80"),

724

(b'http',b'www.python.org:80',b'',b'',b'',b''))

Senthil Kumaran

84c7d9f

2010-08-04 04:50:44 +0000

[diff] [blame]

725

Facundo Batista

2ac5de2

2008-07-07 18:24:11 +0000

[diff] [blame]

726

def test_usingsys(self):

727

# Issue 3314: sys module is used in the error

728

self.assertRaises(TypeError, urllib.parse.urlencode, "foo")

729

Senthil Kumaran

6be85c5

2010-02-19 07:42:50 +0000

[diff] [blame]

730

def test_anyscheme(self):

731

# Issue 7904: s3://foo.com/stuff has netloc "foo.com".

Ezio Melotti

5e15efa

2010-02-19 14:49:02 +0000

[diff] [blame]

732

self.assertEqual(urllib.parse.urlparse("s3://foo.com/stuff"),

733

('s3', 'foo.com', '/stuff', '', '', ''))

734

self.assertEqual(urllib.parse.urlparse("x-newscheme://foo.com/stuff"),

735

('x-newscheme', 'foo.com', '/stuff', '', '', ''))

Senthil Kumaran

1be320e

2012-05-19 08:12:00 +0800

[diff] [blame]

736

self.assertEqual(urllib.parse.urlparse("x-newscheme://foo.com/stuff?query#fragment"),

737

('x-newscheme', 'foo.com', '/stuff', '', 'query', 'fragment'))

738

self.assertEqual(urllib.parse.urlparse("x-newscheme://foo.com/stuff?query"),

739

('x-newscheme', 'foo.com', '/stuff', '', 'query', ''))

740

Nick Coghlan

2010-11-30 15:48:08 +0000

[diff] [blame]

741

# And for bytes...

742

self.assertEqual(urllib.parse.urlparse(b"s3://foo.com/stuff"),

743

(b's3', b'foo.com', b'/stuff', b'', b'', b''))

744

self.assertEqual(urllib.parse.urlparse(b"x-newscheme://foo.com/stuff"),

745

(b'x-newscheme', b'foo.com', b'/stuff', b'', b'', b''))

Senthil Kumaran

1be320e

2012-05-19 08:12:00 +0800

[diff] [blame]

746

self.assertEqual(urllib.parse.urlparse(b"x-newscheme://foo.com/stuff?query#fragment"),

747

(b'x-newscheme', b'foo.com', b'/stuff', b'', b'query', b'fragment'))

748

self.assertEqual(urllib.parse.urlparse(b"x-newscheme://foo.com/stuff?query"),

749

(b'x-newscheme', b'foo.com', b'/stuff', b'', b'query', b''))

Nick Coghlan

2010-11-30 15:48:08 +0000

[diff] [blame]

750

Berker Peksag

2015-06-25 23:38:48 +0300

[diff] [blame]

751

def test_default_scheme(self):

752

# Exercise the scheme parameter of urlparse() and urlsplit()

753

for func in (urllib.parse.urlparse, urllib.parse.urlsplit):

754

with self.subTest(function=func):

755

result = func("http://example.net/", "ftp")

756

self.assertEqual(result.scheme, "http")

757

result = func(b"http://example.net/", b"ftp")

758

self.assertEqual(result.scheme, b"http")

759

self.assertEqual(func("path", "ftp").scheme, "ftp")

760

self.assertEqual(func("path", scheme="ftp").scheme, "ftp")

761

self.assertEqual(func(b"path", scheme=b"ftp").scheme, b"ftp")

762

self.assertEqual(func("path").scheme, "")

763

self.assertEqual(func(b"path").scheme, b"")

764

self.assertEqual(func(b"path", "").scheme, b"")

765

766

def test_parse_fragments(self):

767

# Exercise the allow_fragments parameter of urlparse() and urlsplit()

768

tests = (

postmasters

2017-06-20 06:02:44 -0700

[diff] [blame]

769

("http:#frag", "path", "frag"),

770

("//example.net#frag", "path", "frag"),

771

("index.html#frag", "path", "frag"),

772

(";a=b#frag", "params", "frag"),

773

("?a=b#frag", "query", "frag"),

774

("#frag", "path", "frag"),

775

("abc#@frag", "path", "@frag"),

776

("//abc#@frag", "path", "@frag"),

777

("//abc:80#@frag", "path", "@frag"),

778

("//abc#@frag:80", "path", "@frag:80"),

Berker Peksag

2015-06-25 23:38:48 +0300

[diff] [blame]

779

)

postmasters

2017-06-20 06:02:44 -0700

[diff] [blame]

780

for url, attr, expected_frag in tests:

Berker Peksag

2015-06-25 23:38:48 +0300

[diff] [blame]

781

for func in (urllib.parse.urlparse, urllib.parse.urlsplit):

782

if attr == "params" and func is urllib.parse.urlsplit:

783

attr = "path"

784

with self.subTest(url=url, function=func):

785

result = func(url, allow_fragments=False)

786

self.assertEqual(result.fragment, "")

postmasters

2017-06-20 06:02:44 -0700

[diff] [blame]

787

self.assertTrue(

788

getattr(result, attr).endswith("#" + expected_frag))

Berker Peksag

2015-06-25 23:38:48 +0300

[diff] [blame]

789

self.assertEqual(func(url, "", False).fragment, "")

790

791

result = func(url, allow_fragments=True)

postmasters

2017-06-20 06:02:44 -0700

[diff] [blame]

792

self.assertEqual(result.fragment, expected_frag)

793

self.assertFalse(

794

getattr(result, attr).endswith(expected_frag))

795

self.assertEqual(func(url, "", True).fragment,

796

expected_frag)

797

self.assertEqual(func(url).fragment, expected_frag)

Berker Peksag

2015-06-25 23:38:48 +0300

[diff] [blame]

798

Nick Coghlan

2010-11-30 15:48:08 +0000

[diff] [blame]

799

def test_mixed_types_rejected(self):

800

# Several functions that process either strings or ASCII encoded bytes

801

# accept multiple arguments. Check they reject mixed type input

Ezio Melotti

2010-12-01 02:32:32 +0000

[diff] [blame]

802

with self.assertRaisesRegex(TypeError, "Cannot mix str"):

Nick Coghlan

2010-11-30 15:48:08 +0000

[diff] [blame]

803

urllib.parse.urlparse("www.python.org", b"http")

Ezio Melotti

2010-12-01 02:32:32 +0000

[diff] [blame]

804

with self.assertRaisesRegex(TypeError, "Cannot mix str"):

Nick Coghlan

2010-11-30 15:48:08 +0000

[diff] [blame]

805

urllib.parse.urlparse(b"www.python.org", "http")

Ezio Melotti

2010-12-01 02:32:32 +0000

[diff] [blame]

806

with self.assertRaisesRegex(TypeError, "Cannot mix str"):

Nick Coghlan

2010-11-30 15:48:08 +0000

[diff] [blame]

807

urllib.parse.urlsplit("www.python.org", b"http")

Ezio Melotti

2010-12-01 02:32:32 +0000

[diff] [blame]

808

with self.assertRaisesRegex(TypeError, "Cannot mix str"):

Nick Coghlan

2010-11-30 15:48:08 +0000

[diff] [blame]

809

urllib.parse.urlsplit(b"www.python.org", "http")

Ezio Melotti

2010-12-01 02:32:32 +0000

[diff] [blame]

810

with self.assertRaisesRegex(TypeError, "Cannot mix str"):

Nick Coghlan

2010-11-30 15:48:08 +0000

[diff] [blame]

811

urllib.parse.urlunparse(( b"http", "www.python.org","","","",""))

Ezio Melotti

2010-12-01 02:32:32 +0000

[diff] [blame]

812

with self.assertRaisesRegex(TypeError, "Cannot mix str"):

Nick Coghlan

2010-11-30 15:48:08 +0000

[diff] [blame]

813

urllib.parse.urlunparse(("http", b"www.python.org","","","",""))

Ezio Melotti

2010-12-01 02:32:32 +0000

[diff] [blame]

814

with self.assertRaisesRegex(TypeError, "Cannot mix str"):

Nick Coghlan

2010-11-30 15:48:08 +0000

[diff] [blame]

815

urllib.parse.urlunsplit((b"http", "www.python.org","","",""))

Ezio Melotti

2010-12-01 02:32:32 +0000

[diff] [blame]

816

with self.assertRaisesRegex(TypeError, "Cannot mix str"):

Nick Coghlan

2010-11-30 15:48:08 +0000

[diff] [blame]

817

urllib.parse.urlunsplit(("http", b"www.python.org","","",""))

Ezio Melotti

2010-12-01 02:32:32 +0000

[diff] [blame]

818

with self.assertRaisesRegex(TypeError, "Cannot mix str"):

Nick Coghlan

2010-11-30 15:48:08 +0000

[diff] [blame]

819

urllib.parse.urljoin("http://python.org", b"http://python.org")

Ezio Melotti

2010-12-01 02:32:32 +0000

[diff] [blame]

820

with self.assertRaisesRegex(TypeError, "Cannot mix str"):

Nick Coghlan

2010-11-30 15:48:08 +0000

[diff] [blame]

821

urllib.parse.urljoin(b"http://python.org", "http://python.org")

822

823

def _check_result_type(self, str_type):

824

num_args = len(str_type._fields)

825

bytes_type = str_type._encoded_counterpart

826

self.assertIs(bytes_type._decoded_counterpart, str_type)

827

str_args = ('',) * num_args

828

bytes_args = (b'',) * num_args

829

str_result = str_type(*str_args)

830

bytes_result = bytes_type(*bytes_args)

831

encoding = 'ascii'

832

errors = 'strict'

833

self.assertEqual(str_result, str_args)

834

self.assertEqual(bytes_result.decode(), str_args)

835

self.assertEqual(bytes_result.decode(), str_result)

836

self.assertEqual(bytes_result.decode(encoding), str_args)

837

self.assertEqual(bytes_result.decode(encoding), str_result)

838

self.assertEqual(bytes_result.decode(encoding, errors), str_args)

839

self.assertEqual(bytes_result.decode(encoding, errors), str_result)

840

self.assertEqual(bytes_result, bytes_args)

841

self.assertEqual(str_result.encode(), bytes_args)

842

self.assertEqual(str_result.encode(), bytes_result)

843

self.assertEqual(str_result.encode(encoding), bytes_args)

844

self.assertEqual(str_result.encode(encoding), bytes_result)

845

self.assertEqual(str_result.encode(encoding, errors), bytes_args)

846

self.assertEqual(str_result.encode(encoding, errors), bytes_result)

847

848

def test_result_pairs(self):

849

# Check encoding and decoding between result pairs

850

result_types = [

851

urllib.parse.DefragResult,

852

urllib.parse.SplitResult,

853

urllib.parse.ParseResult,

854

]

855

for result_type in result_types:

856

self._check_result_type(result_type)

857

Victor Stinner

1d87deb

2011-01-14 13:05:19 +0000

[diff] [blame]

858

def test_parse_qs_encoding(self):

859

result = urllib.parse.parse_qs("key=\u0141%E9", encoding="latin-1")

860

self.assertEqual(result, {'key': ['\u0141\xE9']})

861

result = urllib.parse.parse_qs("key=\u0141%C3%A9", encoding="utf-8")

862

self.assertEqual(result, {'key': ['\u0141\xE9']})

863

result = urllib.parse.parse_qs("key=\u0141%C3%A9", encoding="ascii")

864

self.assertEqual(result, {'key': ['\u0141\ufffd\ufffd']})

865

result = urllib.parse.parse_qs("key=\u0141%E9-", encoding="ascii")

866

self.assertEqual(result, {'key': ['\u0141\ufffd-']})

867

result = urllib.parse.parse_qs("key=\u0141%E9-", encoding="ascii",

868

errors="ignore")

869

self.assertEqual(result, {'key': ['\u0141-']})

870

871

def test_parse_qsl_encoding(self):

872

result = urllib.parse.parse_qsl("key=\u0141%E9", encoding="latin-1")

873

self.assertEqual(result, [('key', '\u0141\xE9')])

874

result = urllib.parse.parse_qsl("key=\u0141%C3%A9", encoding="utf-8")

875

self.assertEqual(result, [('key', '\u0141\xE9')])

876

result = urllib.parse.parse_qsl("key=\u0141%C3%A9", encoding="ascii")

877

self.assertEqual(result, [('key', '\u0141\ufffd\ufffd')])

878

result = urllib.parse.parse_qsl("key=\u0141%E9-", encoding="ascii")

879

self.assertEqual(result, [('key', '\u0141\ufffd-')])

880

result = urllib.parse.parse_qsl("key=\u0141%E9-", encoding="ascii",

881

errors="ignore")

882

self.assertEqual(result, [('key', '\u0141-')])

883

matthewbelisle-wf

2091448

2018-10-19 05:52:59 -0500

[diff] [blame]

884

def test_parse_qsl_max_num_fields(self):

885

with self.assertRaises(ValueError):

886

urllib.parse.parse_qs('&'.join(['a=a']*11), max_num_fields=10)

887

with self.assertRaises(ValueError):

888

urllib.parse.parse_qs(';'.join(['a=a']*11), max_num_fields=10)

889

urllib.parse.parse_qs('&'.join(['a=a']*10), max_num_fields=10)

890

Senthil Kumaran

2011-07-23 18:27:45 +0800

[diff] [blame]

891

def test_urlencode_sequences(self):

892

# Other tests incidentally urlencode things; test non-covered cases:

893

# Sequence and object values.

894

result = urllib.parse.urlencode({'a': [1, 2], 'b': (3, 4, 5)}, True)

Georg Brandl

09a7c72

2012-02-20 21:31:46 +0100

[diff] [blame]

895

# we cannot rely on ordering here

896

assert set(result.split('&')) == {'a=1', 'a=2', 'b=3', 'b=4', 'b=5'}

Senthil Kumaran

2011-07-23 18:27:45 +0800

[diff] [blame]

class Trivial:

def __str__(self):

return 'trivial'

result = urllib.parse.urlencode({'a': Trivial()}, True)

903

self.assertEqual(result, 'a=trivial')

904

R David Murray

c17686f

2015-05-17 20:44:50 -0400

[diff] [blame]

905

def test_urlencode_quote_via(self):

906

result = urllib.parse.urlencode({'a': 'some value'})

907

self.assertEqual(result, "a=some+value")

908

result = urllib.parse.urlencode({'a': 'some value/another'},

909

quote_via=urllib.parse.quote)

910

self.assertEqual(result, "a=some%20value%2Fanother")

911

result = urllib.parse.urlencode({'a': 'some value/another'},

912

safe='/', quote_via=urllib.parse.quote)

913

self.assertEqual(result, "a=some%20value/another")

914

Senthil Kumaran

2011-07-23 18:27:45 +0800

[diff] [blame]

915

def test_quote_from_bytes(self):

916

self.assertRaises(TypeError, urllib.parse.quote_from_bytes, 'foo')

917

result = urllib.parse.quote_from_bytes(b'archaeological arcana')

918

self.assertEqual(result, 'archaeological%20arcana')

919

result = urllib.parse.quote_from_bytes(b'')

920

self.assertEqual(result, '')

921

922

def test_unquote_to_bytes(self):

923

result = urllib.parse.unquote_to_bytes('abc%20def')

924

self.assertEqual(result, b'abc def')

925

result = urllib.parse.unquote_to_bytes('')

926

self.assertEqual(result, b'')

927

928

def test_quote_errors(self):

929

self.assertRaises(TypeError, urllib.parse.quote, b'foo',

930

encoding='utf-8')

931

self.assertRaises(TypeError, urllib.parse.quote, b'foo', errors='strict')

Victor Stinner

1d87deb

2011-01-14 13:05:19 +0000

[diff] [blame]

932

Ezio Melotti

6709b7d

2012-05-19 17:15:19 +0300

[diff] [blame]

933

def test_issue14072(self):

934

p1 = urllib.parse.urlsplit('tel:+31-641044153')

935

self.assertEqual(p1.scheme, 'tel')

936

self.assertEqual(p1.path, '+31-641044153')

937

p2 = urllib.parse.urlsplit('tel:+31641044153')

938

self.assertEqual(p2.scheme, 'tel')

939

self.assertEqual(p2.path, '+31641044153')

Senthil Kumaran

ed30199

2012-12-24 14:00:20 -0800

[diff] [blame]

940

# assert the behavior for urlparse

941

p1 = urllib.parse.urlparse('tel:+31-641044153')

942

self.assertEqual(p1.scheme, 'tel')

943

self.assertEqual(p1.path, '+31-641044153')

944

p2 = urllib.parse.urlparse('tel:+31641044153')

945

self.assertEqual(p2.scheme, 'tel')

946

self.assertEqual(p2.path, '+31641044153')

947

Matt Eaton

2cb4661

2018-03-20 01:41:37 -0500

[diff] [blame]

948

def test_port_casting_failure_message(self):

949

message = "Port could not be cast to integer value as 'oracle'"

950

p1 = urllib.parse.urlparse('http://Server=sde; Service=sde:oracle')

951

with self.assertRaisesRegex(ValueError, message):

952

p1.port

953

954

p2 = urllib.parse.urlsplit('http://Server=sde; Service=sde:oracle')

955

with self.assertRaisesRegex(ValueError, message):

956

p2.port

957

Senthil Kumaran

ed30199

2012-12-24 14:00:20 -0800

[diff] [blame]

958

def test_telurl_params(self):

959

p1 = urllib.parse.urlparse('tel:123-4;phone-context=+1-650-516')

960

self.assertEqual(p1.scheme, 'tel')

961

self.assertEqual(p1.path, '123-4')

962

self.assertEqual(p1.params, 'phone-context=+1-650-516')

963

964

p1 = urllib.parse.urlparse('tel:+1-201-555-0123')

965

self.assertEqual(p1.scheme, 'tel')

966

self.assertEqual(p1.path, '+1-201-555-0123')

967

self.assertEqual(p1.params, '')

968

969

p1 = urllib.parse.urlparse('tel:7042;phone-context=example.com')

970

self.assertEqual(p1.scheme, 'tel')

971

self.assertEqual(p1.path, '7042')

972

self.assertEqual(p1.params, 'phone-context=example.com')

973

974

p1 = urllib.parse.urlparse('tel:863-1234;phone-context=+1-914-555')

975

self.assertEqual(p1.scheme, 'tel')

976

self.assertEqual(p1.path, '863-1234')

977

self.assertEqual(p1.params, 'phone-context=+1-914-555')

978

R David Murray

f516388

2013-03-21 20:56:51 -0400

[diff] [blame]

979

def test_Quoter_repr(self):

980

quoter = urllib.parse.Quoter(urllib.parse._ALWAYS_SAFE)

981

self.assertIn('Quoter', repr(quoter))

982

Serhiy Storchaka

1515450

2015-04-07 19:09:01 +0300

[diff] [blame]

def test_all(self):

expected = []

undocumented = {

'splitattr', 'splithost', 'splitnport', 'splitpasswd',

987

'splitport', 'splitquery', 'splittag', 'splittype', 'splituser',

988

'splitvalue',

989

'Quoter', 'ResultBase', 'clear_cache', 'to_bytes', 'unwrap',

990

}

991

for name in dir(urllib.parse):

992

if name.startswith('_') or name in undocumented:

993

continue

994

object = getattr(urllib.parse, name)

995

if getattr(object, '__module__', None) == 'urllib.parse':

996

expected.append(name)

997

self.assertCountEqual(urllib.parse.__all__, expected)

998

Steve Dower

16e6f7d

2019-03-07 08:02:26 -0800

[diff] [blame]

999

def test_urlsplit_normalization(self):

1000

# Certain characters should never occur in the netloc,

1001

# including under normalization.

1002

# Ensure that ALL of them are detected and cause an error

1003

illegal_chars = '/:#?@'

1004

hex_chars = {'{:04X}'.format(ord(c)) for c in illegal_chars}

1005

denorm_chars = [

1006

c for c in map(chr, range(128, sys.maxunicode))

1007

if (hex_chars & set(unicodedata.decomposition(c).split()))

1008

and c not in illegal_chars

1009

]

1010

# Sanity check that we found at least one such character

1011

self.assertIn('\u2100', denorm_chars)

1012

self.assertIn('\uFF03', denorm_chars)

1013

Steve Dower

d537ab0

2019-04-30 12:03:02 +0000

[diff] [blame]

1014

# bpo-36742: Verify port separators are ignored when they

1015

# existed prior to decomposition

1016

urllib.parse.urlsplit('http://\u30d5\u309a:80')

1017

with self.assertRaises(ValueError):

1018

urllib.parse.urlsplit('http://\u30d5\u309a\ufe1380')

1019

Steve Dower

16e6f7d

2019-03-07 08:02:26 -0800

[diff] [blame]

1020

for scheme in ["http", "https", "ftp"]:

Steve Dower

8d0ef0b

2019-06-04 08:55:30 -0700

[diff] [blame]

1021

for netloc in ["netloc{}false.netloc", "n{}user@netloc"]:

1022

for c in denorm_chars:

1023

url = "{}://{}/path".format(scheme, netloc.format(c))

1024

with self.subTest(url=url, char='{:04X}'.format(ord(c))):

1025

with self.assertRaises(ValueError):

1026

urllib.parse.urlsplit(url)

Senthil Kumaran

6be85c5

2010-02-19 07:42:50 +0000

[diff] [blame]

1027

Serhiy Storchaka

2015-03-02 16:32:29 +0200

[diff] [blame]

1028

class Utility_Tests(unittest.TestCase):

1029

"""Testcase to test the various utility functions in the urllib."""

1030

# In Python 2 this test class was in test_urllib.

1031

1032

def test_splittype(self):

Cheryl Sabella

2018-06-03 10:31:32 -0400

[diff] [blame]

1033

splittype = urllib.parse._splittype

Serhiy Storchaka

2015-03-02 16:32:29 +0200

[diff] [blame]

1034

self.assertEqual(splittype('type:opaquestring'), ('type', 'opaquestring'))

1035

self.assertEqual(splittype('opaquestring'), (None, 'opaquestring'))

1036

self.assertEqual(splittype(':opaquestring'), (None, ':opaquestring'))

1037

self.assertEqual(splittype('type:'), ('type', ''))

1038

self.assertEqual(splittype('type:opaque:string'), ('type', 'opaque:string'))

1039

1040

def test_splithost(self):

Cheryl Sabella

2018-06-03 10:31:32 -0400

[diff] [blame]

1041

splithost = urllib.parse._splithost

Serhiy Storchaka

2015-03-02 16:32:29 +0200

[diff] [blame]

1042

self.assertEqual(splithost('//www.example.org:80/foo/bar/baz.html'),

1043

('www.example.org:80', '/foo/bar/baz.html'))

1044

self.assertEqual(splithost('//www.example.org:80'),

1045

('www.example.org:80', ''))

1046

self.assertEqual(splithost('/foo/bar/baz.html'),

1047

(None, '/foo/bar/baz.html'))

1048

postmasters

2017-06-20 06:02:44 -0700

[diff] [blame]

1049

# bpo-30500: # starts a fragment.

1050

self.assertEqual(splithost('//127.0.0.1#@host.com'),

1051

('127.0.0.1', '/#@host.com'))

1052

self.assertEqual(splithost('//127.0.0.1#@host.com:80'),

1053

('127.0.0.1', '/#@host.com:80'))

1054

self.assertEqual(splithost('//127.0.0.1:80#@host.com'),

1055

('127.0.0.1:80', '/#@host.com'))

1056

1057

# Empty host is returned as empty string.

1058

self.assertEqual(splithost("///file"),

1059

('', '/file'))

1060

1061

# Trailing semicolon, question mark and hash symbol are kept.

1062

self.assertEqual(splithost("//example.net/file;"),

1063

('example.net', '/file;'))

1064

self.assertEqual(splithost("//example.net/file?"),

1065

('example.net', '/file?'))

1066

self.assertEqual(splithost("//example.net/file#"),

1067

('example.net', '/file#'))

1068

Serhiy Storchaka

2015-03-02 16:32:29 +0200

[diff] [blame]

1069

def test_splituser(self):

Cheryl Sabella

2018-06-03 10:31:32 -0400

[diff] [blame]

1070

splituser = urllib.parse._splituser

Serhiy Storchaka

2015-03-02 16:32:29 +0200

[diff] [blame]

1071

self.assertEqual(splituser('User:Pass@www.python.org:080'),

1072

('User:Pass', 'www.python.org:080'))

1073

self.assertEqual(splituser('@www.python.org:080'),

1074

('', 'www.python.org:080'))

1075

self.assertEqual(splituser('www.python.org:080'),

1076

(None, 'www.python.org:080'))

1077

self.assertEqual(splituser('User:Pass@'),

1078

('User:Pass', ''))

1079

self.assertEqual(splituser('User@example.com:Pass@www.python.org:080'),

1080

('User@example.com:Pass', 'www.python.org:080'))

1081

1082

def test_splitpasswd(self):

1083

# Some of the password examples are not sensible, but it is added to

1084

# confirming to RFC2617 and addressing issue4675.

Cheryl Sabella

2018-06-03 10:31:32 -0400

[diff] [blame]

1085

splitpasswd = urllib.parse._splitpasswd

Serhiy Storchaka

2015-03-02 16:32:29 +0200

[diff] [blame]

1086

self.assertEqual(splitpasswd('user:ab'), ('user', 'ab'))

1087

self.assertEqual(splitpasswd('user:a\nb'), ('user', 'a\nb'))

1088

self.assertEqual(splitpasswd('user:a\tb'), ('user', 'a\tb'))

1089

self.assertEqual(splitpasswd('user:a\rb'), ('user', 'a\rb'))

1090

self.assertEqual(splitpasswd('user:a\fb'), ('user', 'a\fb'))

1091

self.assertEqual(splitpasswd('user:a\vb'), ('user', 'a\vb'))

1092

self.assertEqual(splitpasswd('user:a:b'), ('user', 'a:b'))

1093

self.assertEqual(splitpasswd('user:a b'), ('user', 'a b'))

1094

self.assertEqual(splitpasswd('user 2:ab'), ('user 2', 'ab'))

1095

self.assertEqual(splitpasswd('user+1:a+b'), ('user+1', 'a+b'))

1096

self.assertEqual(splitpasswd('user:'), ('user', ''))

1097

self.assertEqual(splitpasswd('user'), ('user', None))

1098

self.assertEqual(splitpasswd(':ab'), ('', 'ab'))

1099

1100

def test_splitport(self):

Cheryl Sabella

2018-06-03 10:31:32 -0400

[diff] [blame]

1101

splitport = urllib.parse._splitport

Serhiy Storchaka

2015-03-02 16:32:29 +0200

[diff] [blame]

1102

self.assertEqual(splitport('parrot:88'), ('parrot', '88'))

1103

self.assertEqual(splitport('parrot'), ('parrot', None))

1104

self.assertEqual(splitport('parrot:'), ('parrot', None))

1105

self.assertEqual(splitport('127.0.0.1'), ('127.0.0.1', None))

1106

self.assertEqual(splitport('parrot:cheese'), ('parrot:cheese', None))

1107

self.assertEqual(splitport('[::1]:88'), ('[::1]', '88'))

1108

self.assertEqual(splitport('[::1]'), ('[::1]', None))

1109

self.assertEqual(splitport(':88'), ('', '88'))

1110

1111

def test_splitnport(self):

Cheryl Sabella

2018-06-03 10:31:32 -0400

[diff] [blame]

1112

splitnport = urllib.parse._splitnport

Serhiy Storchaka

2015-03-02 16:32:29 +0200

[diff] [blame]

1113

self.assertEqual(splitnport('parrot:88'), ('parrot', 88))

1114

self.assertEqual(splitnport('parrot'), ('parrot', -1))

1115

self.assertEqual(splitnport('parrot', 55), ('parrot', 55))

1116

self.assertEqual(splitnport('parrot:'), ('parrot', -1))

1117

self.assertEqual(splitnport('parrot:', 55), ('parrot', 55))

1118

self.assertEqual(splitnport('127.0.0.1'), ('127.0.0.1', -1))

1119

self.assertEqual(splitnport('127.0.0.1', 55), ('127.0.0.1', 55))

1120

self.assertEqual(splitnport('parrot:cheese'), ('parrot', None))

1121

self.assertEqual(splitnport('parrot:cheese', 55), ('parrot', None))

1122

1123

def test_splitquery(self):

1124

# Normal cases are exercised by other tests; ensure that we also

1125

# catch cases with no port specified (testcase ensuring coverage)

Cheryl Sabella

2018-06-03 10:31:32 -0400

[diff] [blame]

1126

splitquery = urllib.parse._splitquery

Serhiy Storchaka

2015-03-02 16:32:29 +0200

[diff] [blame]

1127

self.assertEqual(splitquery('http://python.org/fake?foo=bar'),

1128

('http://python.org/fake', 'foo=bar'))

1129

self.assertEqual(splitquery('http://python.org/fake?foo=bar?'),

1130

('http://python.org/fake?foo=bar', ''))

1131

self.assertEqual(splitquery('http://python.org/fake'),

1132

('http://python.org/fake', None))

1133

self.assertEqual(splitquery('?foo=bar'), ('', 'foo=bar'))

1134

1135

def test_splittag(self):

Cheryl Sabella

2018-06-03 10:31:32 -0400

[diff] [blame]

1136

splittag = urllib.parse._splittag

Serhiy Storchaka

2015-03-02 16:32:29 +0200

[diff] [blame]

1137

self.assertEqual(splittag('http://example.com?foo=bar#baz'),

1138

('http://example.com?foo=bar', 'baz'))

1139

self.assertEqual(splittag('http://example.com?foo=bar#'),

1140

('http://example.com?foo=bar', ''))

1141

self.assertEqual(splittag('#baz'), ('', 'baz'))

1142

self.assertEqual(splittag('http://example.com?foo=bar'),

1143

('http://example.com?foo=bar', None))

1144

self.assertEqual(splittag('http://example.com?foo=bar#baz#boo'),

1145

('http://example.com?foo=bar#baz', 'boo'))

1146

1147

def test_splitattr(self):

Cheryl Sabella

2018-06-03 10:31:32 -0400

[diff] [blame]

1148

splitattr = urllib.parse._splitattr

Serhiy Storchaka

2015-03-02 16:32:29 +0200

[diff] [blame]

1149

self.assertEqual(splitattr('/path;attr1=value1;attr2=value2'),

1150

('/path', ['attr1=value1', 'attr2=value2']))

1151

self.assertEqual(splitattr('/path;'), ('/path', ['']))

1152

self.assertEqual(splitattr(';attr1=value1;attr2=value2'),

1153

('', ['attr1=value1', 'attr2=value2']))

1154

self.assertEqual(splitattr('/path'), ('/path', []))

1155

1156

def test_splitvalue(self):

1157

# Normal cases are exercised by other tests; test pathological cases

1158

# with no key/value pairs. (testcase ensuring coverage)

Cheryl Sabella

2018-06-03 10:31:32 -0400

[diff] [blame]

1159

splitvalue = urllib.parse._splitvalue

Serhiy Storchaka

2015-03-02 16:32:29 +0200

[diff] [blame]

1160

self.assertEqual(splitvalue('foo=bar'), ('foo', 'bar'))

1161

self.assertEqual(splitvalue('foo='), ('foo', ''))

1162

self.assertEqual(splitvalue('=bar'), ('', 'bar'))

1163

self.assertEqual(splitvalue('foobar'), ('foobar', None))

1164

self.assertEqual(splitvalue('foo=bar=baz'), ('foo', 'bar=baz'))

1165

1166

def test_to_bytes(self):

Cheryl Sabella

2018-06-03 10:31:32 -0400

[diff] [blame]

1167

result = urllib.parse._to_bytes('http://www.python.org')

Serhiy Storchaka

2015-03-02 16:32:29 +0200

[diff] [blame]

1168

self.assertEqual(result, 'http://www.python.org')

Cheryl Sabella

0250de4

2018-04-25 16:51:54 -0700

[diff] [blame]

1169

self.assertRaises(UnicodeError, urllib.parse._to_bytes,

Serhiy Storchaka

2015-03-02 16:32:29 +0200

[diff] [blame]

1170

'http://www.python.org/medi\u00e6val')

1171

1172

def test_unwrap(self):

Rémi Lapeyre

674ee12

2019-05-27 15:43:45 +0200

[diff] [blame]

1173

for wrapped_url in ('<URL:scheme://host/path>', '<scheme://host/path>',

1174

'URL:scheme://host/path', 'scheme://host/path'):

1175

url = urllib.parse.unwrap(wrapped_url)

1176

self.assertEqual(url, 'scheme://host/path')

Serhiy Storchaka

2015-03-02 16:32:29 +0200

[diff] [blame]

1177

Skip Montanaro

2002-03-23 05:32:10 +0000

[diff] [blame]

1178

Cheryl Sabella

0250de4

2018-04-25 16:51:54 -0700

[diff] [blame]

1179

class DeprecationTest(unittest.TestCase):
    # Each public urllib.parse helper exercised here must emit a
    # DeprecationWarning, with exactly the message checked, when called.

    def _check_deprecation(self, func, expected_message):
        # Call func('') and compare the text of the DeprecationWarning it
        # raises.  The comparison runs after the ``with`` block because
        # cm.warning is only populated once the context has exited.
        with self.assertWarns(DeprecationWarning) as cm:
            func('')
        self.assertEqual(str(cm.warning), expected_message)

    def test_splittype_deprecation(self):
        self._check_deprecation(
            urllib.parse.splittype,
            'urllib.parse.splittype() is deprecated as of 3.8, '
            'use urllib.parse.urlparse() instead')

    def test_splithost_deprecation(self):
        self._check_deprecation(
            urllib.parse.splithost,
            'urllib.parse.splithost() is deprecated as of 3.8, '
            'use urllib.parse.urlparse() instead')

    def test_splituser_deprecation(self):
        self._check_deprecation(
            urllib.parse.splituser,
            'urllib.parse.splituser() is deprecated as of 3.8, '
            'use urllib.parse.urlparse() instead')

    def test_splitpasswd_deprecation(self):
        self._check_deprecation(
            urllib.parse.splitpasswd,
            'urllib.parse.splitpasswd() is deprecated as of 3.8, '
            'use urllib.parse.urlparse() instead')

    def test_splitport_deprecation(self):
        self._check_deprecation(
            urllib.parse.splitport,
            'urllib.parse.splitport() is deprecated as of 3.8, '
            'use urllib.parse.urlparse() instead')

    def test_splitnport_deprecation(self):
        self._check_deprecation(
            urllib.parse.splitnport,
            'urllib.parse.splitnport() is deprecated as of 3.8, '
            'use urllib.parse.urlparse() instead')

    def test_splitquery_deprecation(self):
        self._check_deprecation(
            urllib.parse.splitquery,
            'urllib.parse.splitquery() is deprecated as of 3.8, '
            'use urllib.parse.urlparse() instead')

    def test_splittag_deprecation(self):
        self._check_deprecation(
            urllib.parse.splittag,
            'urllib.parse.splittag() is deprecated as of 3.8, '
            'use urllib.parse.urlparse() instead')

    def test_splitattr_deprecation(self):
        self._check_deprecation(
            urllib.parse.splitattr,
            'urllib.parse.splitattr() is deprecated as of 3.8, '
            'use urllib.parse.urlparse() instead')

    def test_splitvalue_deprecation(self):
        # splitvalue() is the one helper whose suggested replacement is
        # parse_qsl() rather than urlparse().
        self._check_deprecation(
            urllib.parse.splitvalue,
            'urllib.parse.splitvalue() is deprecated as of 3.8, '
            'use urllib.parse.parse_qsl() instead')

    def test_to_bytes_deprecation(self):
        # to_bytes() has no suggested replacement in its message.
        self._check_deprecation(
            urllib.parse.to_bytes,
            'urllib.parse.to_bytes() is deprecated as of 3.8')

1256

Cheryl Sabella

0250de4

2018-04-25 16:51:54 -0700

[diff] [blame]

1257

Skip Montanaro

2002-03-23 05:32:10 +0000

[diff] [blame]

1258

if __name__ == "__main__":

Serhiy Storchaka