Blame - Lib/test/test_urlparse.py - platform/external/python/cpython3

2002-03-23 05:32:10 +0000

[diff] [blame]

3

import unittest

Jeremy Hylton

2008-06-18 20:49:58 +0000

[diff] [blame]

4

import urllib.parse

Fred Drake

2001-01-05 05:57:04 +0000

[diff] [blame]

5

Fred Drake

2001-01-05 05:57:04 +0000

[diff] [blame]

6

# Base URLs that the urljoin()/urldefrag() tests in this module resolve
# relative references against.
RFC1808_BASE = "http://a/b/c/d;p?q#f"
RFC2396_BASE = "http://a/b/c/d;p?q"
RFC3986_BASE = "http://a/b/c/d;p?q"
SIMPLE_BASE = "http://a/b/c/d"

Fred Drake

2001-01-05 05:57:04 +0000

[diff] [blame]

10

Senthil Kumaran

257b980

2017-04-04 21:19:43 -0700

[diff] [blame]

11

# Each parse_qsl testcase is a two-tuple that contains
# a string with the query and a list with the expected result.
# The expected result is the one for keep_blank_values=True; str and bytes
# variants of every query are listed explicitly.
parse_qsl_test_cases = [
    ("", []),
    ("&", []),
    ("&&", []),
    ("=", [('', '')]),
    ("=a", [('', 'a')]),
    ("a", [('a', '')]),
    ("a=", [('a', '')]),
    ("&a=b", [('a', 'b')]),
    ("a=a+b&b=b+c", [('a', 'a b'), ('b', 'b c')]),
    ("a=1&a=2", [('a', '1'), ('a', '2')]),
    (b"", []),
    (b"&", []),
    (b"&&", []),
    (b"=", [(b'', b'')]),
    (b"=a", [(b'', b'a')]),
    (b"a", [(b'a', b'')]),
    (b"a=", [(b'a', b'')]),
    (b"&a=b", [(b'a', b'b')]),
    (b"a=a+b&b=b+c", [(b'a', b'a b'), (b'b', b'b c')]),
    (b"a=1&a=2", [(b'a', b'1'), (b'a', b'2')]),
    # ';' is expected to stay part of the name/value, not split the pairs.
    (";a=b", [(';a', 'b')]),
    ("a=a+b;b=b+c", [('a', 'a b;b=b c')]),
    (b";a=b", [(b';a', b'b')]),
    (b"a=a+b;b=b+c", [(b'a', b'a b;b=b c')]),
]

40

Senthil Kumaran

257b980

2017-04-04 21:19:43 -0700

[diff] [blame]

41

# Each parse_qs testcase is a two-tuple that contains
# a string with the query and a dictionary with the expected result.
# The expected result is the one for keep_blank_values=True; str and bytes
# variants of every query are listed explicitly.
parse_qs_test_cases = [
    ("", {}),
    ("&", {}),
    ("&&", {}),
    ("=", {'': ['']}),
    ("=a", {'': ['a']}),
    ("a", {'a': ['']}),
    ("a=", {'a': ['']}),
    ("&a=b", {'a': ['b']}),
    ("a=a+b&b=b+c", {'a': ['a b'], 'b': ['b c']}),
    ("a=1&a=2", {'a': ['1', '2']}),
    (b"", {}),
    (b"&", {}),
    (b"&&", {}),
    (b"=", {b'': [b'']}),
    (b"=a", {b'': [b'a']}),
    (b"a", {b'a': [b'']}),
    (b"a=", {b'a': [b'']}),
    (b"&a=b", {b'a': [b'b']}),
    (b"a=a+b&b=b+c", {b'a': [b'a b'], b'b': [b'b c']}),
    (b"a=1&a=2", {b'a': [b'1', b'2']}),
    # ';' is expected to stay part of the name/value, not split the pairs.
    (";a=b", {';a': ['b']}),
    ("a=a+b;b=b+c", {'a': ['a b;b=b c']}),
    (b";a=b", {b';a': [b'b']}),
    (b"a=a+b;b=b+c", {b'a': [b'a b;b=b c']}),
]

70

Skip Montanaro

2002-03-23 05:32:10 +0000

[diff] [blame]

71

class UrlParseTestCase(unittest.TestCase):

Johannes Gijsbers

2005-01-09 15:29:10 +0000

[diff] [blame]

72

73

def checkRoundtrips(self, url, parsed, split):
    """Assert that *url* parses, unparses and re-parses consistently.

    url is the URL under test (the callers pass both str and bytes),
    parsed is the 6-tuple expected from urlparse(url) and split is the
    5-tuple expected from urlsplit(url).  Fails through self.assertEqual.
    """
    # Attributes derived from netloc; compared in addition to the tuple
    # fields when checking the geturl() fixpoint.
    derived = ('username', 'password', 'hostname', 'port')

    result = urllib.parse.urlparse(url)
    self.assertEqual(result, parsed)
    t = (result.scheme, result.netloc, result.path,
         result.params, result.query, result.fragment)
    self.assertEqual(t, parsed)
    # put it back together and it should be the same
    result2 = urllib.parse.urlunparse(result)
    self.assertEqual(result2, url)
    self.assertEqual(result2, result.geturl())

    # the result of geturl() is a fixpoint; we can always parse it
    # again to get the same result:
    result3 = urllib.parse.urlparse(result.geturl())
    self.assertEqual(result3.geturl(), result.geturl())
    self.assertEqual(result3, result)
    parse_fields = ('scheme', 'netloc', 'path', 'params', 'query',
                    'fragment')
    for attr in parse_fields + derived:
        # The attribute name is passed as the message so that a failure
        # says which component diverged.
        self.assertEqual(getattr(result3, attr), getattr(result, attr),
                         attr)

    # check the roundtrip using urlsplit() as well
    result = urllib.parse.urlsplit(url)
    self.assertEqual(result, split)
    t = (result.scheme, result.netloc, result.path,
         result.query, result.fragment)
    self.assertEqual(t, split)
    result2 = urllib.parse.urlunsplit(result)
    self.assertEqual(result2, url)
    self.assertEqual(result2, result.geturl())

    # check the fixpoint property of re-parsing the result of geturl()
    result3 = urllib.parse.urlsplit(result.geturl())
    self.assertEqual(result3.geturl(), result.geturl())
    self.assertEqual(result3, result)
    split_fields = ('scheme', 'netloc', 'path', 'query', 'fragment')
    for attr in split_fields + derived:
        self.assertEqual(getattr(result3, attr), getattr(result, attr),
                         attr)

Johannes Gijsbers

2005-01-09 15:29:10 +0000

[diff] [blame]

123

Facundo Batista

2008-09-03 22:49:01 +0000

[diff] [blame]

124

def test_qsl(self):
    """Check parse_qsl() against parse_qsl_test_cases.

    Every case is checked with keep_blank_values=True (the listed
    expectation) and with keep_blank_values=False (the listed expectation
    minus the pairs whose value is empty).
    """
    for orig, expect in parse_qsl_test_cases:
        # subTest keeps going after a failing case and reports the query.
        with self.subTest(query=orig):
            result = urllib.parse.parse_qsl(orig, keep_blank_values=True)
            self.assertEqual(result, expect, "Error parsing %r" % orig)
            expect_without_blanks = [pair for pair in expect if pair[1]]
            result = urllib.parse.parse_qsl(orig, keep_blank_values=False)
            self.assertEqual(result, expect_without_blanks,
                             "Error parsing %r" % orig)

Facundo Batista

2008-09-03 22:49:01 +0000

[diff] [blame]

132

Senthil Kumaran

e38415e

2016-04-16 07:33:15 -0700

[diff] [blame]

133

def test_qs(self):
    """Check parse_qs() against parse_qs_test_cases.

    Every case is checked with keep_blank_values=True (the listed
    expectation) and with keep_blank_values=False.
    """
    for orig, expect in parse_qs_test_cases:
        # subTest keeps going after a failing case and reports the query.
        with self.subTest(query=orig):
            result = urllib.parse.parse_qs(orig, keep_blank_values=True)
            self.assertEqual(result, expect, "Error parsing %r" % orig)
            # Only the first value of each key decides whether the key is
            # kept; that is sufficient for the cases in the table.
            expect_without_blanks = {key: values
                                     for key, values in expect.items()
                                     if values[0]}
            result = urllib.parse.parse_qs(orig, keep_blank_values=False)
            self.assertEqual(result, expect_without_blanks,
                             "Error parsing %r" % orig)

142

Johannes Gijsbers

2005-01-09 15:29:10 +0000

[diff] [blame]

143

def test_roundtrips(self):
    """Run checkRoundtrips() over non-HTTP URLs, as str and as bytes."""
    # Each case is (url, expected urlparse() 6-tuple,
    # expected urlsplit() 5-tuple).
    str_cases = [
        ('file:///tmp/junk.txt',
         ('file', '', '/tmp/junk.txt', '', '', ''),
         ('file', '', '/tmp/junk.txt', '', '')),
        ('imap://mail.python.org/mbox1',
         ('imap', 'mail.python.org', '/mbox1', '', '', ''),
         ('imap', 'mail.python.org', '/mbox1', '', '')),
        ('mms://wms.sys.hinet.net/cts/Drama/09006251100.asf',
         ('mms', 'wms.sys.hinet.net', '/cts/Drama/09006251100.asf',
          '', '', ''),
         ('mms', 'wms.sys.hinet.net', '/cts/Drama/09006251100.asf',
          '', '')),
        ('nfs://server/path/to/file.txt',
         ('nfs', 'server', '/path/to/file.txt', '', '', ''),
         ('nfs', 'server', '/path/to/file.txt', '', '')),
        ('svn+ssh://svn.zope.org/repos/main/ZConfig/trunk/',
         ('svn+ssh', 'svn.zope.org', '/repos/main/ZConfig/trunk/',
          '', '', ''),
         ('svn+ssh', 'svn.zope.org', '/repos/main/ZConfig/trunk/',
          '', '')),
        ('git+ssh://git@github.com/user/project.git',
         ('git+ssh', 'git@github.com', '/user/project.git',
          '', '', ''),
         ('git+ssh', 'git@github.com', '/user/project.git',
          '', '')),
    ]

    def _encode(t):
        # Build the bytes twin of a (url, parsed, split) str case.
        return (t[0].encode('ascii'),
                tuple(x.encode('ascii') for x in t[1]),
                tuple(x.encode('ascii') for x in t[2]))

    bytes_cases = [_encode(x) for x in str_cases]
    for url, parsed, split in str_cases + bytes_cases:
        # subTest keeps going after a failing case and reports the URL.
        with self.subTest(url=url):
            self.checkRoundtrips(url, parsed, split)

Michael W. Hudson

bd3e771

2002-03-18 13:06:00 +0000

[diff] [blame]

177

Johannes Gijsbers

2005-01-09 15:29:10 +0000

[diff] [blame]

178

def test_http_roundtrips(self):
    """Run checkRoundtrips() over HTTP(S) URLs, as str and as bytes."""
    # urllib.parse.urlsplit treats 'http:' as an optimized special case,
    # so we test both 'http:' and 'https:' in all the following.
    # Three cheers for white box knowledge!
    #
    # Each case is (url without its scheme, expected urlparse() tuple
    # without the scheme, expected urlsplit() tuple without the scheme);
    # the scheme is prepended to all three in the loop below.
    str_cases = [
        ('://www.python.org',
         ('www.python.org', '', '', '', ''),
         ('www.python.org', '', '', '')),
        ('://www.python.org#abc',
         ('www.python.org', '', '', '', 'abc'),
         ('www.python.org', '', '', 'abc')),
        ('://www.python.org?q=abc',
         ('www.python.org', '', '', 'q=abc', ''),
         ('www.python.org', '', 'q=abc', '')),
        ('://www.python.org/#abc',
         ('www.python.org', '/', '', '', 'abc'),
         ('www.python.org', '/', '', 'abc')),
        ('://a/b/c/d;p?q#f',
         ('a', '/b/c/d', 'p', 'q', 'f'),
         ('a', '/b/c/d;p', 'q', 'f')),
    ]

    def _encode(t):
        # Build the bytes twin of a (url, parsed, split) str case.
        return (t[0].encode('ascii'),
                tuple(x.encode('ascii') for x in t[1]),
                tuple(x.encode('ascii') for x in t[2]))

    bytes_cases = [_encode(x) for x in str_cases]
    str_tests = ('http', 'https'), str_cases
    bytes_tests = (b'http', b'https'), bytes_cases
    for schemes, test_cases in (str_tests, bytes_tests):
        for scheme in schemes:
            for rest, parsed, split in test_cases:
                url = scheme + rest
                # subTest keeps going after a failing case and reports
                # the URL.
                with self.subTest(url=url):
                    self.checkRoundtrips(url,
                                         (scheme,) + parsed,
                                         (scheme,) + split)

Fred Drake

2002-10-16 21:02:36 +0000

[diff] [blame]

215

Skip Montanaro

2002-03-23 05:32:10 +0000

[diff] [blame]

216

def checkJoin(self, base, relurl, expected):
    """Assert that urljoin(base, relurl) gives *expected*.

    base, relurl and expected are ASCII-only str values.  The join is
    checked once on the str values and once on their ASCII-encoded bytes
    counterparts.  Fails through self.assertEqual.
    """
    self.assertEqual(urllib.parse.urljoin(base, relurl), expected)
    baseb, relurlb, expectedb = (
        x.encode('ascii') for x in (base, relurl, expected))
    self.assertEqual(urllib.parse.urljoin(baseb, relurlb), expectedb)

Guido van Rossum

bbc0568

2002-10-14 19:59:54 +0000

[diff] [blame]

222

223

def test_unparse_parse(self):

Nick Coghlan

2010-11-30 15:48:08 +0000

[diff] [blame]

224

str_cases = ['Python', './Python','x-newscheme://foo.com/stuff','x://y','x:/y','x:/','/',]

225

bytes_cases = [x.encode('ascii') for x in str_cases]

226

for u in str_cases + bytes_cases:

Jeremy Hylton

2008-06-18 20:49:58 +0000

[diff] [blame]

227

self.assertEqual(urllib.parse.urlunsplit(urllib.parse.urlsplit(u)), u)

228

self.assertEqual(urllib.parse.urlunparse(urllib.parse.urlparse(u)), u)

Fred Drake

2001-01-05 05:57:04 +0000

[diff] [blame]

229

Skip Montanaro

2002-03-23 05:32:10 +0000

[diff] [blame]

230

def test_RFC1808(self):
    """Join the RFC 1808 example references against RFC1808_BASE."""
    # Each case is (relative reference, expected result of the join).
    cases = [
        # "normal" cases from RFC 1808:
        ('g:h', 'g:h'),
        ('g', 'http://a/b/c/g'),
        ('./g', 'http://a/b/c/g'),
        ('g/', 'http://a/b/c/g/'),
        ('/g', 'http://a/g'),
        ('//g', 'http://g'),
        ('g?y', 'http://a/b/c/g?y'),
        ('g?y/./x', 'http://a/b/c/g?y/./x'),
        ('#s', 'http://a/b/c/d;p?q#s'),
        ('g#s', 'http://a/b/c/g#s'),
        ('g#s/./x', 'http://a/b/c/g#s/./x'),
        ('g?y#s', 'http://a/b/c/g?y#s'),
        ('g;x', 'http://a/b/c/g;x'),
        ('g;x?y#s', 'http://a/b/c/g;x?y#s'),
        ('.', 'http://a/b/c/'),
        ('./', 'http://a/b/c/'),
        ('..', 'http://a/b/'),
        ('../', 'http://a/b/'),
        ('../g', 'http://a/b/g'),
        ('../..', 'http://a/'),
        ('../../', 'http://a/'),
        ('../../g', 'http://a/g'),

        # "abnormal" cases from RFC 1808:
        ('', 'http://a/b/c/d;p?q#f'),
        ('g.', 'http://a/b/c/g.'),
        ('.g', 'http://a/b/c/.g'),
        ('g..', 'http://a/b/c/g..'),
        ('..g', 'http://a/b/c/..g'),
        ('./../g', 'http://a/b/g'),
        ('./g/.', 'http://a/b/c/g/'),
        ('g/./h', 'http://a/b/c/g/h'),
        ('g/../h', 'http://a/b/c/h'),
    ]
    for relurl, expected in cases:
        # subTest keeps going after a failing case and reports the
        # reference that failed.
        with self.subTest(relurl=relurl):
            self.checkJoin(RFC1808_BASE, relurl, expected)

    # RFC 1808 and RFC 1630 disagree on these (according to RFC 1808),
    # so we'll not actually run these tests (which expect 1808 behavior).
    #self.checkJoin(RFC1808_BASE, 'http:g', 'http:g')
    #self.checkJoin(RFC1808_BASE, 'http:', 'http:')

    # XXX: The following tests are no longer compatible with RFC3986
    # self.checkJoin(RFC1808_BASE, '../../../g', 'http://a/../g')
    # self.checkJoin(RFC1808_BASE, '../../../../g', 'http://a/../../g')
    # self.checkJoin(RFC1808_BASE, '/./g', 'http://a/./g')
    # self.checkJoin(RFC1808_BASE, '/../g', 'http://a/../g')

276

277

Senthil Kumaran

397eb44

2011-04-15 18:20:24 +0800

[diff] [blame]

278

def test_RFC2368(self):

279

# Issue 11467: path that starts with a number is not parsed correctly

280

self.assertEqual(urllib.parse.urlparse('mailto:1337@example.org'),

281

('mailto', '', '1337@example.org', '', '', ''))

282

Skip Montanaro

2002-03-23 05:32:10 +0000

[diff] [blame]

283

def test_RFC2396(self):
    """Join the RFC 2396 example references against RFC2396_BASE."""
    # cases from RFC 2396
    # Each case is (relative reference, expected result of the join).
    cases = [
        ('g:h', 'g:h'),
        ('g', 'http://a/b/c/g'),
        ('./g', 'http://a/b/c/g'),
        ('g/', 'http://a/b/c/g/'),
        ('/g', 'http://a/g'),
        ('//g', 'http://g'),
        ('g?y', 'http://a/b/c/g?y'),
        ('#s', 'http://a/b/c/d;p?q#s'),
        ('g#s', 'http://a/b/c/g#s'),
        ('g?y#s', 'http://a/b/c/g?y#s'),
        ('g;x', 'http://a/b/c/g;x'),
        ('g;x?y#s', 'http://a/b/c/g;x?y#s'),
        ('.', 'http://a/b/c/'),
        ('./', 'http://a/b/c/'),
        ('..', 'http://a/b/'),
        ('../', 'http://a/b/'),
        ('../g', 'http://a/b/g'),
        ('../..', 'http://a/'),
        ('../../', 'http://a/'),
        ('../../g', 'http://a/g'),
        ('', RFC2396_BASE),
        ('g.', 'http://a/b/c/g.'),
        ('.g', 'http://a/b/c/.g'),
        ('g..', 'http://a/b/c/g..'),
        ('..g', 'http://a/b/c/..g'),
        ('./../g', 'http://a/b/g'),
        ('./g/.', 'http://a/b/c/g/'),
        ('g/./h', 'http://a/b/c/g/h'),
        ('g/../h', 'http://a/b/c/h'),
        ('g;x=1/./y', 'http://a/b/c/g;x=1/y'),
        ('g;x=1/../y', 'http://a/b/c/y'),
        ('g?y/./x', 'http://a/b/c/g?y/./x'),
        ('g?y/../x', 'http://a/b/c/g?y/../x'),
        ('g#s/./x', 'http://a/b/c/g#s/./x'),
        ('g#s/../x', 'http://a/b/c/g#s/../x'),
    ]
    for relurl, expected in cases:
        # subTest keeps going after a failing case and reports the
        # reference that failed.
        with self.subTest(relurl=relurl):
            self.checkJoin(RFC2396_BASE, relurl, expected)

    # XXX: The following tests are no longer compatible with RFC3986
    # self.checkJoin(RFC2396_BASE, '../../../g', 'http://a/../g')
    # self.checkJoin(RFC2396_BASE, '../../../../g', 'http://a/../../g')
    # self.checkJoin(RFC2396_BASE, '/./g', 'http://a/./g')
    # self.checkJoin(RFC2396_BASE, '/../g', 'http://a/../g')

327

Facundo Batista

23e3856

2008-08-14 16:55:14 +0000

[diff] [blame]

328

def test_RFC3986(self):
    """Join the RFC 3986 example references against RFC3986_BASE."""
    # Each case is (relative reference, expected result of the join).
    # Exact duplicates of a case have been folded into one entry.
    cases = [
        ('?y', 'http://a/b/c/d;p?y'),
        (';x', 'http://a/b/c/;x'),
        ('g:h', 'g:h'),
        ('g', 'http://a/b/c/g'),
        ('./g', 'http://a/b/c/g'),
        ('g/', 'http://a/b/c/g/'),
        ('/g', 'http://a/g'),
        ('//g', 'http://g'),
        ('g?y', 'http://a/b/c/g?y'),
        ('#s', 'http://a/b/c/d;p?q#s'),
        ('g#s', 'http://a/b/c/g#s'),
        ('g?y#s', 'http://a/b/c/g?y#s'),
        ('g;x', 'http://a/b/c/g;x'),
        ('g;x?y#s', 'http://a/b/c/g;x?y#s'),
        ('', 'http://a/b/c/d;p?q'),
        ('.', 'http://a/b/c/'),
        ('./', 'http://a/b/c/'),
        ('..', 'http://a/b/'),
        ('../', 'http://a/b/'),
        ('../g', 'http://a/b/g'),
        ('../..', 'http://a/'),
        ('../../', 'http://a/'),
        ('../../g', 'http://a/g'),

        # Abnormal Examples
        #
        # These were once incompatible with the module's RFC 3986
        # handling; they are run here and expect the RFC 3986 results.
        ('../../../g', 'http://a/g'),
        ('../../../../g', 'http://a/g'),
        ('/./g', 'http://a/g'),
        ('/../g', 'http://a/g'),
        ('g.', 'http://a/b/c/g.'),
        ('.g', 'http://a/b/c/.g'),
        ('g..', 'http://a/b/c/g..'),
        ('..g', 'http://a/b/c/..g'),
        ('./../g', 'http://a/b/g'),
        ('./g/.', 'http://a/b/c/g/'),
        ('g/./h', 'http://a/b/c/g/h'),
        ('g/../h', 'http://a/b/c/h'),
        ('g;x=1/./y', 'http://a/b/c/g;x=1/y'),
        ('g;x=1/../y', 'http://a/b/c/y'),
        ('g?y/./x', 'http://a/b/c/g?y/./x'),
        ('g?y/../x', 'http://a/b/c/g?y/../x'),
        ('g#s/./x', 'http://a/b/c/g#s/./x'),
        ('g#s/../x', 'http://a/b/c/g#s/../x'),
        # ('http:g', 'http:g'),  # strict parser
        ('http:g', 'http://a/b/c/g'),  # relaxed parser
    ]
    for relurl, expected in cases:
        # subTest keeps going after a failing case and reports the
        # reference that failed.
        with self.subTest(relurl=relurl):
            self.checkJoin(RFC3986_BASE, relurl, expected)

    # Test for issue9721
    self.checkJoin('http://a/b/c/de', ';x', 'http://a/b/c/;x')

384

Senthil Kumaran

2010-07-14 10:21:22 +0000

[diff] [blame]

385

def test_urljoins(self):
    """Check urljoin() on SIMPLE_BASE and on assorted other bases."""
    # Each case is (base, relative reference, expected result).
    # Exact duplicates of a case have been folded into one entry.
    cases = [
        (SIMPLE_BASE, 'g:h', 'g:h'),
        (SIMPLE_BASE, 'http:g', 'http://a/b/c/g'),
        (SIMPLE_BASE, 'http:', 'http://a/b/c/d'),
        (SIMPLE_BASE, 'g', 'http://a/b/c/g'),
        (SIMPLE_BASE, './g', 'http://a/b/c/g'),
        (SIMPLE_BASE, 'g/', 'http://a/b/c/g/'),
        (SIMPLE_BASE, '/g', 'http://a/g'),
        (SIMPLE_BASE, '//g', 'http://g'),
        (SIMPLE_BASE, '?y', 'http://a/b/c/d?y'),
        (SIMPLE_BASE, 'g?y', 'http://a/b/c/g?y'),
        (SIMPLE_BASE, 'g?y/./x', 'http://a/b/c/g?y/./x'),
        (SIMPLE_BASE, '.', 'http://a/b/c/'),
        (SIMPLE_BASE, './', 'http://a/b/c/'),
        (SIMPLE_BASE, '..', 'http://a/b/'),
        (SIMPLE_BASE, '../', 'http://a/b/'),
        (SIMPLE_BASE, '../g', 'http://a/b/g'),
        (SIMPLE_BASE, '../..', 'http://a/'),
        (SIMPLE_BASE, '../../g', 'http://a/g'),
        (SIMPLE_BASE, './../g', 'http://a/b/g'),
        (SIMPLE_BASE, './g/.', 'http://a/b/c/g/'),
        (SIMPLE_BASE, 'g/./h', 'http://a/b/c/g/h'),
        (SIMPLE_BASE, 'g/../h', 'http://a/b/c/h'),
        (SIMPLE_BASE, 'http:?y', 'http://a/b/c/d?y'),
        (SIMPLE_BASE, 'http:g?y', 'http://a/b/c/g?y'),
        (SIMPLE_BASE, 'http:g?y/./x', 'http://a/b/c/g?y/./x'),
        ('http:///', '..', 'http:///'),
        ('', 'http://a/b/c/g?y/./x', 'http://a/b/c/g?y/./x'),
        ('', 'http://a/./g', 'http://a/./g'),
        ('svn://pathtorepo/dir1', 'dir2', 'svn://pathtorepo/dir2'),
        ('svn+ssh://pathtorepo/dir1', 'dir2',
         'svn+ssh://pathtorepo/dir2'),
        ('ws://a/b', 'g', 'ws://a/g'),
        ('wss://a/b', 'g', 'wss://a/g'),

        # XXX: The following tests are no longer compatible with RFC3986
        # (SIMPLE_BASE, '../../../g', 'http://a/../g'),
        # (SIMPLE_BASE, '/./g', 'http://a/./g'),

        # test for issue22118 duplicate slashes
        (SIMPLE_BASE + '/', 'foo', SIMPLE_BASE + '/foo'),

        # Non-RFC-defined tests, covering variations of base and trailing
        # slashes
        ('http://a/b/c/d/e/', '../../f/g/', 'http://a/b/c/f/g/'),
        ('http://a/b/c/d/e', '../../f/g/', 'http://a/b/f/g/'),
        ('http://a/b/c/d/e/', '/../../f/g/', 'http://a/f/g/'),
        ('http://a/b/c/d/e', '/../../f/g/', 'http://a/f/g/'),
        ('http://a/b/c/d/e/', '../../f/g', 'http://a/b/c/f/g'),
        ('http://a/b/', '../../f/g/', 'http://a/f/g/'),

        # issue 23703: don't duplicate filename
        ('a', 'b', 'b'),
    ]
    for base, relurl, expected in cases:
        # subTest keeps going after a failing case and reports the
        # base/reference pair that failed.
        with self.subTest(base=base, relurl=relurl):
            self.checkJoin(base, relurl, expected)

439

Senthil Kumaran

2010-04-16 03:02:13 +0000

[diff] [blame]

440

def test_RFC2732(self):

Nick Coghlan

2010-11-30 15:48:08 +0000

[diff] [blame]

441

str_cases = [

Senthil Kumaran

2010-04-16 03:02:13 +0000

[diff] [blame]

442

('http://Test.python.org:5432/foo/', 'test.python.org', 5432),

443

('http://12.34.56.78:5432/foo/', '12.34.56.78', 5432),

444

('http://[::1]:5432/foo/', '::1', 5432),

445

('http://[dead:beef::1]:5432/foo/', 'dead:beef::1', 5432),

446

('http://[dead:beef::]:5432/foo/', 'dead:beef::', 5432),

447

('http://[dead:beef:cafe:5417:affe:8FA3:deaf:feed]:5432/foo/',

448

'dead:beef:cafe:5417:affe:8fa3:deaf:feed', 5432),

449

('http://[::12.34.56.78]:5432/foo/', '::12.34.56.78', 5432),

450

('http://[::ffff:12.34.56.78]:5432/foo/',

451

'::ffff:12.34.56.78', 5432),

452

('http://Test.python.org/foo/', 'test.python.org', None),

453

('http://12.34.56.78/foo/', '12.34.56.78', None),

454

('http://[::1]/foo/', '::1', None),

455

('http://[dead:beef::1]/foo/', 'dead:beef::1', None),

456

('http://[dead:beef::]/foo/', 'dead:beef::', None),

457

('http://[dead:beef:cafe:5417:affe:8FA3:deaf:feed]/foo/',

458

'dead:beef:cafe:5417:affe:8fa3:deaf:feed', None),

459

('http://[::12.34.56.78]/foo/', '::12.34.56.78', None),

460

('http://[::ffff:12.34.56.78]/foo/',

461

'::ffff:12.34.56.78', None),

Serhiy Storchaka

ff97b08

2014-01-18 18:30:33 +0200

[diff] [blame]

462

('http://Test.python.org:/foo/', 'test.python.org', None),

463

('http://12.34.56.78:/foo/', '12.34.56.78', None),

464

('http://[::1]:/foo/', '::1', None),

465

('http://[dead:beef::1]:/foo/', 'dead:beef::1', None),

466

('http://[dead:beef::]:/foo/', 'dead:beef::', None),

467

('http://[dead:beef:cafe:5417:affe:8FA3:deaf:feed]:/foo/',

468

'dead:beef:cafe:5417:affe:8fa3:deaf:feed', None),

469

('http://[::12.34.56.78]:/foo/', '::12.34.56.78', None),

470

('http://[::ffff:12.34.56.78]:/foo/',

471

'::ffff:12.34.56.78', None),

Nick Coghlan

2010-11-30 15:48:08 +0000

[diff] [blame]

472

]

473

def _encode(t):

474

return t[0].encode('ascii'), t[1].encode('ascii'), t[2]

475

bytes_cases = [_encode(x) for x in str_cases]

476

for url, hostname, port in str_cases + bytes_cases:

Senthil Kumaran

2010-04-16 03:02:13 +0000

[diff] [blame]

477

urlparsed = urllib.parse.urlparse(url)

478

self.assertEqual((urlparsed.hostname, urlparsed.port) , (hostname, port))

479

Nick Coghlan

2010-11-30 15:48:08 +0000

[diff] [blame]

480

str_cases = [

Senthil Kumaran

2010-04-16 03:02:13 +0000

[diff] [blame]

481

'http://::12.34.56.78]/',

482

'http://[::1/foo/',

Senthil Kumaran

7a1e09f

2010-04-22 12:19:46 +0000

[diff] [blame]

483

'ftp://[::1/foo/bad]/bad',

Senthil Kumaran

2eaef05

2010-04-20 20:42:50 +0000

[diff] [blame]

484

'http://[::1/foo/bad]/bad',

Nick Coghlan

2010-11-30 15:48:08 +0000

[diff] [blame]

485

'http://[::ffff:12.34.56.78']

486

bytes_cases = [x.encode('ascii') for x in str_cases]

487

for invalid_url in str_cases + bytes_cases:

Senthil Kumaran

7a1e09f

2010-04-22 12:19:46 +0000

[diff] [blame]

488

self.assertRaises(ValueError, urllib.parse.urlparse, invalid_url)

Senthil Kumaran

2010-04-16 03:02:13 +0000

[diff] [blame]

489

Fred Drake

2002-10-16 21:02:36 +0000

[diff] [blame]

490

def test_urldefrag(self):
    """Check urldefrag() results and their geturl() round trip."""
    # Each case is (url, expected url without fragment,
    # expected fragment).
    str_cases = [
        ('http://python.org#frag', 'http://python.org', 'frag'),
        ('http://python.org', 'http://python.org', ''),
        ('http://python.org/#frag', 'http://python.org/', 'frag'),
        ('http://python.org/', 'http://python.org/', ''),
        ('http://python.org/?q#frag', 'http://python.org/?q', 'frag'),
        ('http://python.org/?q', 'http://python.org/?q', ''),
        ('http://python.org/p#frag', 'http://python.org/p', 'frag'),
        ('http://python.org/p?q', 'http://python.org/p?q', ''),
        (RFC1808_BASE, 'http://a/b/c/d;p?q', 'f'),
        (RFC2396_BASE, 'http://a/b/c/d;p?q', ''),
    ]

    def _encode(t):
        # Build the bytes twin of a case, keeping its container type.
        return type(t)(x.encode('ascii') for x in t)

    bytes_cases = [_encode(x) for x in str_cases]
    for url, defrag, frag in str_cases + bytes_cases:
        # subTest keeps going after a failing case and reports the URL.
        with self.subTest(url=url):
            result = urllib.parse.urldefrag(url)
            self.assertEqual(result.geturl(), url)
            self.assertEqual(result, (defrag, frag))
            self.assertEqual(result.url, defrag)
            self.assertEqual(result.fragment, frag)

Fred Drake

2002-10-16 21:02:36 +0000

[diff] [blame]

512

Коренберг Марк

fbd6051

2017-12-21 17:16:17 +0500

[diff] [blame]

513

def test_urlsplit_scoped_IPv6(self):

514

p = urllib.parse.urlsplit('http://[FE80::822a:a8ff:fe49:470c%tESt]:1234')

515

self.assertEqual(p.hostname, "fe80::822a:a8ff:fe49:470c%tESt")

516

self.assertEqual(p.netloc, '[FE80::822a:a8ff:fe49:470c%tESt]:1234')

517

518

p = urllib.parse.urlsplit(b'http://[FE80::822a:a8ff:fe49:470c%tESt]:1234')

519

self.assertEqual(p.hostname, b"fe80::822a:a8ff:fe49:470c%tESt")

520

self.assertEqual(p.netloc, b'[FE80::822a:a8ff:fe49:470c%tESt]:1234')

521

Thomas Wouters

2006-04-21 10:40:58 +0000

[diff] [blame]

522

def test_urlsplit_attributes(self):

523

url = "HTTP://WWW.PYTHON.ORG/doc/#frag"

Jeremy Hylton

2008-06-18 20:49:58 +0000

[diff] [blame]

524

p = urllib.parse.urlsplit(url)

Thomas Wouters

2006-04-21 10:40:58 +0000

[diff] [blame]

525

self.assertEqual(p.scheme, "http")

526

self.assertEqual(p.netloc, "WWW.PYTHON.ORG")

527

self.assertEqual(p.path, "/doc/")

528

self.assertEqual(p.query, "")

529

self.assertEqual(p.fragment, "frag")

530

self.assertEqual(p.username, None)

531

self.assertEqual(p.password, None)

532

self.assertEqual(p.hostname, "www.python.org")

533

self.assertEqual(p.port, None)

534

# geturl() won't return exactly the original URL in this case

535

# since the scheme is always case-normalized

Nick Coghlan

2010-11-30 15:48:08 +0000

[diff] [blame]

536

# We handle this by ignoring the first 4 characters of the URL

537

self.assertEqual(p.geturl()[4:], url[4:])

Thomas Wouters

2006-04-21 10:40:58 +0000

[diff] [blame]

538

539

url = "http://User:Pass@www.python.org:080/doc/?query=yes#frag"

Jeremy Hylton

2008-06-18 20:49:58 +0000

[diff] [blame]

540

p = urllib.parse.urlsplit(url)

Thomas Wouters

2006-04-21 10:40:58 +0000

[diff] [blame]

541

self.assertEqual(p.scheme, "http")

542

self.assertEqual(p.netloc, "User:Pass@www.python.org:080")

543

self.assertEqual(p.path, "/doc/")

544

self.assertEqual(p.query, "query=yes")

545

self.assertEqual(p.fragment, "frag")

546

self.assertEqual(p.username, "User")

547

self.assertEqual(p.password, "Pass")

548

self.assertEqual(p.hostname, "www.python.org")

549

self.assertEqual(p.port, 80)

550

self.assertEqual(p.geturl(), url)

551

Christian Heimes

2008-01-06 16:59:19 +0000

[diff] [blame]

552

# Addressing issue1698, which suggests Username can contain

553

# "@" characters. Though not RFC compliant, many ftp sites allow

554

# and request email addresses as usernames.

555

556

url = "http://User@example.com:Pass@www.python.org:080/doc/?query=yes#frag"

Jeremy Hylton

2008-06-18 20:49:58 +0000

[diff] [blame]

557

p = urllib.parse.urlsplit(url)

Christian Heimes

2008-01-06 16:59:19 +0000

[diff] [blame]

558

self.assertEqual(p.scheme, "http")

559

self.assertEqual(p.netloc, "User@example.com:Pass@www.python.org:080")

560

self.assertEqual(p.path, "/doc/")

561

self.assertEqual(p.query, "query=yes")

562

self.assertEqual(p.fragment, "frag")

563

self.assertEqual(p.username, "User@example.com")

564

self.assertEqual(p.password, "Pass")

565

self.assertEqual(p.hostname, "www.python.org")

566

self.assertEqual(p.port, 80)

567

self.assertEqual(p.geturl(), url)

568

Nick Coghlan

2010-11-30 15:48:08 +0000

[diff] [blame]

569

# And check them all again, only with bytes this time

570

url = b"HTTP://WWW.PYTHON.ORG/doc/#frag"

571

p = urllib.parse.urlsplit(url)

572

self.assertEqual(p.scheme, b"http")

573

self.assertEqual(p.netloc, b"WWW.PYTHON.ORG")

574

self.assertEqual(p.path, b"/doc/")

575

self.assertEqual(p.query, b"")

576

self.assertEqual(p.fragment, b"frag")

577

self.assertEqual(p.username, None)

578

self.assertEqual(p.password, None)

579

self.assertEqual(p.hostname, b"www.python.org")

580

self.assertEqual(p.port, None)

581

self.assertEqual(p.geturl()[4:], url[4:])

582

583

url = b"http://User:Pass@www.python.org:080/doc/?query=yes#frag"

584

p = urllib.parse.urlsplit(url)

585

self.assertEqual(p.scheme, b"http")

586

self.assertEqual(p.netloc, b"User:Pass@www.python.org:080")

587

self.assertEqual(p.path, b"/doc/")

588

self.assertEqual(p.query, b"query=yes")

589

self.assertEqual(p.fragment, b"frag")

590

self.assertEqual(p.username, b"User")

591

self.assertEqual(p.password, b"Pass")

592

self.assertEqual(p.hostname, b"www.python.org")

593

self.assertEqual(p.port, 80)

594

self.assertEqual(p.geturl(), url)

595

596

url = b"http://User@example.com:Pass@www.python.org:080/doc/?query=yes#frag"

597

p = urllib.parse.urlsplit(url)

598

self.assertEqual(p.scheme, b"http")

599

self.assertEqual(p.netloc, b"User@example.com:Pass@www.python.org:080")

600

self.assertEqual(p.path, b"/doc/")

601

self.assertEqual(p.query, b"query=yes")

602

self.assertEqual(p.fragment, b"frag")

603

self.assertEqual(p.username, b"User@example.com")

604

self.assertEqual(p.password, b"Pass")

605

self.assertEqual(p.hostname, b"www.python.org")

606

self.assertEqual(p.port, 80)

607

self.assertEqual(p.geturl(), url)

Christian Heimes

2008-01-06 16:59:19 +0000

[diff] [blame]

608

Robert Collins

dfa95c9

2015-08-10 09:53:30 +1200

[diff] [blame]

609

# Verify an illegal port raises ValueError

Senthil Kumaran

2fc5a50

2012-05-24 21:56:17 +0800

[diff] [blame]

610

url = b"HTTP://WWW.PYTHON.ORG:65536/doc/#frag"

611

p = urllib.parse.urlsplit(url)

Robert Collins

dfa95c9

2015-08-10 09:53:30 +1200

[diff] [blame]

612

with self.assertRaisesRegex(ValueError, "out of range"):

613

p.port

Senthil Kumaran

2fc5a50

2012-05-24 21:56:17 +0800

[diff] [blame]

614

Thomas Wouters

2006-04-21 10:40:58 +0000

[diff] [blame]

615

def test_attributes_bad_port(self):

Robert Collins

dfa95c9

2015-08-10 09:53:30 +1200

[diff] [blame]

616

"""Check handling of invalid ports."""

617

for bytes in (False, True):

618

for parse in (urllib.parse.urlsplit, urllib.parse.urlparse):

619

for port in ("foo", "1.5", "-1", "0x10"):

620

with self.subTest(bytes=bytes, parse=parse, port=port):

621

netloc = "www.example.net:" + port

622

url = "http://" + netloc

623

if bytes:

624

netloc = netloc.encode("ascii")

625

url = url.encode("ascii")

626

p = parse(url)

627

self.assertEqual(p.netloc, netloc)

628

with self.assertRaises(ValueError):

629

p.port

Nick Coghlan

2010-11-30 15:48:08 +0000

[diff] [blame]

630

Thomas Wouters

2006-04-21 10:40:58 +0000

[diff] [blame]

631

def test_attributes_without_netloc(self):

632

# This example is straight from RFC 3261. It looks like it

633

# should allow the username, hostname, and port to be filled

634

# in, but doesn't. Since it's a URI and doesn't use the

635

# scheme://netloc syntax, the netloc and related attributes

636

# should be left empty.

637

uri = "sip:alice@atlanta.com;maddr=239.255.255.1;ttl=15"

Jeremy Hylton

2008-06-18 20:49:58 +0000

[diff] [blame]

638

p = urllib.parse.urlsplit(uri)

Thomas Wouters

2006-04-21 10:40:58 +0000

[diff] [blame]

639

self.assertEqual(p.netloc, "")

640

self.assertEqual(p.username, None)

641

self.assertEqual(p.password, None)

642

self.assertEqual(p.hostname, None)

643

self.assertEqual(p.port, None)

644

self.assertEqual(p.geturl(), uri)

645

Jeremy Hylton

2008-06-18 20:49:58 +0000

[diff] [blame]

646

p = urllib.parse.urlparse(uri)

Thomas Wouters

2006-04-21 10:40:58 +0000

[diff] [blame]

647

self.assertEqual(p.netloc, "")

648

self.assertEqual(p.username, None)

649

self.assertEqual(p.password, None)

650

self.assertEqual(p.hostname, None)

651

self.assertEqual(p.port, None)

652

self.assertEqual(p.geturl(), uri)

653

Nick Coghlan

2010-11-30 15:48:08 +0000

[diff] [blame]

654

# You guessed it, repeating the test with bytes input

655

uri = b"sip:alice@atlanta.com;maddr=239.255.255.1;ttl=15"

656

p = urllib.parse.urlsplit(uri)

657

self.assertEqual(p.netloc, b"")

658

self.assertEqual(p.username, None)

659

self.assertEqual(p.password, None)

660

self.assertEqual(p.hostname, None)

661

self.assertEqual(p.port, None)

662

self.assertEqual(p.geturl(), uri)

663

664

p = urllib.parse.urlparse(uri)

665

self.assertEqual(p.netloc, b"")

666

self.assertEqual(p.username, None)

667

self.assertEqual(p.password, None)

668

self.assertEqual(p.hostname, None)

669

self.assertEqual(p.port, None)

670

self.assertEqual(p.geturl(), uri)

671

Christian Heimes

2008-01-06 16:59:19 +0000

[diff] [blame]

672

def test_noslash(self):

673

# Issue 1637: http://foo.com?query is legal

Jeremy Hylton

2008-06-18 20:49:58 +0000

[diff] [blame]

674

self.assertEqual(urllib.parse.urlparse("http://example.com?blahblah=/foo"),

Christian Heimes

2008-01-06 16:59:19 +0000

[diff] [blame]

675

('http', 'example.com', '', '', 'blahblah=/foo', ''))

Nick Coghlan

2010-11-30 15:48:08 +0000

[diff] [blame]

676

self.assertEqual(urllib.parse.urlparse(b"http://example.com?blahblah=/foo"),

677

(b'http', b'example.com', b'', b'', b'blahblah=/foo', b''))

Christian Heimes

2008-01-06 16:59:19 +0000

[diff] [blame]

678

Senthil Kumaran

2010-08-04 04:50:44 +0000

[diff] [blame]

679

def test_withoutscheme(self):

680

# Test urlparse without scheme

681

# Issue 754016: urlparse goes wrong with IP:port without scheme

682

# RFC 1808 specifies that netloc should start with //, urlparse expects

683

# the same, otherwise it classifies the portion of url as path.

684

self.assertEqual(urllib.parse.urlparse("path"),

685

('','','path','','',''))

686

self.assertEqual(urllib.parse.urlparse("//www.python.org:80"),

687

('','www.python.org:80','','','',''))

688

self.assertEqual(urllib.parse.urlparse("http://www.python.org:80"),

689

('http','www.python.org:80','','','',''))

Nick Coghlan

2010-11-30 15:48:08 +0000

[diff] [blame]

690

# Repeat for bytes input

691

self.assertEqual(urllib.parse.urlparse(b"path"),

692

(b'',b'',b'path',b'',b'',b''))

693

self.assertEqual(urllib.parse.urlparse(b"//www.python.org:80"),

694

(b'',b'www.python.org:80',b'',b'',b'',b''))

695

self.assertEqual(urllib.parse.urlparse(b"http://www.python.org:80"),

696

(b'http',b'www.python.org:80',b'',b'',b'',b''))

Senthil Kumaran

2010-08-04 04:50:44 +0000

[diff] [blame]

697

698

def test_portseparator(self):

699

# Issue 754016 makes changes for port separator ':' from scheme separator

Tim Graham

5a88d50

2019-10-18 09:07:20 -0400

[diff] [blame]

700

self.assertEqual(urllib.parse.urlparse("http:80"), ('http','','80','','',''))

701

self.assertEqual(urllib.parse.urlparse("https:80"), ('https','','80','','',''))

702

self.assertEqual(urllib.parse.urlparse("path:80"), ('path','','80','','',''))

Senthil Kumaran

2010-08-04 04:50:44 +0000

[diff] [blame]

703

self.assertEqual(urllib.parse.urlparse("http:"),('http','','','','',''))

704

self.assertEqual(urllib.parse.urlparse("https:"),('https','','','','',''))

705

self.assertEqual(urllib.parse.urlparse("http://www.python.org:80"),

706

('http','www.python.org:80','','','',''))

Nick Coghlan

2010-11-30 15:48:08 +0000

[diff] [blame]

707

# As usual, need to check bytes input as well

Tim Graham

5a88d50

2019-10-18 09:07:20 -0400

[diff] [blame]

708

self.assertEqual(urllib.parse.urlparse(b"http:80"), (b'http',b'',b'80',b'',b'',b''))

709

self.assertEqual(urllib.parse.urlparse(b"https:80"), (b'https',b'',b'80',b'',b'',b''))

710

self.assertEqual(urllib.parse.urlparse(b"path:80"), (b'path',b'',b'80',b'',b'',b''))

Nick Coghlan

2010-11-30 15:48:08 +0000

[diff] [blame]

711

self.assertEqual(urllib.parse.urlparse(b"http:"),(b'http',b'',b'',b'',b'',b''))

712

self.assertEqual(urllib.parse.urlparse(b"https:"),(b'https',b'',b'',b'',b'',b''))

713

self.assertEqual(urllib.parse.urlparse(b"http://www.python.org:80"),

714

(b'http',b'www.python.org:80',b'',b'',b'',b''))

Senthil Kumaran

2010-08-04 04:50:44 +0000

[diff] [blame]

715

Facundo Batista

2ac5de2

2008-07-07 18:24:11 +0000

[diff] [blame]

716

def test_usingsys(self):

717

# Issue 3314: sys module is used in the error

718

self.assertRaises(TypeError, urllib.parse.urlencode, "foo")

719

Senthil Kumaran

6be85c5

2010-02-19 07:42:50 +0000

[diff] [blame]

720

def test_anyscheme(self):

721

# Issue 7904: s3://foo.com/stuff has netloc "foo.com".

Ezio Melotti

5e15efa

2010-02-19 14:49:02 +0000

[diff] [blame]

722

self.assertEqual(urllib.parse.urlparse("s3://foo.com/stuff"),

723

('s3', 'foo.com', '/stuff', '', '', ''))

724

self.assertEqual(urllib.parse.urlparse("x-newscheme://foo.com/stuff"),

725

('x-newscheme', 'foo.com', '/stuff', '', '', ''))

Senthil Kumaran

1be320e

2012-05-19 08:12:00 +0800

[diff] [blame]

726

self.assertEqual(urllib.parse.urlparse("x-newscheme://foo.com/stuff?query#fragment"),

727

('x-newscheme', 'foo.com', '/stuff', '', 'query', 'fragment'))

728

self.assertEqual(urllib.parse.urlparse("x-newscheme://foo.com/stuff?query"),

729

('x-newscheme', 'foo.com', '/stuff', '', 'query', ''))

730

Nick Coghlan

2010-11-30 15:48:08 +0000

[diff] [blame]

731

# And for bytes...

732

self.assertEqual(urllib.parse.urlparse(b"s3://foo.com/stuff"),

733

(b's3', b'foo.com', b'/stuff', b'', b'', b''))

734

self.assertEqual(urllib.parse.urlparse(b"x-newscheme://foo.com/stuff"),

735

(b'x-newscheme', b'foo.com', b'/stuff', b'', b'', b''))

Senthil Kumaran

1be320e

2012-05-19 08:12:00 +0800

[diff] [blame]

736

self.assertEqual(urllib.parse.urlparse(b"x-newscheme://foo.com/stuff?query#fragment"),

737

(b'x-newscheme', b'foo.com', b'/stuff', b'', b'query', b'fragment'))

738

self.assertEqual(urllib.parse.urlparse(b"x-newscheme://foo.com/stuff?query"),

739

(b'x-newscheme', b'foo.com', b'/stuff', b'', b'query', b''))

Nick Coghlan

2010-11-30 15:48:08 +0000

[diff] [blame]

740

Berker Peksag

2015-06-25 23:38:48 +0300

[diff] [blame]

741

def test_default_scheme(self):

742

# Exercise the scheme parameter of urlparse() and urlsplit()

743

for func in (urllib.parse.urlparse, urllib.parse.urlsplit):

744

with self.subTest(function=func):

745

result = func("http://example.net/", "ftp")

746

self.assertEqual(result.scheme, "http")

747

result = func(b"http://example.net/", b"ftp")

748

self.assertEqual(result.scheme, b"http")

749

self.assertEqual(func("path", "ftp").scheme, "ftp")

750

self.assertEqual(func("path", scheme="ftp").scheme, "ftp")

751

self.assertEqual(func(b"path", scheme=b"ftp").scheme, b"ftp")

752

self.assertEqual(func("path").scheme, "")

753

self.assertEqual(func(b"path").scheme, b"")

754

self.assertEqual(func(b"path", "").scheme, b"")

755

756

def test_parse_fragments(self):

757

# Exercise the allow_fragments parameter of urlparse() and urlsplit()

758

tests = (

postmasters

2017-06-20 06:02:44 -0700

[diff] [blame]

759

("http:#frag", "path", "frag"),

760

("//example.net#frag", "path", "frag"),

761

("index.html#frag", "path", "frag"),

762

(";a=b#frag", "params", "frag"),

763

("?a=b#frag", "query", "frag"),

764

("#frag", "path", "frag"),

765

("abc#@frag", "path", "@frag"),

766

("//abc#@frag", "path", "@frag"),

767

("//abc:80#@frag", "path", "@frag"),

768

("//abc#@frag:80", "path", "@frag:80"),

Berker Peksag

2015-06-25 23:38:48 +0300

[diff] [blame]

769

)

postmasters

2017-06-20 06:02:44 -0700

[diff] [blame]

770

for url, attr, expected_frag in tests:

Berker Peksag

2015-06-25 23:38:48 +0300

[diff] [blame]

771

for func in (urllib.parse.urlparse, urllib.parse.urlsplit):

772

if attr == "params" and func is urllib.parse.urlsplit:

773

attr = "path"

774

with self.subTest(url=url, function=func):

775

result = func(url, allow_fragments=False)

776

self.assertEqual(result.fragment, "")

postmasters

2017-06-20 06:02:44 -0700

[diff] [blame]

777

self.assertTrue(

778

getattr(result, attr).endswith("#" + expected_frag))

Berker Peksag

2015-06-25 23:38:48 +0300

[diff] [blame]

779

self.assertEqual(func(url, "", False).fragment, "")

780

781

result = func(url, allow_fragments=True)

postmasters

2017-06-20 06:02:44 -0700

[diff] [blame]

782

self.assertEqual(result.fragment, expected_frag)

783

self.assertFalse(

784

getattr(result, attr).endswith(expected_frag))

785

self.assertEqual(func(url, "", True).fragment,

786

expected_frag)

787

self.assertEqual(func(url).fragment, expected_frag)

Berker Peksag

2015-06-25 23:38:48 +0300

[diff] [blame]

788

Nick Coghlan

2010-11-30 15:48:08 +0000

[diff] [blame]

789

def test_mixed_types_rejected(self):

790

# Several functions that process either strings or ASCII encoded bytes

791

# accept multiple arguments. Check they reject mixed type input

Ezio Melotti

2010-12-01 02:32:32 +0000

[diff] [blame]

792

with self.assertRaisesRegex(TypeError, "Cannot mix str"):

Nick Coghlan

2010-11-30 15:48:08 +0000

[diff] [blame]

793

urllib.parse.urlparse("www.python.org", b"http")

Ezio Melotti

2010-12-01 02:32:32 +0000

[diff] [blame]

794

with self.assertRaisesRegex(TypeError, "Cannot mix str"):

Nick Coghlan

2010-11-30 15:48:08 +0000

[diff] [blame]

795

urllib.parse.urlparse(b"www.python.org", "http")

Ezio Melotti

2010-12-01 02:32:32 +0000

[diff] [blame]

796

with self.assertRaisesRegex(TypeError, "Cannot mix str"):

Nick Coghlan

2010-11-30 15:48:08 +0000

[diff] [blame]

797

urllib.parse.urlsplit("www.python.org", b"http")

Ezio Melotti

2010-12-01 02:32:32 +0000

[diff] [blame]

798

with self.assertRaisesRegex(TypeError, "Cannot mix str"):

Nick Coghlan

2010-11-30 15:48:08 +0000

[diff] [blame]

799

urllib.parse.urlsplit(b"www.python.org", "http")

Ezio Melotti

2010-12-01 02:32:32 +0000

[diff] [blame]

800

with self.assertRaisesRegex(TypeError, "Cannot mix str"):

Nick Coghlan

2010-11-30 15:48:08 +0000

[diff] [blame]

801

urllib.parse.urlunparse(( b"http", "www.python.org","","","",""))

Ezio Melotti

2010-12-01 02:32:32 +0000

[diff] [blame]

802

with self.assertRaisesRegex(TypeError, "Cannot mix str"):

Nick Coghlan

2010-11-30 15:48:08 +0000

[diff] [blame]

803

urllib.parse.urlunparse(("http", b"www.python.org","","","",""))

Ezio Melotti

2010-12-01 02:32:32 +0000

[diff] [blame]

804

with self.assertRaisesRegex(TypeError, "Cannot mix str"):

Nick Coghlan

2010-11-30 15:48:08 +0000

[diff] [blame]

805

urllib.parse.urlunsplit((b"http", "www.python.org","","",""))

Ezio Melotti

2010-12-01 02:32:32 +0000

[diff] [blame]

806

with self.assertRaisesRegex(TypeError, "Cannot mix str"):

Nick Coghlan

2010-11-30 15:48:08 +0000

[diff] [blame]

807

urllib.parse.urlunsplit(("http", b"www.python.org","","",""))

Ezio Melotti

2010-12-01 02:32:32 +0000

[diff] [blame]

808

with self.assertRaisesRegex(TypeError, "Cannot mix str"):

Nick Coghlan

2010-11-30 15:48:08 +0000

[diff] [blame]

809

urllib.parse.urljoin("http://python.org", b"http://python.org")

Ezio Melotti

2010-12-01 02:32:32 +0000

[diff] [blame]

810

with self.assertRaisesRegex(TypeError, "Cannot mix str"):

Nick Coghlan

2010-11-30 15:48:08 +0000

[diff] [blame]

811

urllib.parse.urljoin(b"http://python.org", "http://python.org")

812

813

def _check_result_type(self, str_type):

814

num_args = len(str_type._fields)

815

bytes_type = str_type._encoded_counterpart

816

self.assertIs(bytes_type._decoded_counterpart, str_type)

817

str_args = ('',) * num_args

818

bytes_args = (b'',) * num_args

819

str_result = str_type(*str_args)

820

bytes_result = bytes_type(*bytes_args)

821

encoding = 'ascii'

822

errors = 'strict'

823

self.assertEqual(str_result, str_args)

824

self.assertEqual(bytes_result.decode(), str_args)

825

self.assertEqual(bytes_result.decode(), str_result)

826

self.assertEqual(bytes_result.decode(encoding), str_args)

827

self.assertEqual(bytes_result.decode(encoding), str_result)

828

self.assertEqual(bytes_result.decode(encoding, errors), str_args)

829

self.assertEqual(bytes_result.decode(encoding, errors), str_result)

830

self.assertEqual(bytes_result, bytes_args)

831

self.assertEqual(str_result.encode(), bytes_args)

832

self.assertEqual(str_result.encode(), bytes_result)

833

self.assertEqual(str_result.encode(encoding), bytes_args)

834

self.assertEqual(str_result.encode(encoding), bytes_result)

835

self.assertEqual(str_result.encode(encoding, errors), bytes_args)

836

self.assertEqual(str_result.encode(encoding, errors), bytes_result)

837

838

def test_result_pairs(self):

839

# Check encoding and decoding between result pairs

840

result_types = [

841

urllib.parse.DefragResult,

842

urllib.parse.SplitResult,

843

urllib.parse.ParseResult,

844

]

845

for result_type in result_types:

846

self._check_result_type(result_type)

847

Victor Stinner

1d87deb

2011-01-14 13:05:19 +0000

[diff] [blame]

848

def test_parse_qs_encoding(self):

849

result = urllib.parse.parse_qs("key=\u0141%E9", encoding="latin-1")

850

self.assertEqual(result, {'key': ['\u0141\xE9']})

851

result = urllib.parse.parse_qs("key=\u0141%C3%A9", encoding="utf-8")

852

self.assertEqual(result, {'key': ['\u0141\xE9']})

853

result = urllib.parse.parse_qs("key=\u0141%C3%A9", encoding="ascii")

854

self.assertEqual(result, {'key': ['\u0141\ufffd\ufffd']})

855

result = urllib.parse.parse_qs("key=\u0141%E9-", encoding="ascii")

856

self.assertEqual(result, {'key': ['\u0141\ufffd-']})

857

result = urllib.parse.parse_qs("key=\u0141%E9-", encoding="ascii",

858

errors="ignore")

859

self.assertEqual(result, {'key': ['\u0141-']})

860

861

def test_parse_qsl_encoding(self):

862

result = urllib.parse.parse_qsl("key=\u0141%E9", encoding="latin-1")

863

self.assertEqual(result, [('key', '\u0141\xE9')])

864

result = urllib.parse.parse_qsl("key=\u0141%C3%A9", encoding="utf-8")

865

self.assertEqual(result, [('key', '\u0141\xE9')])

866

result = urllib.parse.parse_qsl("key=\u0141%C3%A9", encoding="ascii")

867

self.assertEqual(result, [('key', '\u0141\ufffd\ufffd')])

868

result = urllib.parse.parse_qsl("key=\u0141%E9-", encoding="ascii")

869

self.assertEqual(result, [('key', '\u0141\ufffd-')])

870

result = urllib.parse.parse_qsl("key=\u0141%E9-", encoding="ascii",

871

errors="ignore")

872

self.assertEqual(result, [('key', '\u0141-')])

873

matthewbelisle-wf

2091448

2018-10-19 05:52:59 -0500

[diff] [blame]

874

def test_parse_qsl_max_num_fields(self):

875

with self.assertRaises(ValueError):

876

urllib.parse.parse_qs('&'.join(['a=a']*11), max_num_fields=10)

matthewbelisle-wf

2091448

2018-10-19 05:52:59 -0500

[diff] [blame]

877

urllib.parse.parse_qs('&'.join(['a=a']*10), max_num_fields=10)

878

Adam Goldschmidt

fcbe0cb

2021-02-15 00:41:57 +0200

[diff] [blame]

879

def test_parse_qs_separator(self):

880

parse_qs_semicolon_cases = [

881

(";", {}),

882

(";;", {}),

883

(";a=b", {'a': ['b']}),

884

("a=a+b;b=b+c", {'a': ['a b'], 'b': ['b c']}),

885

("a=1;a=2", {'a': ['1', '2']}),

886

(b";", {}),

887

(b";;", {}),

888

(b";a=b", {b'a': [b'b']}),

889

(b"a=a+b;b=b+c", {b'a': [b'a b'], b'b': [b'b c']}),

890

(b"a=1;a=2", {b'a': [b'1', b'2']}),

891

]

892

for orig, expect in parse_qs_semicolon_cases:

893

with self.subTest(f"Original: {orig!r}, Expected: {expect!r}"):

894

result = urllib.parse.parse_qs(orig, separator=';')

895

self.assertEqual(result, expect, "Error parsing %r" % orig)

896

897

898

def test_parse_qsl_separator(self):

899

parse_qsl_semicolon_cases = [

900

(";", []),

901

(";;", []),

902

(";a=b", [('a', 'b')]),

903

("a=a+b;b=b+c", [('a', 'a b'), ('b', 'b c')]),

904

("a=1;a=2", [('a', '1'), ('a', '2')]),

905

(b";", []),

906

(b";;", []),

907

(b";a=b", [(b'a', b'b')]),

908

(b"a=a+b;b=b+c", [(b'a', b'a b'), (b'b', b'b c')]),

909

(b"a=1;a=2", [(b'a', b'1'), (b'a', b'2')]),

910

]

911

for orig, expect in parse_qsl_semicolon_cases:

912

with self.subTest(f"Original: {orig!r}, Expected: {expect!r}"):

913

result = urllib.parse.parse_qsl(orig, separator=';')

914

self.assertEqual(result, expect, "Error parsing %r" % orig)

915

916

Senthil Kumaran

2011-07-23 18:27:45 +0800

[diff] [blame]

917

def test_urlencode_sequences(self):

918

# Other tests incidentally urlencode things; test non-covered cases:

919

# Sequence and object values.

920

result = urllib.parse.urlencode({'a': [1, 2], 'b': (3, 4, 5)}, True)

Georg Brandl

09a7c72

2012-02-20 21:31:46 +0100

[diff] [blame]

921

# we cannot rely on ordering here

922

assert set(result.split('&')) == {'a=1', 'a=2', 'b=3', 'b=4', 'b=5'}

Senthil Kumaran

2011-07-23 18:27:45 +0800

[diff] [blame]

class Trivial:

def __str__(self):

return 'trivial'

result = urllib.parse.urlencode({'a': Trivial()}, True)

929

self.assertEqual(result, 'a=trivial')

930

R David Murray

c17686f

2015-05-17 20:44:50 -0400

[diff] [blame]

931

def test_urlencode_quote_via(self):

932

result = urllib.parse.urlencode({'a': 'some value'})

933

self.assertEqual(result, "a=some+value")

934

result = urllib.parse.urlencode({'a': 'some value/another'},

935

quote_via=urllib.parse.quote)

936

self.assertEqual(result, "a=some%20value%2Fanother")

937

result = urllib.parse.urlencode({'a': 'some value/another'},

938

safe='/', quote_via=urllib.parse.quote)

939

self.assertEqual(result, "a=some%20value/another")

940

Senthil Kumaran

2011-07-23 18:27:45 +0800

[diff] [blame]

941

def test_quote_from_bytes(self):

942

self.assertRaises(TypeError, urllib.parse.quote_from_bytes, 'foo')

943

result = urllib.parse.quote_from_bytes(b'archaeological arcana')

944

self.assertEqual(result, 'archaeological%20arcana')

945

result = urllib.parse.quote_from_bytes(b'')

946

self.assertEqual(result, '')

947

948

def test_unquote_to_bytes(self):

949

result = urllib.parse.unquote_to_bytes('abc%20def')

950

self.assertEqual(result, b'abc def')

951

result = urllib.parse.unquote_to_bytes('')

952

self.assertEqual(result, b'')

953

954

def test_quote_errors(self):

955

self.assertRaises(TypeError, urllib.parse.quote, b'foo',

956

encoding='utf-8')

957

self.assertRaises(TypeError, urllib.parse.quote, b'foo', errors='strict')

Victor Stinner

1d87deb

2011-01-14 13:05:19 +0000

[diff] [blame]

958

Ezio Melotti

6709b7d

2012-05-19 17:15:19 +0300

[diff] [blame]

959

def test_issue14072(self):

960

p1 = urllib.parse.urlsplit('tel:+31-641044153')

961

self.assertEqual(p1.scheme, 'tel')

962

self.assertEqual(p1.path, '+31-641044153')

963

p2 = urllib.parse.urlsplit('tel:+31641044153')

964

self.assertEqual(p2.scheme, 'tel')

965

self.assertEqual(p2.path, '+31641044153')

Senthil Kumaran

ed30199

2012-12-24 14:00:20 -0800

[diff] [blame]

966

# assert the behavior for urlparse

967

p1 = urllib.parse.urlparse('tel:+31-641044153')

968

self.assertEqual(p1.scheme, 'tel')

969

self.assertEqual(p1.path, '+31-641044153')

970

p2 = urllib.parse.urlparse('tel:+31641044153')

971

self.assertEqual(p2.scheme, 'tel')

972

self.assertEqual(p2.path, '+31641044153')

973

Matt Eaton

2cb4661

2018-03-20 01:41:37 -0500

[diff] [blame]

974

def test_port_casting_failure_message(self):

975

message = "Port could not be cast to integer value as 'oracle'"

976

p1 = urllib.parse.urlparse('http://Server=sde; Service=sde:oracle')

977

with self.assertRaisesRegex(ValueError, message):

978

p1.port

979

980

p2 = urllib.parse.urlsplit('http://Server=sde; Service=sde:oracle')

981

with self.assertRaisesRegex(ValueError, message):

982

p2.port

983

Senthil Kumaran

ed30199

2012-12-24 14:00:20 -0800

[diff] [blame]

984

def test_telurl_params(self):

985

p1 = urllib.parse.urlparse('tel:123-4;phone-context=+1-650-516')

986

self.assertEqual(p1.scheme, 'tel')

987

self.assertEqual(p1.path, '123-4')

988

self.assertEqual(p1.params, 'phone-context=+1-650-516')

989

990

p1 = urllib.parse.urlparse('tel:+1-201-555-0123')

991

self.assertEqual(p1.scheme, 'tel')

992

self.assertEqual(p1.path, '+1-201-555-0123')

993

self.assertEqual(p1.params, '')

994

995

p1 = urllib.parse.urlparse('tel:7042;phone-context=example.com')

996

self.assertEqual(p1.scheme, 'tel')

997

self.assertEqual(p1.path, '7042')

998

self.assertEqual(p1.params, 'phone-context=example.com')

999

1000

p1 = urllib.parse.urlparse('tel:863-1234;phone-context=+1-914-555')

1001

self.assertEqual(p1.scheme, 'tel')

1002

self.assertEqual(p1.path, '863-1234')

1003

self.assertEqual(p1.params, 'phone-context=+1-914-555')

1004

R David Murray

f516388

2013-03-21 20:56:51 -0400

[diff] [blame]

1005

def test_Quoter_repr(self):

1006

quoter = urllib.parse.Quoter(urllib.parse._ALWAYS_SAFE)

1007

self.assertIn('Quoter', repr(quoter))

1008

Serhiy Storchaka

1515450

2015-04-07 19:09:01 +0300

[diff] [blame]

def test_all(self):

expected = []

undocumented = {

'splitattr', 'splithost', 'splitnport', 'splitpasswd',

1013

'splitport', 'splitquery', 'splittag', 'splittype', 'splituser',

1014

'splitvalue',

1015

'Quoter', 'ResultBase', 'clear_cache', 'to_bytes', 'unwrap',

1016

}

1017

for name in dir(urllib.parse):

1018

if name.startswith('_') or name in undocumented:

1019

continue

1020

object = getattr(urllib.parse, name)

1021

if getattr(object, '__module__', None) == 'urllib.parse':

1022

expected.append(name)

1023

self.assertCountEqual(urllib.parse.__all__, expected)

1024

Steve Dower

16e6f7d

2019-03-07 08:02:26 -0800

[diff] [blame]

1025

def test_urlsplit_normalization(self):

1026

# Certain characters should never occur in the netloc,

1027

# including under normalization.

1028

# Ensure that ALL of them are detected and cause an error

1029

illegal_chars = '/:#?@'

1030

hex_chars = {'{:04X}'.format(ord(c)) for c in illegal_chars}

1031

denorm_chars = [

1032

c for c in map(chr, range(128, sys.maxunicode))

1033

if (hex_chars & set(unicodedata.decomposition(c).split()))

1034

and c not in illegal_chars

1035

]

1036

# Sanity check that we found at least one such character

1037

self.assertIn('\u2100', denorm_chars)

1038

self.assertIn('\uFF03', denorm_chars)

1039

Steve Dower

d537ab0

2019-04-30 12:03:02 +0000

[diff] [blame]

1040

# bpo-36742: Verify port separators are ignored when they

1041

# existed prior to decomposition

1042

urllib.parse.urlsplit('http://\u30d5\u309a:80')

1043

with self.assertRaises(ValueError):

1044

urllib.parse.urlsplit('http://\u30d5\u309a\ufe1380')

1045

Steve Dower

16e6f7d

2019-03-07 08:02:26 -0800

[diff] [blame]

1046

for scheme in ["http", "https", "ftp"]:

Steve Dower

8d0ef0b

2019-06-04 08:55:30 -0700

[diff] [blame]

1047

for netloc in ["netloc{}false.netloc", "n{}user@netloc"]:

1048

for c in denorm_chars:

1049

url = "{}://{}/path".format(scheme, netloc.format(c))

1050

with self.subTest(url=url, char='{:04X}'.format(ord(c))):

1051

with self.assertRaises(ValueError):

1052

urllib.parse.urlsplit(url)

Senthil Kumaran

6be85c5

2010-02-19 07:42:50 +0000

[diff] [blame]

1053

Serhiy Storchaka

2015-03-02 16:32:29 +0200

[diff] [blame]

1054

class Utility_Tests(unittest.TestCase):
    """Testcase to test the various utility functions in the urllib."""
    # In Python 2 this test class was in test_urllib.

    def _check_pairs(self, func, cases):
        # Assert func(arg) == expected for every (arg, expected) pair.
        # Each pair runs in its own subTest so that one failing input
        # does not hide the results of the inputs listed after it.
        for arg, expected in cases:
            with self.subTest(arg=arg):
                self.assertEqual(func(arg), expected)

    def test_splittype(self):
        self._check_pairs(urllib.parse._splittype, [
            ('type:opaquestring', ('type', 'opaquestring')),
            ('opaquestring', (None, 'opaquestring')),
            (':opaquestring', (None, ':opaquestring')),
            ('type:', ('type', '')),
            ('type:opaque:string', ('type', 'opaque:string')),
        ])

    def test_splithost(self):
        self._check_pairs(urllib.parse._splithost, [
            ('//www.example.org:80/foo/bar/baz.html',
             ('www.example.org:80', '/foo/bar/baz.html')),
            ('//www.example.org:80',
             ('www.example.org:80', '')),
            ('/foo/bar/baz.html',
             (None, '/foo/bar/baz.html')),

            # bpo-30500: # starts a fragment.
            ('//127.0.0.1#@host.com',
             ('127.0.0.1', '/#@host.com')),
            ('//127.0.0.1#@host.com:80',
             ('127.0.0.1', '/#@host.com:80')),
            ('//127.0.0.1:80#@host.com',
             ('127.0.0.1:80', '/#@host.com')),

            # Empty host is returned as empty string.
            ("///file",
             ('', '/file')),

            # Trailing semicolon, question mark and hash symbol are kept.
            ("//example.net/file;",
             ('example.net', '/file;')),
            ("//example.net/file?",
             ('example.net', '/file?')),
            ("//example.net/file#",
             ('example.net', '/file#')),
        ])

    def test_splituser(self):
        self._check_pairs(urllib.parse._splituser, [
            ('User:Pass@www.python.org:080',
             ('User:Pass', 'www.python.org:080')),
            ('@www.python.org:080',
             ('', 'www.python.org:080')),
            ('www.python.org:080',
             (None, 'www.python.org:080')),
            ('User:Pass@',
             ('User:Pass', '')),
            ('User@example.com:Pass@www.python.org:080',
             ('User@example.com:Pass', 'www.python.org:080')),
        ])

    def test_splitpasswd(self):
        # Some of the password examples are not sensible, but they are
        # included to conform to RFC 2617 and to address issue4675.
        self._check_pairs(urllib.parse._splitpasswd, [
            ('user:ab', ('user', 'ab')),
            ('user:a\nb', ('user', 'a\nb')),
            ('user:a\tb', ('user', 'a\tb')),
            ('user:a\rb', ('user', 'a\rb')),
            ('user:a\fb', ('user', 'a\fb')),
            ('user:a\vb', ('user', 'a\vb')),
            ('user:a:b', ('user', 'a:b')),
            ('user:a b', ('user', 'a b')),
            ('user 2:ab', ('user 2', 'ab')),
            ('user+1:a+b', ('user+1', 'a+b')),
            ('user:', ('user', '')),
            ('user', ('user', None)),
            (':ab', ('', 'ab')),
        ])

    def test_splitport(self):
        self._check_pairs(urllib.parse._splitport, [
            ('parrot:88', ('parrot', '88')),
            ('parrot', ('parrot', None)),
            ('parrot:', ('parrot', None)),
            ('127.0.0.1', ('127.0.0.1', None)),
            ('parrot:cheese', ('parrot:cheese', None)),
            ('[::1]:88', ('[::1]', '88')),
            ('[::1]', ('[::1]', None)),
            (':88', ('', '88')),
        ])

    def test_splitnport(self):
        splitnport = urllib.parse._splitnport
        # Each case is (positional arguments, expected result); the
        # optional second argument is the default port.
        cases = [
            (('parrot:88',), ('parrot', 88)),
            (('parrot',), ('parrot', -1)),
            (('parrot', 55), ('parrot', 55)),
            (('parrot:',), ('parrot', -1)),
            (('parrot:', 55), ('parrot', 55)),
            (('127.0.0.1',), ('127.0.0.1', -1)),
            (('127.0.0.1', 55), ('127.0.0.1', 55)),
            (('parrot:cheese',), ('parrot', None)),
            (('parrot:cheese', 55), ('parrot', None)),
        ]
        for args, expected in cases:
            with self.subTest(args=args):
                self.assertEqual(splitnport(*args), expected)

    def test_splitquery(self):
        # Normal cases are exercised by other tests; also cover a URL with
        # a second '?', a URL without a query and a URL that is only a
        # query. (testcase ensuring coverage)
        self._check_pairs(urllib.parse._splitquery, [
            ('http://python.org/fake?foo=bar',
             ('http://python.org/fake', 'foo=bar')),
            ('http://python.org/fake?foo=bar?',
             ('http://python.org/fake?foo=bar', '')),
            ('http://python.org/fake',
             ('http://python.org/fake', None)),
            ('?foo=bar', ('', 'foo=bar')),
        ])

    def test_splittag(self):
        self._check_pairs(urllib.parse._splittag, [
            ('http://example.com?foo=bar#baz',
             ('http://example.com?foo=bar', 'baz')),
            ('http://example.com?foo=bar#',
             ('http://example.com?foo=bar', '')),
            ('#baz', ('', 'baz')),
            ('http://example.com?foo=bar',
             ('http://example.com?foo=bar', None)),
            ('http://example.com?foo=bar#baz#boo',
             ('http://example.com?foo=bar#baz', 'boo')),
        ])

    def test_splitattr(self):
        self._check_pairs(urllib.parse._splitattr, [
            ('/path;attr1=value1;attr2=value2',
             ('/path', ['attr1=value1', 'attr2=value2'])),
            ('/path;', ('/path', [''])),
            (';attr1=value1;attr2=value2',
             ('', ['attr1=value1', 'attr2=value2'])),
            ('/path', ('/path', [])),
        ])

    def test_splitvalue(self):
        # Normal cases are exercised by other tests; test pathological cases
        # with no key/value pairs. (testcase ensuring coverage)
        self._check_pairs(urllib.parse._splitvalue, [
            ('foo=bar', ('foo', 'bar')),
            ('foo=', ('foo', '')),
            ('=bar', ('', 'bar')),
            ('foobar', ('foobar', None)),
            ('foo=bar=baz', ('foo', 'bar=baz')),
        ])

    def test_to_bytes(self):
        result = urllib.parse._to_bytes('http://www.python.org')
        self.assertEqual(result, 'http://www.python.org')
        # A non-ASCII character in the URL must be rejected.
        self.assertRaises(UnicodeError, urllib.parse._to_bytes,
                          'http://www.python.org/medi\u00e6val')

    def test_unwrap(self):
        # Every wrapped spelling must unwrap to the same bare URL.
        for wrapped_url in ('<URL:scheme://host/path>', '<scheme://host/path>',
                            'URL:scheme://host/path', 'scheme://host/path'):
            with self.subTest(wrapped_url=wrapped_url):
                url = urllib.parse.unwrap(wrapped_url)
                self.assertEqual(url, 'scheme://host/path')

Serhiy Storchaka

2015-03-02 16:32:29 +0200

[diff] [blame]

1203

Skip Montanaro

2002-03-23 05:32:10 +0000

[diff] [blame]

1204

Cheryl Sabella

0250de4

2018-04-25 16:51:54 -0700

[diff] [blame]

1205

class DeprecationTest(unittest.TestCase):
    """Check the DeprecationWarning emitted by the legacy public helpers
    of urllib.parse (the split*() family and to_bytes())."""

    def _check_deprecation(self, name, replacement=None):
        # Call urllib.parse.<name>('') and verify that it emits a
        # DeprecationWarning with the expected text.  When *replacement*
        # is given, the message must also point at
        # urllib.parse.<replacement>() as the function to use instead.
        expected = 'urllib.parse.{}() is deprecated as of 3.8'.format(name)
        if replacement is not None:
            expected += ', use urllib.parse.{}() instead'.format(replacement)
        with self.assertWarns(DeprecationWarning) as cm:
            getattr(urllib.parse, name)('')
        self.assertEqual(str(cm.warning), expected)

    def test_splittype_deprecation(self):
        self._check_deprecation('splittype', 'urlparse')

    def test_splithost_deprecation(self):
        self._check_deprecation('splithost', 'urlparse')

    def test_splituser_deprecation(self):
        self._check_deprecation('splituser', 'urlparse')

    def test_splitpasswd_deprecation(self):
        self._check_deprecation('splitpasswd', 'urlparse')

    def test_splitport_deprecation(self):
        self._check_deprecation('splitport', 'urlparse')

    def test_splitnport_deprecation(self):
        self._check_deprecation('splitnport', 'urlparse')

    def test_splitquery_deprecation(self):
        self._check_deprecation('splitquery', 'urlparse')

    def test_splittag_deprecation(self):
        self._check_deprecation('splittag', 'urlparse')

    def test_splitattr_deprecation(self):
        self._check_deprecation('splitattr', 'urlparse')

    def test_splitvalue_deprecation(self):
        # splitvalue() is the only helper that points at parse_qsl().
        self._check_deprecation('splitvalue', 'parse_qsl')

    def test_to_bytes_deprecation(self):
        # to_bytes() has no suggested replacement in its message.
        self._check_deprecation('to_bytes')

1282

Cheryl Sabella

0250de4

2018-04-25 16:51:54 -0700

[diff] [blame]

1283

Skip Montanaro

2002-03-23 05:32:10 +0000

[diff] [blame]

1284

if __name__ == "__main__":

Serhiy Storchaka