Blame - Lib/urllib/parse.py - platform/external/python/cpython3

2008-06-18 20:49:58 +0000

[diff] [blame]

1

"""Parse (absolute and relative) URLs.

2

Senthil Kumaran

2010-04-17 14:44:14 +0000

[diff] [blame]

3

urlparse module is based upon the following RFC specifications.

4

5

RFC 3986 (STD66): "Uniform Resource Identifiers" by T. Berners-Lee, R. Fielding

6

and L. Masinter, January 2005.

7

8

RFC 2732 : "Format for Literal IPv6 Addresses in URL's" by R.Hinden, B.Carpenter

9

and L.Masinter, December 1999.

10

Benjamin Peterson

d7c3ed5

2010-06-27 22:32:30 +0000

[diff] [blame]

11

RFC 2396: "Uniform Resource Identifiers (URI)": Generic Syntax by T.

Senthil Kumaran

2010-04-17 14:44:14 +0000

[diff] [blame]

12

Berners-Lee, R. Fielding, and L. Masinter, August 1998.

13

David Malcolm

ee25568

2010-12-02 16:41:00 +0000

[diff] [blame]

14

RFC 2368: "The mailto URL scheme", by P.Hoffman , L Masinter, J. Zawinski, July 1998.

Senthil Kumaran

2010-04-17 14:44:14 +0000

[diff] [blame]

15

16

RFC 1808: "Relative Uniform Resource Locators", by R. Fielding, UC Irvine, June

17

1995.

18

Benjamin Peterson

d7c3ed5

2010-06-27 22:32:30 +0000

[diff] [blame]

19

RFC 1738: "Uniform Resource Locators (URL)" by T. Berners-Lee, L. Masinter, M.

Senthil Kumaran

2010-04-17 14:44:14 +0000

[diff] [blame]

20

McCahill, December 1994

21

Benjamin Peterson

d7c3ed5

2010-06-27 22:32:30 +0000

[diff] [blame]

22

RFC 3986 is considered the current standard and any future changes to

23

urlparse module should conform with it. The urlparse module is

24

currently not entirely compliant with this RFC due to de facto

25

scenarios for parsing, and for backward compatibility purposes, some

26

parsing quirks from older RFCs are retained. The testcases in

Senthil Kumaran

2010-04-17 14:44:14 +0000

[diff] [blame]

27

test_urlparse.py provide a good indicator of parsing behavior.

Jeremy Hylton

2008-06-18 20:49:58 +0000

[diff] [blame]

28

"""

29

Serhiy Storchaka

2013-03-14 21:31:37 +0200

[diff] [blame]

30

import re

Facundo Batista

2ac5de2

2008-07-07 18:24:11 +0000

[diff] [blame]

31

import sys

Guido van Rossum

2008-08-18 21:44:30 +0000

[diff] [blame]

32

import collections

Cheryl Sabella

2018-04-25 16:51:54 -0700

[diff] [blame]

33

import warnings

Facundo Batista

2ac5de2

2008-07-07 18:24:11 +0000

[diff] [blame]

34

Jeremy Hylton

2008-06-18 20:49:58 +0000

[diff] [blame]

35

__all__ = ["urlparse", "urlunparse", "urljoin", "urldefrag",

Senthil Kumaran

0256b2a

2010-10-25 16:36:20 +0000

[diff] [blame]

36

"urlsplit", "urlunsplit", "urlencode", "parse_qs",

37

"parse_qsl", "quote", "quote_plus", "quote_from_bytes",

Serhiy Storchaka

1515450

2015-04-07 19:09:01 +0300

[diff] [blame]

38

"unquote", "unquote_plus", "unquote_to_bytes",

39

"DefragResult", "ParseResult", "SplitResult",

40

"DefragResultBytes", "ParseResultBytes", "SplitResultBytes"]

Jeremy Hylton

2008-06-18 20:49:58 +0000

[diff] [blame]

41

Senthil Kumaran

2017-05-17 21:48:59 -0700

[diff] [blame]

42

# A classification of schemes.

43

# The empty string classifies URLs with no scheme specified,

44

# being the default value returned by “urlsplit” and “urlparse”.

45

46

uses_relative = ['', 'ftp', 'http', 'gopher', 'nntp', 'imap',

Jeremy Hylton

2008-06-18 20:49:58 +0000

[diff] [blame]

47

'wais', 'file', 'https', 'shttp', 'mms',

Senthil Kumaran

2017-05-17 21:48:59 -0700

[diff] [blame]

48

'prospero', 'rtsp', 'rtspu', 'sftp',

Berker Peksag

f676748

2016-09-16 14:43:58 +0300

[diff] [blame]

49

'svn', 'svn+ssh', 'ws', 'wss']

Senthil Kumaran

2017-05-17 21:48:59 -0700

[diff] [blame]

50

51

uses_netloc = ['', 'ftp', 'http', 'gopher', 'nntp', 'telnet',

Jeremy Hylton

2008-06-18 20:49:58 +0000

[diff] [blame]

52

'imap', 'wais', 'file', 'mms', 'https', 'shttp',

Senthil Kumaran

2017-05-17 21:48:59 -0700

[diff] [blame]

53

'snews', 'prospero', 'rtsp', 'rtspu', 'rsync',

Berker Peksag

f676748

2016-09-16 14:43:58 +0300

[diff] [blame]

54

'svn', 'svn+ssh', 'sftp', 'nfs', 'git', 'git+ssh',

55

'ws', 'wss']

Senthil Kumaran

2017-05-17 21:48:59 -0700

[diff] [blame]

56

57

uses_params = ['', 'ftp', 'hdl', 'prospero', 'http', 'imap',

Jeremy Hylton

2008-06-18 20:49:58 +0000

[diff] [blame]

58

'https', 'shttp', 'rtsp', 'rtspu', 'sip', 'sips',

Senthil Kumaran

2017-05-17 21:48:59 -0700

[diff] [blame]

59

'mms', 'sftp', 'tel']

Jeremy Hylton

2008-06-18 20:49:58 +0000

[diff] [blame]

60

Georg Brandl

2012-08-24 18:15:29 +0200

[diff] [blame]

61

# These are not actually used anymore, but should stay for backwards

62

# compatibility. (They are undocumented, but have a public-looking name.)

Senthil Kumaran

2017-05-17 21:48:59 -0700

[diff] [blame]

63

Georg Brandl

2012-08-24 18:15:29 +0200

[diff] [blame]

64

non_hierarchical = ['gopher', 'hdl', 'mailto', 'news',

65

'telnet', 'wais', 'imap', 'snews', 'sip', 'sips']

Senthil Kumaran

2017-05-17 21:48:59 -0700

[diff] [blame]

66

67

uses_query = ['', 'http', 'wais', 'imap', 'https', 'shttp', 'mms',

68

'gopher', 'rtsp', 'rtspu', 'sip', 'sips']

69

70

uses_fragment = ['', 'ftp', 'hdl', 'http', 'gopher', 'news',

Georg Brandl

2012-08-24 18:15:29 +0200

[diff] [blame]

71

'nntp', 'wais', 'https', 'shttp', 'snews',

Senthil Kumaran

2017-05-17 21:48:59 -0700

[diff] [blame]

72

'file', 'prospero']

Georg Brandl

2012-08-24 18:15:29 +0200

[diff] [blame]

73

Jeremy Hylton

2008-06-18 20:49:58 +0000

[diff] [blame]

74

# Characters valid in scheme names

75

scheme_chars = ('abcdefghijklmnopqrstuvwxyz'

76

'ABCDEFGHIJKLMNOPQRSTUVWXYZ'

'0123456789'

'+-.')

Nick Coghlan

2010-11-30 15:48:08 +0000

[diff] [blame]

80

# XXX: Consider replacing with functools.lru_cache

Jeremy Hylton

2008-06-18 20:49:58 +0000

[diff] [blame]

MAX_CACHE_SIZE = 20

_parse_cache = {}

def clear_cache():
    """Empty both module-level caches: the parse cache and the quoters cache."""
    for cache in (_parse_cache, _safe_quoters):
        cache.clear()

Jeremy Hylton

2008-06-18 20:49:58 +0000

[diff] [blame]

88

89

Nick Coghlan

2010-11-30 15:48:08 +0000

[diff] [blame]

90

# Helpers for bytes handling

91

# For 3.2, we deliberately require applications that

92

# handle improperly quoted URLs to do their own

93

# decoding and encoding. If valid use cases are

94

# presented, we may relax this by using latin-1

95

# decoding internally for 3.3

96

_implicit_encoding = 'ascii'

97

_implicit_errors = 'strict'

def _noop(obj):

return obj

def _encode_result(obj, encoding=_implicit_encoding,
                        errors=_implicit_errors):
    """Return ``obj.encode(encoding, errors)``.

    The defaults are the module's implicit encoding and error handler.
    *obj* only needs an ``encode(encoding, errors)`` method.
    """
    encoded = obj.encode(encoding, errors)
    return encoded

105

106

def _decode_args(args, encoding=_implicit_encoding,
                       errors=_implicit_errors):
    """Decode every item of *args* and return the results as a tuple.

    Falsy items (empty bytes, '', None, ...) are mapped to the empty str
    instead of being decoded.
    """
    decoded = []
    for item in args:
        decoded.append(item.decode(encoding, errors) if item else '')
    return tuple(decoded)

109

110

def _coerce_args(*args):
    """Normalise the arguments to str and pick the matching result coercer.

    Returns the (possibly decoded) arguments followed by one extra element:
    - _noop when the first argument is a str (inputs are returned as given)
    - _encode_result otherwise (inputs are decoded via _decode_args)

    Raises TypeError when a non-empty later argument disagrees with the
    first one about being a str.
    """
    first, rest = args[0], args[1:]
    expect_str = isinstance(first, str)
    for candidate in rest:
        # Empty values are tolerated regardless of type, which supports the
        # "scheme=''" default argument used by some functions.
        if not candidate:
            continue
        if isinstance(candidate, str) != expect_str:
            raise TypeError("Cannot mix str and non-str arguments")
    if not expect_str:
        return _decode_args(args) + (_encode_result,)
    return args + (_noop,)

125

126

# Result objects are more helpful than simple tuples

127

class _ResultMixinStr(object):

128

"""Standard approach to encoding parsed results from str to bytes"""

129

__slots__ = ()

130

131

def encode(self, encoding='ascii', errors='strict'):

132

return self._encoded_counterpart(*(x.encode(encoding, errors) for x in self))

133

134

135

class _ResultMixinBytes(object):

136

"""Standard approach to decoding parsed results from bytes to str"""

137

__slots__ = ()

138

139

def decode(self, encoding='ascii', errors='strict'):

140

return self._decoded_counterpart(*(x.decode(encoding, errors) for x in self))

141

142

143

class _NetlocResultMixinBase(object):

144

"""Shared methods for the parsed result objects containing a netloc element"""

145

__slots__ = ()

Jeremy Hylton

2008-06-18 20:49:58 +0000

[diff] [blame]

146

147

@property

148

def username(self):

Nick Coghlan

2010-11-30 15:48:08 +0000

[diff] [blame]

149

return self._userinfo[0]

Jeremy Hylton

2008-06-18 20:49:58 +0000

[diff] [blame]

150

151

@property

152

def password(self):

Nick Coghlan

2010-11-30 15:48:08 +0000

[diff] [blame]

153

return self._userinfo[1]

Jeremy Hylton

2008-06-18 20:49:58 +0000

[diff] [blame]

154

155

@property

156

def hostname(self):

Nick Coghlan

2010-11-30 15:48:08 +0000

[diff] [blame]

157

hostname = self._hostinfo[0]

158

if not hostname:

Коренберг Марк

fbd6051

2017-12-21 17:16:17 +0500

[diff] [blame]

159

return None

160

# Scoped IPv6 address may have zone info, which must not be lowercased

161

# like http://[fe80::822a:a8ff:fe49:470c%tESt]:1234/keys

162

separator = '%' if isinstance(hostname, str) else b'%'

163

hostname, percent, zone = hostname.partition(separator)

164

return hostname.lower() + percent + zone

Jeremy Hylton

2008-06-18 20:49:58 +0000

[diff] [blame]

165

166

@property

167

def port(self):

Nick Coghlan

2010-11-30 15:48:08 +0000

[diff] [blame]

168

port = self._hostinfo[1]

169

if port is not None:

Matt Eaton

2cb4661

2018-03-20 01:41:37 -0500

[diff] [blame]

try:

port = int(port, 10)

except ValueError:

message = f'Port could not be cast to integer value as {port!r}'

174

raise ValueError(message) from None

Senthil Kumaran

2fc5a50

2012-05-24 21:56:17 +0800

[diff] [blame]

175

if not ( 0 <= port <= 65535):

Robert Collins

dfa95c9

2015-08-10 09:53:30 +1200

[diff] [blame]

176

raise ValueError("Port out of range 0-65535")

Nick Coghlan

2010-11-30 15:48:08 +0000

[diff] [blame]

return port

class _NetlocResultMixinStr(_NetlocResultMixinBase, _ResultMixinStr):
    """str flavour of the netloc helpers: splits self.netloc on '@', ':', '[' and ']'."""
    __slots__ = ()

    @property
    def _userinfo(self):
        """Return (username, password) taken from the text before the last '@'.

        Both are None when there is no '@'; password is None when the
        user-info part has no ':'.
        """
        credentials, at_sign, _ = self.netloc.rpartition('@')
        if not at_sign:
            return None, None
        user, colon, secret = credentials.partition(':')
        return user, (secret if colon else None)

    @property
    def _hostinfo(self):
        """Return (hostname, port) taken from the text after the last '@'.

        A bracketed host ('[...]') is returned without its brackets.  The
        port is the raw text after ':' or None when that text is empty.
        """
        host_port = self.netloc.rpartition('@')[2]
        _, bracket, after_bracket = host_port.partition('[')
        if bracket:
            host, _, tail = after_bracket.partition(']')
            port = tail.partition(':')[2]
        else:
            host, _, port = host_port.partition(':')
        return host, (port or None)

208

209

210

class _NetlocResultMixinBytes(_NetlocResultMixinBase, _ResultMixinBytes):
    """bytes flavour of the netloc helpers: splits self.netloc on b'@', b':', b'[' and b']'."""
    __slots__ = ()

    @property
    def _userinfo(self):
        """Return (username, password) taken from the bytes before the last b'@'.

        Both are None when there is no b'@'; password is None when the
        user-info part has no b':'.
        """
        credentials, at_sign, _ = self.netloc.rpartition(b'@')
        if not at_sign:
            return None, None
        user, colon, secret = credentials.partition(b':')
        return user, (secret if colon else None)

    @property
    def _hostinfo(self):
        """Return (hostname, port) taken from the bytes after the last b'@'.

        A bracketed host (b'[...]') is returned without its brackets.  The
        port is the raw bytes after b':' or None when they are empty.
        """
        host_port = self.netloc.rpartition(b'@')[2]
        _, bracket, after_bracket = host_port.partition(b'[')
        if bracket:
            host, _, tail = after_bracket.partition(b']')
            port = tail.partition(b':')[2]
        else:
            host, _, port = host_port.partition(b':')
        return host, (port or None)

238

Jeremy Hylton

2008-06-18 20:49:58 +0000

[diff] [blame]

239

240

from collections import namedtuple

241

Nick Coghlan

2010-11-30 15:48:08 +0000

[diff] [blame]

242

_DefragResultBase = namedtuple('DefragResult', 'url fragment')

Senthil Kumaran

86f7109

2016-01-14 00:11:39 -0800

[diff] [blame]

243

_SplitResultBase = namedtuple(

244

'SplitResult', 'scheme netloc path query fragment')

245

_ParseResultBase = namedtuple(

246

'ParseResult', 'scheme netloc path params query fragment')

247

248

_DefragResultBase.__doc__ = """

249

DefragResult(url, fragment)

250

251

A 2-tuple that contains the url without fragment identifier and the fragment

252

identifier as a separate argument.

253

"""

254

255

_DefragResultBase.url.__doc__ = """The URL with no fragment identifier."""

256

257

_DefragResultBase.fragment.__doc__ = """

258

Fragment identifier separated from URL, that allows indirect identification of a

259

secondary resource by reference to a primary resource and additional identifying

information.

"""

_SplitResultBase.__doc__ = """

264

SplitResult(scheme, netloc, path, query, fragment)

265

266

A 5-tuple that contains the different components of a URL. Similar to

267

ParseResult, but does not split params.

268

"""

269

270

_SplitResultBase.scheme.__doc__ = """Specifies URL scheme for the request."""

271

272

_SplitResultBase.netloc.__doc__ = """

273

Network location where the request is made to.

274

"""

275

276

_SplitResultBase.path.__doc__ = """

277

The hierarchical path, such as the path to a file to download.

278

"""

279

280

_SplitResultBase.query.__doc__ = """

281

The query component, that contains non-hierarchical data, that along with data

282

in path component, identifies a resource in the scope of URI's scheme and

network location.

"""

_SplitResultBase.fragment.__doc__ = """

287

Fragment identifier, that allows indirect identification of a secondary resource

288

by reference to a primary resource and additional identifying information.

289

"""

290

291

_ParseResultBase.__doc__ = """

Cheryl Sabella

2018-04-25 16:51:54 -0700

[diff] [blame]

292

ParseResult(scheme, netloc, path, params, query, fragment)

Senthil Kumaran

86f7109

2016-01-14 00:11:39 -0800

[diff] [blame]

293

294

A 6-tuple that contains components of a parsed URL.

295

"""

296

297

_ParseResultBase.scheme.__doc__ = _SplitResultBase.scheme.__doc__

298

_ParseResultBase.netloc.__doc__ = _SplitResultBase.netloc.__doc__

299

_ParseResultBase.path.__doc__ = _SplitResultBase.path.__doc__

300

_ParseResultBase.params.__doc__ = """

301

Parameters for last path element used to dereference the URI in order to provide

302

access to perform some operation on the resource.

303

"""

304

305

_ParseResultBase.query.__doc__ = _SplitResultBase.query.__doc__

306

_ParseResultBase.fragment.__doc__ = _SplitResultBase.fragment.__doc__

307

Jeremy Hylton

2008-06-18 20:49:58 +0000

[diff] [blame]

308

Nick Coghlan

2010-11-30 15:48:08 +0000

[diff] [blame]

309

# For backwards compatibility, alias _NetlocResultMixinStr

310

# ResultBase is no longer part of the documented API, but it is

311

# retained since deprecating it isn't worth the hassle

312

ResultBase = _NetlocResultMixinStr

313

314

# Structured result objects for string data

315

class DefragResult(_DefragResultBase, _ResultMixinStr):
    __slots__ = ()

    def geturl(self):
        """Return the URL, with '#' and the fragment appended when the fragment is non-empty."""
        if not self.fragment:
            return self.url
        return self.url + '#' + self.fragment

Jeremy Hylton

2008-06-18 20:49:58 +0000

[diff] [blame]

322

Nick Coghlan

2010-11-30 15:48:08 +0000

[diff] [blame]

323

class SplitResult(_SplitResultBase, _NetlocResultMixinStr):
    __slots__ = ()

    def geturl(self):
        """Rebuild a URL from this tuple by passing it to urlunsplit()."""
        rebuilt = urlunsplit(self)
        return rebuilt

327

Nick Coghlan

2010-11-30 15:48:08 +0000

[diff] [blame]

328

class ParseResult(_ParseResultBase, _NetlocResultMixinStr):
    __slots__ = ()

    def geturl(self):
        """Rebuild a URL from this tuple by passing it to urlunparse()."""
        rebuilt = urlunparse(self)
        return rebuilt

332

Nick Coghlan

2010-11-30 15:48:08 +0000

[diff] [blame]

333

# Structured result objects for bytes data

334

class DefragResultBytes(_DefragResultBase, _ResultMixinBytes):
    __slots__ = ()

    def geturl(self):
        """Return the URL, with b'#' and the fragment appended when the fragment is non-empty."""
        if not self.fragment:
            return self.url
        return self.url + b'#' + self.fragment

class SplitResultBytes(_SplitResultBase, _NetlocResultMixinBytes):
    __slots__ = ()

    def geturl(self):
        """Rebuild a URL from this tuple by passing it to urlunsplit()."""
        rebuilt = urlunsplit(self)
        return rebuilt

346

347

class ParseResultBytes(_ParseResultBase, _NetlocResultMixinBytes):
    __slots__ = ()

    def geturl(self):
        """Rebuild a URL from this tuple by passing it to urlunparse()."""
        rebuilt = urlunparse(self)
        return rebuilt

351

352

# Set up the encode/decode result pairs

353

def _fix_result_transcoding():
    """Link each str result class to its bytes twin and vice versa.

    Sets ``_encoded_counterpart`` on the str classes and
    ``_decoded_counterpart`` on the bytes classes.
    """
    str_classes = (DefragResult, SplitResult, ParseResult)
    bytes_classes = (DefragResultBytes, SplitResultBytes, ParseResultBytes)
    for str_cls, bytes_cls in zip(str_classes, bytes_classes):
        str_cls._encoded_counterpart = bytes_cls
        bytes_cls._decoded_counterpart = str_cls

362

363

_fix_result_transcoding()

364

del _fix_result_transcoding

Jeremy Hylton

2008-06-18 20:49:58 +0000

[diff] [blame]

365

366

def urlparse(url, scheme='', allow_fragments=True):
    """Parse a URL into 6 components:
    <scheme>://<netloc>/<path>;<params>?<query>#<fragment>

    Return a 6-tuple: (scheme, netloc, path, params, query, fragment).
    The components are not broken up into smaller bits (e.g. netloc stays
    a single string) and % escapes are not expanded."""
    url, scheme, _coerce_result = _coerce_args(url, scheme)
    scheme, netloc, path, query, fragment = urlsplit(
        url, scheme, allow_fragments)
    # Params are split off only for schemes listed in uses_params.
    params = ''
    if scheme in uses_params and ';' in path:
        path, params = _splitparams(path)
    parsed = ParseResult(scheme, netloc, path, params, query, fragment)
    return _coerce_result(parsed)

Jeremy Hylton

2008-06-18 20:49:58 +0000

[diff] [blame]

381

382

def _splitparams(url):

383

if '/' in url:

384

i = url.find(';', url.rfind('/'))

if i < 0:

return url, ''

else:

i = url.find(';')

return url[:i], url[i+1:]

390

391

def _splitnetloc(url, start=0):

392

delim = len(url) # position of end of domain part of url, default is end

393

for c in '/?#': # look for delimiters; the order is NOT important

394

wdelim = url.find(c, start) # find first of this delim

395

if wdelim >= 0: # if found

396

delim = min(delim, wdelim) # use earliest delim position

397

return url[start:delim], url[delim:] # return (domain, rest)

398

Steve Dower

2019-03-07 08:02:26 -0800

[diff] [blame]

399

def _checknetloc(netloc):

400

if not netloc or netloc.isascii():

401

return

402

# looking for characters like \u2100 that expand to 'a/c'

403

# IDNA uses NFKC equivalence, so normalize for this check

404

import unicodedata

Steve Dower

8d0ef0b

2019-06-04 08:55:30 -0700

[diff] [blame]

405

n = netloc.replace('@', '') # ignore characters already included

406

n = n.replace(':', '') # but not the surrounding text

407

n = n.replace('#', '')

Steve Dower

d537ab0

2019-04-30 12:03:02 +0000

[diff] [blame]

408

n = n.replace('?', '')

409

netloc2 = unicodedata.normalize('NFKC', n)

410

if n == netloc2:

Steve Dower

2019-03-07 08:02:26 -0800

[diff] [blame]

411

return

Steve Dower

2019-03-07 08:02:26 -0800

[diff] [blame]

412

for c in '/?#@:':

413

if c in netloc2:

Steve Dower

d537ab0

2019-04-30 12:03:02 +0000

[diff] [blame]

414

raise ValueError("netloc '" + netloc + "' contains invalid " +

Steve Dower

2019-03-07 08:02:26 -0800

[diff] [blame]

415

"characters under NFKC normalization")

416

Jeremy Hylton

2008-06-18 20:49:58 +0000

[diff] [blame]

417

def urlsplit(url, scheme='', allow_fragments=True):
    """Parse a URL into 5 components:
    <scheme>://<netloc>/<path>?<query>#<fragment>

    Return a 5-tuple: (scheme, netloc, path, query, fragment).
    The components are not broken up into smaller bits (e.g. netloc stays
    a single string) and % escapes are not expanded.

    Raises ValueError for a netloc with unbalanced '[' / ']' or one that
    fails the NFKC check in _checknetloc()."""
    url, scheme, _coerce_result = _coerce_args(url, scheme)
    allow_fragments = bool(allow_fragments)
    key = url, scheme, allow_fragments, type(url), type(scheme)
    cached = _parse_cache.get(key)
    if cached:
        return _coerce_result(cached)
    if len(_parse_cache) >= MAX_CACHE_SIZE:  # avoid runaway growth
        clear_cache()
    netloc = query = fragment = ''
    # A leading "<scheme>:" is recognised only when every character before
    # the first ':' is a valid scheme character.
    colon = url.find(':')
    if colon > 0 and all(ch in scheme_chars for ch in url[:colon]):
        scheme, url = url[:colon].lower(), url[colon + 1:]

    if url[:2] == '//':
        netloc, url = _splitnetloc(url, 2)
        # Exactly one of '[' / ']' present means a malformed IPv6 literal.
        if ('[' in netloc) != (']' in netloc):
            raise ValueError("Invalid IPv6 URL")
    if allow_fragments and '#' in url:
        url, _, fragment = url.partition('#')
    if '?' in url:
        url, _, query = url.partition('?')
    _checknetloc(netloc)
    result = SplitResult(scheme, netloc, url, query, fragment)
    _parse_cache[key] = result
    return _coerce_result(result)

Jeremy Hylton

2008-06-18 20:49:58 +0000

[diff] [blame]

453

454

def urlunparse(components):
    """Reassemble a URL from its six parsed components.

    The result may differ slightly from, yet be equivalent to, the URL that
    was originally parsed when that URL had redundant delimiters, e.g. a ?
    with an empty query (the draft states that these are equivalent)."""
    scheme, netloc, url, params, query, fragment, _coerce_result = (
        _coerce_args(*components))
    # Fold non-empty params back into the path, then defer to urlunsplit().
    path = "%s;%s" % (url, params) if params else url
    return _coerce_result(urlunsplit((scheme, netloc, path, query, fragment)))

Jeremy Hylton

2008-06-18 20:49:58 +0000

[diff] [blame]

464

465

def urlunsplit(components):
    """Combine the five elements of a urlsplit() result into a complete URL.

    *components* can be any five-item iterable.  The result may differ
    slightly from, yet be equivalent to, the URL that was originally parsed
    when that URL had unnecessary delimiters (for example, a ? with an
    empty query; the RFC states that these are equivalent)."""
    scheme, netloc, url, query, fragment, _coerce_result = (
        _coerce_args(*components))
    # Emit '//' when there is a netloc, or when a netloc-using scheme is
    # given and the path does not already begin with '//'.
    needs_authority = netloc or (
        scheme and scheme in uses_netloc and url[:2] != '//')
    if needs_authority:
        if url and url[:1] != '/':
            url = '/' + url
        url = '//' + (netloc or '') + url
    if scheme:
        url = scheme + ':' + url
    if query:
        url = url + '?' + query
    if fragment:
        url = url + '#' + fragment
    return _coerce_result(url)

Jeremy Hylton

2008-06-18 20:49:58 +0000

[diff] [blame]

483

484

def urljoin(base, url, allow_fragments=True):
    """Join a base URL and a possibly relative URL to form an absolute
    interpretation of the latter.

    An empty *base* returns *url* unchanged, and an empty *url* returns
    *base* unchanged.  *allow_fragments* is passed through to urlparse()
    for both arguments.
    """
    if not base:
        return url
    if not url:
        return base

    base, url, _coerce_result = _coerce_args(base, url)
    bscheme, bnetloc, bpath, bparams, bquery, bfragment = \
            urlparse(base, '', allow_fragments)
    # The base scheme is the default scheme when parsing the reference.
    scheme, netloc, path, params, query, fragment = \
            urlparse(url, bscheme, allow_fragments)

    # A different scheme, or one that does not support relative
    # references, means the reference is returned as given.
    if scheme != bscheme or scheme not in uses_relative:
        return _coerce_result(url)
    if scheme in uses_netloc:
        if netloc:
            # The reference carries its own netloc: nothing is taken
            # from the base beyond the (already shared) scheme.
            return _coerce_result(urlunparse((scheme, netloc, path,
                                              params, query, fragment)))
        netloc = bnetloc

    if not path and not params:
        # Empty path and params: reuse the base's, and its query too when
        # the reference has none.
        path = bpath
        params = bparams
        if not query:
            query = bquery
        return _coerce_result(urlunparse((scheme, netloc, path,
                                          params, query, fragment)))

    base_parts = bpath.split('/')
    if base_parts[-1] != '':
        # the last item is not a directory, so will not be taken into account
        # in resolving the relative path
        del base_parts[-1]

    # for rfc3986, ignore all base path should the first character be root.
    if path[:1] == '/':
        segments = path.split('/')
    else:
        segments = base_parts + path.split('/')
        # filter out elements that would cause redundant slashes on re-joining
        # the resolved_path
        segments[1:-1] = filter(None, segments[1:-1])

    resolved_path = []

    # Resolve '.' and '..' segments.
    for seg in segments:
        if seg == '..':
            try:
                resolved_path.pop()
            except IndexError:
                # ignore any .. segments that would otherwise cause an IndexError
                # when popped from resolved_path if resolving for rfc3986
                pass
        elif seg == '.':
            continue
        else:
            resolved_path.append(seg)

    if segments[-1] in ('.', '..'):
        # do some post-processing here. if the last segment was a relative dir,
        # then we need to append the trailing '/'
        resolved_path.append('')

    # An empty joined path falls back to '/'.
    return _coerce_result(urlunparse((scheme, netloc, '/'.join(
        resolved_path) or '/', params, query, fragment)))

Antoine Pitrou

2014-08-21 19:16:17 -0400

[diff] [blame]

551

Jeremy Hylton

2008-06-18 20:49:58 +0000

[diff] [blame]

552

553

def urldefrag(url):
    """Strip the fragment, if any, from *url*.

    Returns a DefragResult pairing the URL without its fragment with the
    fragment itself.  When *url* contains no '#', the URL is returned as
    given and the fragment is the empty string.
    """
    url, _coerce_result = _coerce_args(url)
    if '#' not in url:
        return _coerce_result(DefragResult(url, ''))
    scheme, netloc, path, params, query, fragment = urlparse(url)
    stripped = urlunparse((scheme, netloc, path, params, query, ''))
    return _coerce_result(DefragResult(stripped, fragment))

Jeremy Hylton

2008-06-18 20:49:58 +0000

[diff] [blame]

568

Serhiy Storchaka

2013-03-14 21:31:37 +0200

[diff] [blame]

569

_hexdig = '0123456789ABCDEFabcdef'

Victor Stinner

d6a91a7

2014-03-17 22:38:41 +0100

[diff] [blame]

570

_hextobyte = None

Serhiy Storchaka

2013-03-14 21:31:37 +0200

[diff] [blame]

571

Guido van Rossum

2008-08-18 21:44:30 +0000

[diff] [blame]

572

def unquote_to_bytes(string):
    """unquote_to_bytes('abc%20def') -> b'abc def'.

    A '%' that is not followed by two hex digits is left as it is.
    """
    global _hextobyte
    # Note: strings are encoded as UTF-8. This is only an issue if it contains
    # unescaped non-ASCII characters, which URIs should not.
    if not string:
        # Is it a string-like object?
        string.split
        return b''
    if isinstance(string, str):
        string = string.encode('utf-8')
    head, *escapes = string.split(b'%')
    if not escapes:
        return string
    # Delay the initialization of the table to not waste memory
    # if the function is never called
    if _hextobyte is None:
        _hextobyte = {(hi + lo).encode(): bytes.fromhex(hi + lo)
                      for hi in _hexdig for lo in _hexdig}
    pieces = [head]
    for chunk in escapes:
        byte = _hextobyte.get(chunk[:2])
        if byte is None:
            # Not a valid escape: keep the '%' and the text that followed it.
            pieces.append(b'%')
            pieces.append(chunk)
        else:
            pieces.append(byte)
            pieces.append(chunk[2:])
    return b''.join(pieces)

# Matches maximal runs of ASCII characters; used to isolate the parts of a
# str that can contain percent-escapes.
_asciire = re.compile('([\x00-\x7f]+)')

def unquote(string, encoding='utf-8', errors='replace'):
    """Replace %xx escapes by their single-character equivalent.

    The optional encoding and errors parameters specify how to decode
    percent-encoded sequences into Unicode characters, as accepted by the
    bytes.decode() method.  Passing None for either selects its default.
    By default, percent-encoded sequences are decoded with UTF-8, and
    invalid sequences are replaced by a placeholder character.

    unquote('abc%20def') -> 'abc def'.
    """
    if '%' not in string:
        # Bare attribute access: raises AttributeError for objects that
        # are not string-like.
        string.split
        return string
    encoding = 'utf-8' if encoding is None else encoding
    errors = 'replace' if errors is None else errors
    # Splitting on a capturing group yields an odd-length list that
    # alternates non-ASCII text (even indexes) with ASCII runs (odd
    # indexes).  Only the ASCII runs can hold escapes.
    parts = _asciire.split(string)
    out = [parts[0]]
    for ascii_run, tail in zip(parts[1::2], parts[2::2]):
        out.append(unquote_to_bytes(ascii_run).decode(encoding, errors))
        out.append(tail)
    return ''.join(out)

Guido van Rossum

2008-08-18 21:44:30 +0000

[diff] [blame]

628

Senthil Kumaran

2017-04-04 21:19:43 -0700

[diff] [blame]

629

Victor Stinner

2011-01-14 12:52:12 +0000

[diff] [blame]

630

def parse_qs(qs, keep_blank_values=False, strict_parsing=False,
             encoding='utf-8', errors='replace', max_num_fields=None):
    """Parse a query given as a string argument.

        Arguments:

        qs: percent-encoded query string to be parsed

        keep_blank_values: flag indicating whether blank values in
            percent-encoded queries should be treated as blank strings.
            A true value indicates that blanks should be retained as
            blank strings.  The default false value indicates that
            blank values are to be ignored and treated as if they were
            not included.

        strict_parsing: flag indicating what to do with parsing errors.
            If false (the default), errors are silently ignored.
            If true, errors raise a ValueError exception.

        encoding and errors: specify how to decode percent-encoded sequences
            into Unicode characters, as accepted by the bytes.decode() method.

        max_num_fields: int. If set, then throws a ValueError if there
            are more than n fields read by parse_qsl().

        Returns a dictionary mapping each field name to the list of its
        values, in order of appearance.
    """
    grouped = {}
    fields = parse_qsl(qs, keep_blank_values, strict_parsing,
                       encoding=encoding, errors=errors,
                       max_num_fields=max_num_fields)
    for name, value in fields:
        # Repeated names accumulate their values in one list.
        grouped.setdefault(name, []).append(value)
    return grouped

Facundo Batista

2008-09-03 22:49:01 +0000

[diff] [blame]

667

Senthil Kumaran

2017-04-04 21:19:43 -0700

[diff] [blame]

668

Victor Stinner

2011-01-14 12:52:12 +0000

[diff] [blame]

669

def parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
              encoding='utf-8', errors='replace', max_num_fields=None):
    """Parse a query given as a string argument.

        Arguments:

        qs: percent-encoded query string to be parsed

        keep_blank_values: flag indicating whether blank values in
            percent-encoded queries should be treated as blank strings.
            A true value indicates that blanks should be retained as blank
            strings.  The default false value indicates that blank values
            are to be ignored and treated as if they were not included.

        strict_parsing: flag indicating what to do with parsing errors. If
            false (the default), errors are silently ignored. If true,
            errors raise a ValueError exception.

        encoding and errors: specify how to decode percent-encoded sequences
            into Unicode characters, as accepted by the bytes.decode() method.

        max_num_fields: int. If set, then throws a ValueError
            if there are more than n fields read by parse_qsl().

        Returns a list of (name, value) pairs in order of appearance.
    """
    qs, _coerce_result = _coerce_args(qs)

    # If max_num_fields is defined then check that the number of fields
    # is less than max_num_fields. This prevents a memory exhaustion DOS
    # attack via post bodies with many fields.
    if max_num_fields is not None:
        num_fields = 1 + qs.count('&') + qs.count(';')
        if max_num_fields < num_fields:
            raise ValueError('Max number of fields exceeded')

    result = []
    # Both '&' and ';' separate fields.
    for amp_part in qs.split('&'):
        for field in amp_part.split(';'):
            if not field and not strict_parsing:
                continue
            raw_name, equals, raw_value = field.partition('=')
            if not equals:
                if strict_parsing:
                    raise ValueError("bad query field: %r" % (field,))
                # Handle case of a control-name with no equal sign
                if not keep_blank_values:
                    continue
            if not (raw_value or keep_blank_values):
                # Blank value that the caller asked to drop.
                continue
            name = raw_name.replace('+', ' ')
            name = unquote(name, encoding=encoding, errors=errors)
            name = _coerce_result(name)
            value = raw_value.replace('+', ' ')
            value = unquote(value, encoding=encoding, errors=errors)
            value = _coerce_result(value)
            result.append((name, value))
    return result

728

Guido van Rossum

2008-08-18 21:44:30 +0000

[diff] [blame]

729

def unquote_plus(string, encoding='utf-8', errors='replace'):
    """Like unquote(), but also replace plus signs by spaces, as required for
    unquoting HTML form values.

    The plus signs are replaced before the %xx escapes are decoded, so an
    escaped plus ('%2B') survives as a literal '+'.

    unquote_plus('%7e/abc+def') -> '~/abc def'
    """
    spaced = string.replace('+', ' ')
    return unquote(spaced, encoding, errors)

737

738

# Byte values that are never percent-encoded: the RFC 3986 "unreserved"
# characters (ASCII letters, digits and "_.-~").
_ALWAYS_SAFE = frozenset(
    b'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    b'abcdefghijklmnopqrstuvwxyz'
    b'0123456789'
    b'_.-~'
)
# The same byte values as a bytes object (in no particular order).
_ALWAYS_SAFE_BYTES = bytes(_ALWAYS_SAFE)
# Cache of quoting callables, keyed by the normalized 'safe' bytes.
_safe_quoters = {}

class Quoter(collections.defaultdict):
    """A mapping from bytes (in range(0,256)) to strings.

    String values are percent-encoded byte values, unless the key < 128, and
    in the "safe" set (either the specified safe set, or default set).
    """
    # Keeps a cache internally, using defaultdict, for efficiency (lookups
    # of cached keys don't call Python code at all).

    def __init__(self, safe):
        """safe: bytes object."""
        self.safe = _ALWAYS_SAFE.union(safe)

    def __repr__(self):
        # Without this, will just display as a defaultdict
        return "<%s %r>" % (self.__class__.__name__, dict(self))

    def __missing__(self, b):
        # Handle a cache miss. Store quoted string in cache and return.
        if b in self.safe:
            quoted = chr(b)
        else:
            quoted = '%{:02X}'.format(b)
        self[b] = quoted
        return quoted

def quote(string, safe='/', encoding=None, errors=None):
    """quote('abc def') -> 'abc%20def'

    Each part of a URL, e.g. the path info, the query, etc., has a
    different set of reserved characters that must be quoted. The
    quote function offers a cautious (not minimal) way to quote a
    string for most of these parts.

    RFC 3986 Uniform Resource Identifier (URI): Generic Syntax lists
    the following (un)reserved characters.

    unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~"
    reserved      = gen-delims / sub-delims
    gen-delims    = ":" / "/" / "?" / "#" / "[" / "]" / "@"
    sub-delims    = "!" / "$" / "&" / "'" / "(" / ")"
                  / "*" / "+" / "," / ";" / "="

    Each of the reserved characters is reserved in some component of a URL,
    but not necessarily in all of them.

    The quote function %-escapes all characters that are neither in the
    unreserved chars ("always safe") nor the additional chars set via the
    safe arg.

    The default for the safe arg is '/'. The character is reserved, but in
    typical usage the quote function is being called on a path where the
    existing slash characters are to be preserved.

    Python 3.7 updates from using RFC 2396 to RFC 3986 to quote URL strings.
    Now, "~" is included in the set of unreserved characters.

    string and safe may be either str or bytes objects. encoding and errors
    must not be specified if string is a bytes object; doing so raises
    TypeError.

    The optional encoding and errors parameters specify how to deal with
    non-ASCII characters, as accepted by the str.encode method.
    By default, encoding='utf-8' (characters are encoded with UTF-8), and
    errors='strict' (unsupported characters raise a UnicodeEncodeError).
    """
    if not isinstance(string, str):
        # Anything that is not a str is handed to quote_from_bytes() as is;
        # text-encoding options make no sense for it.
        if encoding is not None:
            raise TypeError("quote() doesn't support 'encoding' for bytes")
        if errors is not None:
            raise TypeError("quote() doesn't support 'errors' for bytes")
        return quote_from_bytes(string, safe)

    if not string:
        # Empty str: return it unchanged.
        return string
    encoding = 'utf-8' if encoding is None else encoding
    errors = 'strict' if errors is None else errors
    return quote_from_bytes(string.encode(encoding, errors), safe)

820

821

def quote_plus(string, safe='', encoding=None, errors=None):
    """Like quote(), but also replace ' ' with '+', as required for quoting
    HTML form values. Plus signs in the original string are escaped unless
    they are included in safe. It also does not have safe default to '/'.
    """
    # Check if ' ' in string, where string may either be a str or bytes.  If
    # there are no spaces, the regular quote will produce the right answer.
    # Any other type takes the general path below.
    if isinstance(string, str):
        may_have_space = ' ' in string
    elif isinstance(string, bytes):
        may_have_space = b' ' in string
    else:
        may_have_space = True
    if not may_have_space:
        return quote(string, safe, encoding, errors)

    # Let spaces pass through quote() untouched, then turn them into '+'.
    space = ' ' if isinstance(safe, str) else b' '
    quoted = quote(string, safe + space, encoding, errors)
    return quoted.replace(' ', '+')

Guido van Rossum

2008-08-18 21:44:30 +0000

[diff] [blame]

837

838

def quote_from_bytes(bs, safe='/'):
    """Like quote(), but accepts a bytes object rather than a str, and does
    not perform string-to-bytes encoding.  It always returns an ASCII string.

    bs must be bytes or bytearray, otherwise TypeError is raised.  safe may
    be str or bytes; non-ASCII entries in it are ignored.

    quote_from_bytes(b'abc def\x3f') -> 'abc%20def%3F'
    """
    if not isinstance(bs, (bytes, bytearray)):
        raise TypeError("quote_from_bytes() expected bytes")
    if not bs:
        return ''
    # Normalize 'safe' by converting to bytes and removing non-ASCII chars
    if isinstance(safe, str):
        safe = safe.encode('ascii', 'ignore')
    else:
        safe = bytes(c for c in safe if c < 128)
    # Fast path: stripping every safe byte leaves nothing, so no byte
    # needs quoting.
    if not bs.rstrip(_ALWAYS_SAFE_BYTES + safe):
        return bs.decode()
    quoter = _safe_quoters.get(safe)
    if quoter is None:
        # First use of this 'safe' set: build its quoter and remember it.
        quoter = Quoter(safe).__getitem__
        _safe_quoters[safe] = quoter
    return ''.join(map(quoter, bs))

Jeremy Hylton

2008-06-18 20:49:58 +0000

[diff] [blame]

859

R David Murray

2015-05-17 20:44:50 -0400

[diff] [blame]

860

def urlencode(query, doseq=False, safe='', encoding=None, errors=None,
              quote_via=quote_plus):
    """Encode a dict or sequence of two-element tuples into a URL query string.

    If any values in the query arg are sequences and doseq is true, each
    sequence element is converted to a separate parameter.

    If the query arg is a sequence of two-element tuples, the order of the
    parameters in the output will match the order of parameters in the
    input.

    The components of a query arg may each be either a string or a bytes type.
    Any other object is converted with str() before being quoted.

    The safe, encoding, and errors parameters are passed down to the function
    specified by quote_via (encoding and errors only if a component is a str).

    Raises TypeError if query is neither a mapping nor a sequence whose
    first element is a tuple.
    """

    if hasattr(query, "items"):
        query = query.items()
    else:
        # It's a bother at times that strings and string-like objects are
        # sequences.
        try:
            # non-sequence items should not work with len()
            # non-empty strings will fail this
            if len(query) and not isinstance(query[0], tuple):
                raise TypeError
            # Zero-length sequences of all types will get here and succeed,
            # but that's a minor nit.  Since the original implementation
            # allowed empty dicts that type of behavior probably should be
            # preserved for consistency
        except TypeError as err:
            raise TypeError("not a valid non-string sequence "
                            "or mapping object") from err

    def quote_item(item):
        # bytes are quoted as they are; everything else goes through str().
        if isinstance(item, bytes):
            return quote_via(item, safe)
        return quote_via(str(item), safe, encoding, errors)

    fields = []
    for k, v in query:
        k = quote_item(k)
        if not doseq or isinstance(v, bytes):
            values = [quote_item(v)]
        elif isinstance(v, str):
            # Passed through without str() so str subclasses are untouched.
            values = [quote_via(v, safe, encoding, errors)]
        else:
            try:
                # Is this a sufficient test for sequence-ness?
                len(v)
            except TypeError:
                # not a sequence
                values = [quote_item(v)]
            else:
                # one "key=value" field per element of the sequence
                values = (quote_item(elt) for elt in v)
        fields.extend(k + '=' + value for value in values)
    return '&'.join(fields)

939

Cheryl Sabella

2018-04-25 16:51:54 -0700

[diff] [blame]

940

Georg Brandl

13e8946

2008-07-01 19:56:00 +0000

[diff] [blame]

941

def to_bytes(url):
    """Deprecated public alias of _to_bytes(); emits a DeprecationWarning."""
    message = "urllib.parse.to_bytes() is deprecated as of 3.8"
    warnings.warn(message, DeprecationWarning, stacklevel=2)
    return _to_bytes(url)


def _to_bytes(url):
    """to_bytes(u"URL") --> 'URL'.

    Despite the name, a str argument comes back as a str: it is only
    checked to be pure ASCII, and UnicodeError is raised if it is not.
    Any non-str argument is returned unchanged.
    """
    # Most URL schemes require ASCII. If that changes, the conversion
    # can be relaxed.
    # XXX get rid of to_bytes()
    if not isinstance(url, str):
        return url
    try:
        return url.encode("ASCII").decode()
    except UnicodeError:
        raise UnicodeError("URL " + repr(url) +
                           " contains non-ASCII characters")

959

Cheryl Sabella

2018-04-25 16:51:54 -0700

[diff] [blame]

960

Jeremy Hylton

2008-06-18 20:49:58 +0000

[diff] [blame]

961

def unwrap(url):
    """Transform a string like '<URL:scheme://host/path>' into 'scheme://host/path'.

    The argument is converted with str() and stripped of surrounding
    whitespace; an enclosing '<...>' pair and a leading 'URL:' prefix are
    each removed if present.  The string is returned unchanged (apart from
    the stripping) if it's not a wrapped URL.
    """
    text = str(url).strip()
    if text.startswith('<') and text.endswith('>'):
        text = text[1:-1].strip()
    if text.startswith('URL:'):
        text = text[4:].strip()
    return text

972

Cheryl Sabella

2018-04-25 16:51:54 -0700

[diff] [blame]

973

Jeremy Hylton

2008-06-18 20:49:58 +0000

[diff] [blame]

974

def splittype(url):
    """Deprecated public alias of _splittype(); emits a DeprecationWarning."""
    message = ("urllib.parse.splittype() is deprecated as of 3.8, "
               "use urllib.parse.urlparse() instead")
    warnings.warn(message, DeprecationWarning, stacklevel=2)
    return _splittype(url)


# Compiled on first use by _splittype().
_typeprog = None

def _splittype(url):
    """splittype('type:opaquestring') --> 'type', 'opaquestring'.

    The type is lower-cased.  Returns (None, url) when url does not start
    with a non-empty run of characters other than '/' and ':' followed by
    a ':'.
    """
    global _typeprog
    if _typeprog is None:
        _typeprog = re.compile('([^/:]+):(.*)', re.DOTALL)

    found = _typeprog.match(url)
    if found is None:
        return None, url
    scheme, data = found.groups()
    return scheme.lower(), data

993

Cheryl Sabella

2018-04-25 16:51:54 -0700

[diff] [blame]

994

Jeremy Hylton

2008-06-18 20:49:58 +0000

[diff] [blame]

995

def splithost(url):
    """Deprecated public alias of _splithost(); emits a DeprecationWarning."""
    message = ("urllib.parse.splithost() is deprecated as of 3.8, "
               "use urllib.parse.urlparse() instead")
    warnings.warn(message, DeprecationWarning, stacklevel=2)
    return _splithost(url)


# Compiled on first use by _splithost().
_hostprog = None

def _splithost(url):
    """splithost('//host[:port]/path') --> 'host[:port]', '/path'.

    The host part ends at the first '/', '#' or '?'.  A non-empty
    remainder that does not begin with '/' gets one prepended.  Returns
    (None, url) when url does not start with '//'.
    """
    global _hostprog
    if _hostprog is None:
        _hostprog = re.compile('//([^/#?]*)(.*)', re.DOTALL)

    found = _hostprog.match(url)
    if found is None:
        return None, url
    host_port, path = found.groups()
    if path and not path.startswith('/'):
        path = '/' + path
    return host_port, path

1016

Cheryl Sabella

2018-04-25 16:51:54 -0700

[diff] [blame]

1017

Jeremy Hylton

2008-06-18 20:49:58 +0000

[diff] [blame]

1018

def splituser(host):
    """Deprecated public alias of _splituser(); emits a DeprecationWarning."""
    message = ("urllib.parse.splituser() is deprecated as of 3.8, "
               "use urllib.parse.urlparse() instead")
    warnings.warn(message, DeprecationWarning, stacklevel=2)
    return _splituser(host)


def _splituser(host):
    """splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'.

    The split happens at the last '@'.  The user part is None when there
    is no '@' at all.
    """
    user, at_sign, rest = host.rpartition('@')
    if not at_sign:
        return None, rest
    return user, rest

Jeremy Hylton

2008-06-18 20:49:58 +0000

[diff] [blame]

1029

Cheryl Sabella

2018-04-25 16:51:54 -0700

[diff] [blame]

1030

Jeremy Hylton

2008-06-18 20:49:58 +0000

[diff] [blame]

1031

def splitpasswd(user):
    """Deprecated public alias of _splitpasswd(); emits a DeprecationWarning."""
    message = ("urllib.parse.splitpasswd() is deprecated as of 3.8, "
               "use urllib.parse.urlparse() instead")
    warnings.warn(message, DeprecationWarning, stacklevel=2)
    return _splitpasswd(user)


def _splitpasswd(user):
    """splitpasswd('user:passwd') -> 'user', 'passwd'.

    The split happens at the first ':'.  The password is None when there
    is no ':' at all.
    """
    name, colon, passwd = user.partition(':')
    if not colon:
        passwd = None
    return name, passwd

Jeremy Hylton

2008-06-18 20:49:58 +0000

[diff] [blame]

1042

Cheryl Sabella

2018-04-25 16:51:54 -0700

[diff] [blame]

1043

1044

def splitport(host):
    """Deprecated public alias of _splitport(); emits a DeprecationWarning."""
    message = ("urllib.parse.splitport() is deprecated as of 3.8, "
               "use urllib.parse.urlparse() instead")
    warnings.warn(message, DeprecationWarning, stacklevel=2)
    return _splitport(host)


# Compiled on first use by _splitport().
_portprog = None

def _splitport(host):
    """splitport('host:port') --> 'host', 'port'.

    The port is returned as a string of ASCII digits.  When host ends in a
    bare ':' the colon is dropped and the port is None; when there is no
    trailing ':digits' at all, host is returned unchanged with port None.
    """
    global _portprog
    if _portprog is None:
        _portprog = re.compile('(.*):([0-9]*)$', re.DOTALL)

    found = _portprog.match(host)
    if found is None:
        return host, None
    name, port = found.groups()
    # An empty port group (host ended with ':') is reported as None.
    return name, (port or None)

1065

Cheryl Sabella

2018-04-25 16:51:54 -0700

[diff] [blame]

1066

Jeremy Hylton

2008-06-18 20:49:58 +0000

[diff] [blame]

1067

def splitnport(host, defport=-1):
    """Deprecated public alias of _splitnport(); emits a DeprecationWarning."""
    warnings.warn("urllib.parse.splitnport() is deprecated as of 3.8, "
                  "use urllib.parse.urlparse() instead",
                  DeprecationWarning, stacklevel=2)
    return _splitnport(host, defport)


def _splitnport(host, defport=-1):
    """Split host and port, returning numeric port.

    The split happens at the last ':'.

    Return given default port if no ':' found, or if nothing follows the
    ':'; defaults to -1.
    Return numerical port if a valid number is found after ':'.
    Return None if ':' but not a valid number.

    Only a non-empty run of ASCII digits counts as a valid number.
    """
    host, delim, port = host.rpartition(':')
    if not delim:
        # No ':' at all: rpartition() put the whole input in the last slot.
        host = port
    elif port:
        # int() alone is too lenient for a port: it also accepts signs,
        # surrounding whitespace, '_' separators and non-ASCII digits.
        if port.isdigit() and port.isascii():
            nport = int(port)
        else:
            nport = None
        return host, nport
    return host, defport

1089

Cheryl Sabella

2018-04-25 16:51:54 -0700

[diff] [blame]

1090

Jeremy Hylton

2008-06-18 20:49:58 +0000

[diff] [blame]

1091

def splitquery(url):
    """Deprecated public alias of _splitquery(); emits a DeprecationWarning."""
    message = ("urllib.parse.splitquery() is deprecated as of 3.8, "
               "use urllib.parse.urlparse() instead")
    warnings.warn(message, DeprecationWarning, stacklevel=2)
    return _splitquery(url)


def _splitquery(url):
    """splitquery('/path?query') --> '/path', 'query'.

    The split happens at the last '?'.  The query is None when url
    contains no '?'.
    """
    path, question_mark, query = url.rpartition('?')
    return (path, query) if question_mark else (url, None)

1104

Cheryl Sabella

2018-04-25 16:51:54 -0700

[diff] [blame]

1105

Jeremy Hylton

2008-06-18 20:49:58 +0000

[diff] [blame]

1106

def splittag(url):
    """Deprecated public alias of _splittag(); emits a DeprecationWarning."""
    message = ("urllib.parse.splittag() is deprecated as of 3.8, "
               "use urllib.parse.urlparse() instead")
    warnings.warn(message, DeprecationWarning, stacklevel=2)
    return _splittag(url)


def _splittag(url):
    """splittag('/path#tag') --> '/path', 'tag'.

    The split happens at the last '#'.  The tag is None when url contains
    no '#'.
    """
    path, hash_sign, tag = url.rpartition('#')
    return (path, tag) if hash_sign else (url, None)

1119

Cheryl Sabella

2018-04-25 16:51:54 -0700

[diff] [blame]

1120

Jeremy Hylton

2008-06-18 20:49:58 +0000

[diff] [blame]

1121

def splitattr(url):
    """Deprecated public alias of _splitattr(); emits a DeprecationWarning."""
    message = ("urllib.parse.splitattr() is deprecated as of 3.8, "
               "use urllib.parse.urlparse() instead")
    warnings.warn(message, DeprecationWarning, stacklevel=2)
    return _splitattr(url)


def _splitattr(url):
    """splitattr('/path;attr1=value1;attr2=value2;...') ->
        '/path', ['attr1=value1', 'attr2=value2', ...].

    The attribute list is empty when url contains no ';'.
    """
    path, *attrs = url.split(';')
    return path, attrs

1133

Cheryl Sabella

2018-04-25 16:51:54 -0700

[diff] [blame]

1134

Jeremy Hylton

2008-06-18 20:49:58 +0000

[diff] [blame]

1135

def splitvalue(attr):
    """Deprecated public alias of _splitvalue(); emits a DeprecationWarning."""
    message = ("urllib.parse.splitvalue() is deprecated as of 3.8, "
               "use urllib.parse.parse_qsl() instead")
    warnings.warn(message, DeprecationWarning, stacklevel=2)
    return _splitvalue(attr)

1140

1141

1142

def _splitvalue(attr):

Jeremy Hylton

2008-06-18 20:49:58 +0000

[diff] [blame]

1143

"""splitvalue('attr=value') --> 'attr', 'value'."""

Serhiy Storchaka