Blame - Lib/urllib/parse.py - platform/external/python/cpython3

2008-06-18 20:49:58 +0000

[diff] [blame]

1

"""Parse (absolute and relative) URLs.

2

Senthil Kumaran

2010-04-17 14:44:14 +0000

[diff] [blame]

3

urlparse module is based upon the following RFC specifications.

4

5

RFC 3986 (STD66): "Uniform Resource Identifiers" by T. Berners-Lee, R. Fielding

6

and L. Masinter, January 2005.

7

8

RFC 2732: "Format for Literal IPv6 Addresses in URL's" by R. Hinden, B. Carpenter

9

and L.Masinter, December 1999.

10

Benjamin Peterson

d7c3ed5

2010-06-27 22:32:30 +0000

[diff] [blame]

11

RFC 2396: "Uniform Resource Identifiers (URI)": Generic Syntax by T.

Senthil Kumaran

2010-04-17 14:44:14 +0000

[diff] [blame]

12

Berners-Lee, R. Fielding, and L. Masinter, August 1998.

13

David Malcolm

ee25568

2010-12-02 16:41:00 +0000

[diff] [blame]

14

RFC 2368: "The mailto URL scheme", by P.Hoffman , L Masinter, J. Zawinski, July 1998.

Senthil Kumaran

2010-04-17 14:44:14 +0000

[diff] [blame]

15

16

RFC 1808: "Relative Uniform Resource Locators", by R. Fielding, UC Irvine, June

17

1995.

18

Benjamin Peterson

d7c3ed5

2010-06-27 22:32:30 +0000

[diff] [blame]

19

RFC 1738: "Uniform Resource Locators (URL)" by T. Berners-Lee, L. Masinter, M.

Senthil Kumaran

2010-04-17 14:44:14 +0000

[diff] [blame]

20

McCahill, December 1994

21

Benjamin Peterson

d7c3ed5

2010-06-27 22:32:30 +0000

[diff] [blame]

22

RFC 3986 is considered the current standard and any future changes to

23

urlparse module should conform with it. The urlparse module is

24

currently not entirely compliant with this RFC due to de facto

25

scenarios for parsing, and for backward compatibility purposes, some

26

parsing quirks from older RFCs are retained. The testcases in

Senthil Kumaran

2010-04-17 14:44:14 +0000

[diff] [blame]

27

test_urlparse.py provide a good indicator of parsing behavior.

Jeremy Hylton

2008-06-18 20:49:58 +0000

[diff] [blame]

28

"""

29

Serhiy Storchaka

2013-03-14 21:31:37 +0200

[diff] [blame]

30

import re

Facundo Batista

2ac5de2

2008-07-07 18:24:11 +0000

[diff] [blame]

31

import sys

Guido van Rossum

2008-08-18 21:44:30 +0000

[diff] [blame]

32

import collections

Cheryl Sabella

2018-04-25 16:51:54 -0700

[diff] [blame]

33

import warnings

Facundo Batista

2ac5de2

2008-07-07 18:24:11 +0000

[diff] [blame]

34

Jeremy Hylton

2008-06-18 20:49:58 +0000

[diff] [blame]

35

__all__ = ["urlparse", "urlunparse", "urljoin", "urldefrag",

Senthil Kumaran

0256b2a

2010-10-25 16:36:20 +0000

[diff] [blame]

36

"urlsplit", "urlunsplit", "urlencode", "parse_qs",

37

"parse_qsl", "quote", "quote_plus", "quote_from_bytes",

Serhiy Storchaka

1515450

2015-04-07 19:09:01 +0300

[diff] [blame]

38

"unquote", "unquote_plus", "unquote_to_bytes",

39

"DefragResult", "ParseResult", "SplitResult",

40

"DefragResultBytes", "ParseResultBytes", "SplitResultBytes"]

Jeremy Hylton

2008-06-18 20:49:58 +0000

[diff] [blame]

41

Senthil Kumaran

2017-05-17 21:48:59 -0700

[diff] [blame]

42

# A classification of schemes.
# The empty string classifies URLs with no scheme specified,
# being the default value returned by “urlsplit” and “urlparse”.

# Schemes for which urljoin() resolves a reference against the base URL.
uses_relative = ['', 'ftp', 'http', 'gopher', 'nntp', 'imap',
                 'wais', 'file', 'https', 'shttp', 'mms',
                 'prospero', 'rtsp', 'rtspu', 'sftp',
                 'svn', 'svn+ssh', 'ws', 'wss']

# Schemes consulted by urlunsplit() and urljoin() when handling the netloc.
uses_netloc = ['', 'ftp', 'http', 'gopher', 'nntp', 'telnet',
               'imap', 'wais', 'file', 'mms', 'https', 'shttp',
               'snews', 'prospero', 'rtsp', 'rtspu', 'rsync',
               'svn', 'svn+ssh', 'sftp', 'nfs', 'git', 'git+ssh',
               'ws', 'wss']

# Schemes for which urlparse() splits ';params' off the path.
uses_params = ['', 'ftp', 'hdl', 'prospero', 'http', 'imap',
               'https', 'shttp', 'rtsp', 'rtspu', 'sip', 'sips',
               'mms', 'sftp', 'tel']

# These are not actually used anymore, but should stay for backwards
# compatibility.  (They are undocumented, but have a public-looking name.)

non_hierarchical = ['gopher', 'hdl', 'mailto', 'news',
                    'telnet', 'wais', 'imap', 'snews', 'sip', 'sips']

uses_query = ['', 'http', 'wais', 'imap', 'https', 'shttp', 'mms',
              'gopher', 'rtsp', 'rtspu', 'sip', 'sips']

uses_fragment = ['', 'ftp', 'hdl', 'http', 'gopher', 'news',
                 'nntp', 'wais', 'https', 'shttp', 'snews',
                 'file', 'prospero']

Georg Brandl

2012-08-24 18:15:29 +0200

[diff] [blame]

73

Jeremy Hylton

2008-06-18 20:49:58 +0000

[diff] [blame]

74

# Characters valid in scheme names
scheme_chars = ('abcdefghijklmnopqrstuvwxyz'
                'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
                '0123456789'
                '+-.')

Nick Coghlan

2010-11-30 15:48:08 +0000

[diff] [blame]

80

# XXX: Consider replacing with functools.lru_cache
MAX_CACHE_SIZE = 20
# Maps an urlsplit() argument key to its SplitResult.
_parse_cache = {}

def clear_cache():
    """Clear the parse cache and the quoters cache."""
    _parse_cache.clear()
    # _safe_quoters is a module-level cache defined further down the file.
    _safe_quoters.clear()

Jeremy Hylton

2008-06-18 20:49:58 +0000

[diff] [blame]

88

89

Nick Coghlan

2010-11-30 15:48:08 +0000

[diff] [blame]

90

# Helpers for bytes handling
# For 3.2, we deliberately require applications that
# handle improperly quoted URLs to do their own
# decoding and encoding. If valid use cases are
# presented, we may relax this by using latin-1
# decoding internally for 3.3
_implicit_encoding = 'ascii'
_implicit_errors = 'strict'

def _noop(obj):
    """Return *obj* unchanged (result coercion used for str inputs)."""
    return obj

def _encode_result(obj, encoding=_implicit_encoding,
                        errors=_implicit_errors):
    """Return obj.encode(encoding, errors) (result coercion for bytes inputs)."""
    return obj.encode(encoding, errors)

def _decode_args(args, encoding=_implicit_encoding,
                       errors=_implicit_errors):
    """Return a tuple with every argument decoded; falsy arguments become ''."""
    return tuple(x.decode(encoding, errors) if x else '' for x in args)

def _coerce_args(*args):
    """Normalise the arguments to str and pick the matching result coercion.

    Returns the (possibly decoded) arguments followed by one extra element:
    _noop when the first argument is a str, _encode_result otherwise.
    Raises TypeError when a non-empty later argument disagrees with the
    first argument about being a str.
    """
    # Invokes decode if necessary to create str args
    # and returns the coerced inputs along with
    # an appropriate result coercion function
    #   - noop for str inputs
    #   - encoding function otherwise
    str_input = isinstance(args[0], str)
    for arg in args[1:]:
        # We special-case the empty string to support the
        # "scheme=''" default argument to some functions
        if arg and isinstance(arg, str) != str_input:
            raise TypeError("Cannot mix str and non-str arguments")
    if str_input:
        return args + (_noop,)
    return _decode_args(args) + (_encode_result,)

125

126

# Result objects are more helpful than simple tuples

127

class _ResultMixinStr(object):
    """Standard approach to encoding parsed results from str to bytes"""
    __slots__ = ()

    def encode(self, encoding='ascii', errors='strict'):
        """Encode every field and build the result with _encoded_counterpart."""
        return self._encoded_counterpart(*(x.encode(encoding, errors) for x in self))

133

134

135

class _ResultMixinBytes(object):
    """Standard approach to decoding parsed results from bytes to str"""
    __slots__ = ()

    def decode(self, encoding='ascii', errors='strict'):
        """Decode every field and build the result with _decoded_counterpart."""
        return self._decoded_counterpart(*(x.decode(encoding, errors) for x in self))

141

142

143

class _NetlocResultMixinBase(object):
    """Shared methods for the parsed result objects containing a netloc element"""
    __slots__ = ()

    # Subclasses supply two properties: _userinfo -> (username, password)
    # and _hostinfo -> (hostname, port), in the str or bytes type of the result.

    @property
    def username(self):
        """First element of _userinfo."""
        return self._userinfo[0]

    @property
    def password(self):
        """Second element of _userinfo."""
        return self._userinfo[1]

    @property
    def hostname(self):
        """Lower-cased host name, or None when the host part is empty."""
        hostname = self._hostinfo[0]
        if not hostname:
            return None
        # Scoped IPv6 address may have zone info, which must not be lowercased
        # like http://[fe80::822a:a8ff:fe49:470c%tESt]:1234/keys
        separator = '%' if isinstance(hostname, str) else b'%'
        hostname, percent, zone = hostname.partition(separator)
        return hostname.lower() + percent + zone

    @property
    def port(self):
        """Port as an int, or None when _hostinfo carries no port.

        Raises ValueError when the port is not made of ASCII digits only or
        lies outside 0-65535.
        """
        port = self._hostinfo[1]
        if port is not None:
            # int() alone would also accept signs, surrounding whitespace,
            # underscores and non-ASCII digits, none of which form a port.
            if not (port.isascii() and port.isdigit()):
                message = f'Port could not be cast to integer value as {port!r}'
                raise ValueError(message)
            port = int(port, 10)
            if not (0 <= port <= 65535):
                raise ValueError("Port out of range 0-65535")
        return port

class _NetlocResultMixinStr(_NetlocResultMixinBase, _ResultMixinStr):
    # str flavour of the netloc helpers: splits self.netloc with str delimiters.
    __slots__ = ()

    @property
    def _userinfo(self):
        """Return (username, password) taken from the part before the last '@'.

        Both are None without an '@'; password is None without a ':'.
        """
        netloc = self.netloc
        userinfo, have_info, hostinfo = netloc.rpartition('@')
        if have_info:
            username, have_password, password = userinfo.partition(':')
            if not have_password:
                password = None
        else:
            username = password = None
        return username, password

    @property
    def _hostinfo(self):
        """Return (hostname, port) taken from the part after the last '@'.

        A bracketed host is returned without its brackets; an empty or
        missing port is reported as None.
        """
        netloc = self.netloc
        _, _, hostinfo = netloc.rpartition('@')
        _, have_open_br, bracketed = hostinfo.partition('[')
        if have_open_br:
            hostname, _, port = bracketed.partition(']')
            _, _, port = port.partition(':')
        else:
            hostname, _, port = hostinfo.partition(':')
        if not port:
            port = None
        return hostname, port

208

209

210

class _NetlocResultMixinBytes(_NetlocResultMixinBase, _ResultMixinBytes):
    # bytes flavour of the netloc helpers: splits self.netloc with bytes delimiters.
    __slots__ = ()

    @property
    def _userinfo(self):
        """Return (username, password) taken from the part before the last b'@'.

        Both are None without a b'@'; password is None without a b':'.
        """
        netloc = self.netloc
        userinfo, have_info, hostinfo = netloc.rpartition(b'@')
        if have_info:
            username, have_password, password = userinfo.partition(b':')
            if not have_password:
                password = None
        else:
            username = password = None
        return username, password

    @property
    def _hostinfo(self):
        """Return (hostname, port) taken from the part after the last b'@'.

        A bracketed host is returned without its brackets; an empty or
        missing port is reported as None.
        """
        netloc = self.netloc
        _, _, hostinfo = netloc.rpartition(b'@')
        _, have_open_br, bracketed = hostinfo.partition(b'[')
        if have_open_br:
            hostname, _, port = bracketed.partition(b']')
            _, _, port = port.partition(b':')
        else:
            hostname, _, port = hostinfo.partition(b':')
        if not port:
            port = None
        return hostname, port

238

Jeremy Hylton

2008-06-18 20:49:58 +0000

[diff] [blame]

239

240

from collections import namedtuple

241

Nick Coghlan

2010-11-30 15:48:08 +0000

[diff] [blame]

242

# Plain namedtuple bases; the public result classes add behaviour on top.
_DefragResultBase = namedtuple('DefragResult', 'url fragment')
_SplitResultBase = namedtuple(
    'SplitResult', 'scheme netloc path query fragment')
_ParseResultBase = namedtuple(
    'ParseResult', 'scheme netloc path params query fragment')

_DefragResultBase.__doc__ = """
DefragResult(url, fragment)

A 2-tuple that contains the url without fragment identifier and the fragment
identifier as a separate argument.
"""

_DefragResultBase.url.__doc__ = """The URL with no fragment identifier."""

_DefragResultBase.fragment.__doc__ = """
Fragment identifier separated from URL, that allows indirect identification of a
secondary resource by reference to a primary resource and additional identifying
information.
"""

_SplitResultBase.__doc__ = """
SplitResult(scheme, netloc, path, query, fragment)

A 5-tuple that contains the different components of a URL. Similar to
ParseResult, but does not split params.
"""

_SplitResultBase.scheme.__doc__ = """Specifies URL scheme for the request."""

_SplitResultBase.netloc.__doc__ = """
Network location where the request is made to.
"""

_SplitResultBase.path.__doc__ = """
The hierarchical path, such as the path to a file to download.
"""

_SplitResultBase.query.__doc__ = """
The query component, that contains non-hierarchical data, that along with data
in path component, identifies a resource in the scope of URI's scheme and
network location.
"""

_SplitResultBase.fragment.__doc__ = """
Fragment identifier, that allows indirect identification of a secondary resource
by reference to a primary resource and additional identifying information.
"""

_ParseResultBase.__doc__ = """
ParseResult(scheme, netloc, path, params, query, fragment)

A 6-tuple that contains components of a parsed URL.
"""

# The fields shared with SplitResult reuse its field documentation.
_ParseResultBase.scheme.__doc__ = _SplitResultBase.scheme.__doc__
_ParseResultBase.netloc.__doc__ = _SplitResultBase.netloc.__doc__
_ParseResultBase.path.__doc__ = _SplitResultBase.path.__doc__
_ParseResultBase.params.__doc__ = """
Parameters for last path element used to dereference the URI in order to provide
access to perform some operation on the resource.
"""

_ParseResultBase.query.__doc__ = _SplitResultBase.query.__doc__
_ParseResultBase.fragment.__doc__ = _SplitResultBase.fragment.__doc__

307

Jeremy Hylton

2008-06-18 20:49:58 +0000

[diff] [blame]

308

Nick Coghlan

2010-11-30 15:48:08 +0000

[diff] [blame]

309

# For backwards compatibility, alias _NetlocResultMixinStr
# ResultBase is no longer part of the documented API, but it is
# retained since deprecating it isn't worth the hassle
ResultBase = _NetlocResultMixinStr

313

314

# Structured result objects for string data

315

class DefragResult(_DefragResultBase, _ResultMixinStr):
    # str result of urldefrag(): (url, fragment).
    __slots__ = ()
    def geturl(self):
        """Return the URL, with '#' and the fragment appended when it is non-empty."""
        if self.fragment:
            return self.url + '#' + self.fragment
        else:
            return self.url

Jeremy Hylton

2008-06-18 20:49:58 +0000

[diff] [blame]

322

Nick Coghlan

2010-11-30 15:48:08 +0000

[diff] [blame]

323

class SplitResult(_SplitResultBase, _NetlocResultMixinStr):
    # str result of urlsplit(): (scheme, netloc, path, query, fragment).
    __slots__ = ()
    def geturl(self):
        """Reassemble the components into a URL with urlunsplit()."""
        return urlunsplit(self)

327

Nick Coghlan

2010-11-30 15:48:08 +0000

[diff] [blame]

328

class ParseResult(_ParseResultBase, _NetlocResultMixinStr):
    # str result of urlparse(): (scheme, netloc, path, params, query, fragment).
    __slots__ = ()
    def geturl(self):
        """Reassemble the components into a URL with urlunparse()."""
        return urlunparse(self)

332

Nick Coghlan

2010-11-30 15:48:08 +0000

[diff] [blame]

333

# Structured result objects for bytes data

334

class DefragResultBytes(_DefragResultBase, _ResultMixinBytes):
    # bytes counterpart of DefragResult.
    __slots__ = ()
    def geturl(self):
        """Return the URL, with b'#' and the fragment appended when it is non-empty."""
        if self.fragment:
            return self.url + b'#' + self.fragment
        else:
            return self.url

class SplitResultBytes(_SplitResultBase, _NetlocResultMixinBytes):
    # bytes counterpart of SplitResult.
    __slots__ = ()
    def geturl(self):
        """Reassemble the components into a URL with urlunsplit()."""
        return urlunsplit(self)

346

347

class ParseResultBytes(_ParseResultBase, _NetlocResultMixinBytes):
    # bytes counterpart of ParseResult.
    __slots__ = ()
    def geturl(self):
        """Reassemble the components into a URL with urlunparse()."""
        return urlunparse(self)

351

352

# Set up the encode/decode result pairs

353

def _fix_result_transcoding():
    """Link each str result class with its bytes counterpart, both ways.

    encode()/decode() on the result mixins look these attributes up.
    """
    _result_pairs = (
        (DefragResult, DefragResultBytes),
        (SplitResult, SplitResultBytes),
        (ParseResult, ParseResultBytes),
    )
    for _decoded, _encoded in _result_pairs:
        _decoded._encoded_counterpart = _encoded
        _encoded._decoded_counterpart = _decoded

# Run once at import time, then drop the helper from the module namespace.
_fix_result_transcoding()
del _fix_result_transcoding

Jeremy Hylton

2008-06-18 20:49:58 +0000

[diff] [blame]

365

366

def urlparse(url, scheme='', allow_fragments=True):
    """Parse a URL into 6 components:
    <scheme>://<netloc>/<path>;<params>?<query>#<fragment>
    Return a 6-tuple: (scheme, netloc, path, params, query, fragment).
    Note that we don't break the components up in smaller bits
    (e.g. netloc is a single string) and we don't expand % escapes."""
    url, scheme, _coerce_result = _coerce_args(url, scheme)
    splitresult = urlsplit(url, scheme, allow_fragments)
    scheme, netloc, url, query, fragment = splitresult
    # Params are only split off for schemes listed in uses_params.
    if scheme in uses_params and ';' in url:
        url, params = _splitparams(url)
    else:
        params = ''
    result = ParseResult(scheme, netloc, url, params, query, fragment)
    return _coerce_result(result)

Jeremy Hylton

2008-06-18 20:49:58 +0000

[diff] [blame]

381

382

def _splitparams(url):
    """Split ';params' off the last path segment of *url*.

    Returns (path, params). Only a ';' located in the last '/'-separated
    segment counts; when there is none, returns (url, '').
    """
    if '/' in url:
        # Start searching at the last slash so earlier segments are ignored.
        i = url.find(';', url.rfind('/'))
    else:
        i = url.find(';')
    if i < 0:
        # Without this guard, i == -1 would slice off the last character.
        return url, ''
    return url[:i], url[i+1:]

390

391

def _splitnetloc(url, start=0):
    """Split *url* at the first '/', '?' or '#' found at or after *start*.

    Returns (url[start:delim], url[delim:]); the second element is empty
    when no delimiter is found.
    """
    delim = len(url)   # position of end of domain part of url, default is end
    for c in '/?#':    # look for delimiters; the order is NOT important
        wdelim = url.find(c, start)        # find first of this delim
        if wdelim >= 0:                    # if found
            delim = min(delim, wdelim)     # use earliest delim position
    return url[start:delim], url[delim:]   # return (domain, rest)

398

Steve Dower

16e6f7d

2019-03-07 08:02:26 -0800

[diff] [blame^]

399

def _checknetloc(netloc):
    """Reject a netloc that gains URL delimiters under NFKC normalization.

    Returns None when the netloc is acceptable. Raises ValueError when
    normalization introduces one of '/', '?', '#', '@' or ':' that was not
    already written literally in *netloc*.
    """
    if not netloc or netloc.isascii():
        return
    # looking for characters like \u2100 that expand to 'a/c'
    # IDNA uses NFKC equivalence, so normalize for this check
    import unicodedata
    # Delimiters already present literally (userinfo '@', port ':', ...) are
    # legitimate, so drop them first and only look at what normalization adds.
    n = netloc.replace('@', '')
    n = n.replace(':', '')
    n = n.replace('#', '')
    n = n.replace('?', '')
    netloc2 = unicodedata.normalize('NFKC', n)
    if n == netloc2:
        return
    for c in '/?#@:':
        if c in netloc2:
            raise ValueError("netloc '" + netloc + "' contains invalid " +
                             "characters under NFKC normalization")

413

Jeremy Hylton

2008-06-18 20:49:58 +0000

[diff] [blame]

414

def urlsplit(url, scheme='', allow_fragments=True):
    """Parse a URL into 5 components:
    <scheme>://<netloc>/<path>?<query>#<fragment>
    Return a 5-tuple: (scheme, netloc, path, query, fragment).
    Note that we don't break the components up in smaller bits
    (e.g. netloc is a single string) and we don't expand % escapes."""
    url, scheme, _coerce_result = _coerce_args(url, scheme)
    allow_fragments = bool(allow_fragments)
    key = url, scheme, allow_fragments, type(url), type(scheme)
    cached = _parse_cache.get(key, None)
    if cached:
        return _coerce_result(cached)
    if len(_parse_cache) >= MAX_CACHE_SIZE: # avoid runaway growth
        clear_cache()
    netloc = query = fragment = ''
    i = url.find(':')
    if i > 0:
        prefix, rest = url[:i], url[i+1:]
        if prefix == 'http': # optimize the common case
            # An exact lower-case 'http' prefix is always taken as the
            # scheme; the port-number check below is not applied to it.
            scheme, url = prefix, rest
        elif all(c in scheme_chars for c in prefix):
            # make sure "url" is not actually a port number (in which case
            # "scheme" is really part of the path)
            if not rest or any(c not in '0123456789' for c in rest):
                # not a port number
                scheme, url = prefix.lower(), rest

    if url[:2] == '//':
        netloc, url = _splitnetloc(url, 2)
        # Brackets must come as a pair.
        if (('[' in netloc and ']' not in netloc) or
                (']' in netloc and '[' not in netloc)):
            raise ValueError("Invalid IPv6 URL")
    if allow_fragments and '#' in url:
        url, fragment = url.split('#', 1)
    if '?' in url:
        url, query = url.split('?', 1)
    _checknetloc(netloc)
    v = SplitResult(scheme, netloc, url, query, fragment)
    _parse_cache[key] = v
    return _coerce_result(v)

Jeremy Hylton

2008-06-18 20:49:58 +0000

[diff] [blame]

470

471

def urlunparse(components):
    """Put a parsed URL back together again.  This may result in a
    slightly different, but equivalent URL, if the URL that was parsed
    originally had redundant delimiters, e.g. a ? with an empty query
    (the draft states that these are equivalent)."""
    scheme, netloc, url, params, query, fragment, _coerce_result = (
                                                  _coerce_args(*components))
    if params:
        # Fold the params back into the path before delegating.
        url = "%s;%s" % (url, params)
    return _coerce_result(urlunsplit((scheme, netloc, url, query, fragment)))

Jeremy Hylton

2008-06-18 20:49:58 +0000

[diff] [blame]

481

482

def urlunsplit(components):
    """Combine the elements of a tuple as returned by urlsplit() into a
    complete URL as a string. The components argument can be any five-item
    iterable. This may result in a slightly different, but equivalent URL, if
    the URL that was parsed originally had unnecessary delimiters (for example,
    a ? with an empty query; the RFC states that these are equivalent)."""
    scheme, netloc, url, query, fragment, _coerce_result = (
                                          _coerce_args(*components))
    if netloc or (scheme and scheme in uses_netloc and url[:2] != '//'):
        # A non-empty path following a netloc must start with a slash.
        if url and url[:1] != '/':
            url = '/' + url
        url = '//' + (netloc or '') + url
    if scheme:
        url = scheme + ':' + url
    if query:
        url = url + '?' + query
    if fragment:
        url = url + '#' + fragment
    return _coerce_result(url)

Jeremy Hylton

2008-06-18 20:49:58 +0000

[diff] [blame]

500

501

def urljoin(base, url, allow_fragments=True):
    """Join a base URL and a possibly relative URL to form an absolute
    interpretation of the latter."""
    if not base:
        return url
    if not url:
        return base

    base, url, _coerce_result = _coerce_args(base, url)
    bscheme, bnetloc, bpath, bparams, bquery, bfragment = \
            urlparse(base, '', allow_fragments)
    # The base scheme is the default scheme for the reference.
    scheme, netloc, path, params, query, fragment = \
            urlparse(url, bscheme, allow_fragments)

    # A different scheme, or one not listed in uses_relative, means the
    # reference is returned as given.
    if scheme != bscheme or scheme not in uses_relative:
        return _coerce_result(url)
    if scheme in uses_netloc:
        if netloc:
            return _coerce_result(urlunparse((scheme, netloc, path,
                                              params, query, fragment)))
        netloc = bnetloc

    if not path and not params:
        # Only a query and/or fragment was given: keep the base path.
        path = bpath
        params = bparams
        if not query:
            query = bquery
        return _coerce_result(urlunparse((scheme, netloc, path,
                                          params, query, fragment)))

    base_parts = bpath.split('/')
    if base_parts[-1] != '':
        # the last item is not a directory, so will not be taken into account
        # in resolving the relative path
        del base_parts[-1]

    # for rfc3986, ignore all base path should the first character be root.
    if path[:1] == '/':
        segments = path.split('/')
    else:
        segments = base_parts + path.split('/')
        # filter out elements that would cause redundant slashes on re-joining
        # the resolved_path
        segments[1:-1] = filter(None, segments[1:-1])

    resolved_path = []

    for seg in segments:
        if seg == '..':
            try:
                resolved_path.pop()
            except IndexError:
                # ignore any .. segments that would otherwise cause an IndexError
                # when popped from resolved_path if resolving for rfc3986
                pass
        elif seg == '.':
            continue
        else:
            resolved_path.append(seg)

    if segments[-1] in ('.', '..'):
        # do some post-processing here. if the last segment was a relative dir,
        # then we need to append the trailing '/'
        resolved_path.append('')

    return _coerce_result(urlunparse((scheme, netloc, '/'.join(
        resolved_path) or '/', params, query, fragment)))

Antoine Pitrou

2014-08-21 19:16:17 -0400

[diff] [blame]

568

Jeremy Hylton

2008-06-18 20:49:58 +0000

[diff] [blame]

569

570

def urldefrag(url):
    """Removes any existing fragment from URL.

    Returns a tuple of the defragmented URL and the fragment.  If
    the URL contained no fragments, the second element is the
    empty string.
    """
    url, _coerce_result = _coerce_args(url)
    if '#' in url:
        # Re-assemble the parsed URL with an empty fragment.
        s, n, p, a, q, frag = urlparse(url)
        defrag = urlunparse((s, n, p, a, q, ''))
    else:
        frag = ''
        defrag = url
    return _coerce_result(DefragResult(defrag, frag))

Jeremy Hylton

2008-06-18 20:49:58 +0000

[diff] [blame]

585

Serhiy Storchaka

2013-03-14 21:31:37 +0200

[diff] [blame]

586

_hexdig = '0123456789ABCDEFabcdef'
# Filled on first use by unquote_to_bytes(): two hex digits (bytes) -> byte.
_hextobyte = None

def unquote_to_bytes(string):
    """unquote_to_bytes('abc%20def') -> b'abc def'."""
    # Note: strings are encoded as UTF-8. This is only an issue if it contains
    # unescaped non-ASCII characters, which URIs should not.
    if not string:
        # Is it a string-like object?
        string.split
        return b''
    if isinstance(string, str):
        string = string.encode('utf-8')
    bits = string.split(b'%')
    if len(bits) == 1:
        return string
    res = [bits[0]]
    append = res.append
    # Delay the initialization of the table to not waste memory
    # if the function is never called
    global _hextobyte
    if _hextobyte is None:
        _hextobyte = {(a + b).encode(): bytes.fromhex(a + b)
                      for a in _hexdig for b in _hexdig}
    for item in bits[1:]:
        try:
            append(_hextobyte[item[:2]])
            append(item[2:])
        except KeyError:
            # Not a valid %xx escape: keep the '%' and the text as they were.
            append(b'%')
            append(item)
    return b''.join(res)

# Matches runs of ASCII characters; only those runs can contain %xx escapes.
_asciire = re.compile('([\x00-\x7f]+)')

def unquote(string, encoding='utf-8', errors='replace'):
    """Replace %xx escapes by their single-character equivalent. The optional
    encoding and errors parameters specify how to decode percent-encoded
    sequences into Unicode characters, as accepted by the bytes.decode()
    method.
    By default, percent-encoded sequences are decoded with UTF-8, and invalid
    sequences are replaced by a placeholder character.

    A bytes argument is unquoted and then decoded in one piece.

    unquote('abc%20def') -> 'abc def'.
    """
    if isinstance(string, bytes):
        # bytes input has no str parts to preserve: unquote it, then decode.
        if encoding is None:
            encoding = 'utf-8'
        if errors is None:
            errors = 'replace'
        return unquote_to_bytes(string).decode(encoding, errors)
    if '%' not in string:
        # Is it a string-like object?
        string.split
        return string
    if encoding is None:
        encoding = 'utf-8'
    if errors is None:
        errors = 'replace'
    # Odd indexes hold the ASCII runs, even indexes the non-ASCII text between.
    bits = _asciire.split(string)
    res = [bits[0]]
    append = res.append
    for i in range(1, len(bits), 2):
        append(unquote_to_bytes(bits[i]).decode(encoding, errors))
        append(bits[i + 1])
    return ''.join(res)

Guido van Rossum

2008-08-18 21:44:30 +0000

[diff] [blame]

645

Senthil Kumaran

2017-04-04 21:19:43 -0700

[diff] [blame]

646

Victor Stinner

2011-01-14 12:52:12 +0000

[diff] [blame]

647

def parse_qs(qs, keep_blank_values=False, strict_parsing=False,
             encoding='utf-8', errors='replace', max_num_fields=None):
    """Parse a query string into a dict of name -> list of values.

        Arguments:

        qs: percent-encoded query string to be parsed

        keep_blank_values: flag indicating whether blank values in
            percent-encoded queries should be treated as blank strings.
            A true value keeps them as blank strings.  The default false
            value drops them as if they had not been included.

        strict_parsing: flag indicating what to do with parsing errors.
            If false (the default), errors are silently ignored.
            If true, errors raise a ValueError exception.

        encoding and errors: specify how to decode percent-encoded sequences
            into Unicode characters, as accepted by the bytes.decode() method.

        max_num_fields: int. If set, a ValueError is raised when
            parse_qsl() would read more than this many fields.

        All arguments are forwarded to parse_qsl().  Values of a repeated
        name are collected in order of appearance.

        Returns a dictionary.
    """
    grouped = {}
    for name, value in parse_qsl(qs, keep_blank_values, strict_parsing,
                                 encoding=encoding, errors=errors,
                                 max_num_fields=max_num_fields):
        grouped.setdefault(name, []).append(value)
    return grouped

Facundo Batista

2008-09-03 22:49:01 +0000

[diff] [blame]

684

Senthil Kumaran

2017-04-04 21:19:43 -0700

[diff] [blame]

685

Victor Stinner

2011-01-14 12:52:12 +0000

[diff] [blame]

686

def parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
              encoding='utf-8', errors='replace', max_num_fields=None):
    """Parse a query string into a list of (name, value) pairs.

        Arguments:

        qs: percent-encoded query string to be parsed

        keep_blank_values: flag indicating whether blank values in
            percent-encoded queries should be treated as blank strings.
            A true value keeps them as blank strings.  The default false
            value drops them as if they had not been included.

        strict_parsing: flag indicating what to do with parsing errors. If
            false (the default), errors are silently ignored. If true,
            errors raise a ValueError exception.

        encoding and errors: specify how to decode percent-encoded sequences
            into Unicode characters, as accepted by the bytes.decode() method.

        max_num_fields: int. If set, a ValueError is raised when the
            query contains more than this many fields.

        Fields are separated by '&' or ';'.  In names and values '+' is
        turned into a space before %xx escapes are decoded.

        Returns a list of (name, value) tuples in order of appearance.
    """
    qs, _coerce_result = _coerce_args(qs)

    # When a limit is given, count the fields up front (one more than the
    # number of separators) and refuse to go on if the limit is exceeded.
    # This prevents a memory exhaustion DOS attack via post bodies with
    # many fields.
    if max_num_fields is not None:
        num_fields = 1 + qs.count('&') + qs.count(';')
        if max_num_fields < num_fields:
            raise ValueError('Max number of fields exceeded')

    result = []
    for amp_part in qs.split('&'):
        for field in amp_part.split(';'):
            if not (field or strict_parsing):
                # Empty field (e.g. from '&&'): skipped unless strict.
                continue
            raw_name, equals, raw_value = field.partition('=')
            if not equals:
                if strict_parsing:
                    raise ValueError("bad query field: %r" % (field,))
                # A control-name with no equal sign is only kept (with a
                # blank value) when blank values are requested.
                if not keep_blank_values:
                    continue
            if raw_value or keep_blank_values:
                name = _coerce_result(
                    unquote(raw_name.replace('+', ' '),
                            encoding=encoding, errors=errors))
                value = _coerce_result(
                    unquote(raw_value.replace('+', ' '),
                            encoding=encoding, errors=errors))
                result.append((name, value))
    return result

745

Guido van Rossum

2008-08-18 21:44:30 +0000

[diff] [blame]

746

def unquote_plus(string, encoding='utf-8', errors='replace'):
    """Like unquote(), but plus signs are turned into spaces first, as
    required for unquoting HTML form values.

    unquote_plus('%7e/abc+def') -> '~/abc def'
    """
    return unquote(string.replace('+', ' '), encoding, errors)

754

755

_ALWAYS_SAFE = frozenset(b'ABCDEFGHIJKLMNOPQRSTUVWXYZ'

756

b'abcdefghijklmnopqrstuvwxyz'

757

b'0123456789'

Ratnadeep Debnath

2017-02-25 14:30:28 +0530

[diff] [blame]

758

b'_.-~')

Florent Xicluna

2010-05-17 17:33:07 +0000

[diff] [blame]

759

_ALWAYS_SAFE_BYTES = bytes(_ALWAYS_SAFE)

760

_safe_quoters = {}

Guido van Rossum

2008-08-06 19:31:34 +0000

[diff] [blame]

761

Guido van Rossum

2008-08-18 21:44:30 +0000

[diff] [blame]

762

class Quoter(collections.defaultdict):

763

"""A mapping from bytes (in range(0,256)) to strings.

764

765

String values are percent-encoded byte values, unless the key < 128, and

766

in the "safe" set (either the specified safe set, or default set).

767

"""

768

# Keeps a cache internally, using defaultdict, for efficiency (lookups

769

# of cached keys don't call Python code at all).

Guido van Rossum

2008-08-06 19:31:34 +0000

[diff] [blame]

770

def __init__(self, safe):

Guido van Rossum

2008-08-18 21:44:30 +0000

[diff] [blame]

771

"""safe: bytes object."""

Florent Xicluna

2010-05-17 17:33:07 +0000

[diff] [blame]

772

self.safe = _ALWAYS_SAFE.union(safe)

Guido van Rossum

2008-08-06 19:31:34 +0000

[diff] [blame]

773

Guido van Rossum

2008-08-18 21:44:30 +0000

[diff] [blame]

774

def __repr__(self):

775

# Without this, will just display as a defaultdict

Serhiy Storchaka

465e60e

2014-07-25 23:36:00 +0300

[diff] [blame]

776

return "<%s %r>" % (self.__class__.__name__, dict(self))

Guido van Rossum

2008-08-06 19:31:34 +0000

[diff] [blame]

777

Guido van Rossum

2008-08-18 21:44:30 +0000

[diff] [blame]

778

def __missing__(self, b):

779

# Handle a cache miss. Store quoted string in cache and return.

Florent Xicluna

2010-05-17 17:33:07 +0000

[diff] [blame]

780

res = chr(b) if b in self.safe else '%{:02X}'.format(b)

Guido van Rossum

2008-08-18 21:44:30 +0000

[diff] [blame]

self[b] = res

return res

def quote(string, safe='/', encoding=None, errors=None):
    """quote('abc def') -> 'abc%20def'

    Each part of a URL, e.g. the path info, the query, etc., has a
    different set of reserved characters that must be quoted.

    RFC 3986 Uniform Resource Identifier (URI): Generic Syntax lists
    the following (un)reserved characters.

    unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~"
    reserved      = gen-delims / sub-delims
    gen-delims    = ":" / "/" / "?" / "#" / "[" / "]" / "@"
    sub-delims    = "!" / "$" / "&" / "'" / "(" / ")"
                  / "*" / "+" / "," / ";" / "="

    Each of the reserved characters is reserved in some component of a URL,
    but not necessarily in all of them.

    Python 3.7 updates from using RFC 2396 to RFC 3986 to quote URL strings.
    Now, "~" is included in the set of unreserved characters and is
    therefore never quoted.

    By default, the quote function is intended for quoting the path
    section of a URL.  Thus, it will not encode '/'.  This character
    is reserved, but in typical usage the quote function is being
    called on a path where the existing slash characters are used as
    reserved characters.

    string and safe may be either str or bytes objects.  encoding and errors
    must not be specified if string is a bytes object (TypeError otherwise).

    The optional encoding and errors parameters specify how to deal with
    non-ASCII characters, as accepted by the str.encode method.
    By default, encoding='utf-8' (characters are encoded with UTF-8), and
    errors='strict' (unsupported characters raise a UnicodeEncodeError).
    """
    if isinstance(string, str):
        if not string:
            # Nothing to quote; return the empty str unchanged.
            return string
        if encoding is None:
            encoding = 'utf-8'
        if errors is None:
            errors = 'strict'
        string = string.encode(encoding, errors)
    else:
        # Non-str input is passed on as-is, so there is nothing for
        # encoding/errors to apply to.
        if encoding is not None:
            raise TypeError("quote() doesn't support 'encoding' for bytes")
        if errors is not None:
            raise TypeError("quote() doesn't support 'errors' for bytes")
    return quote_from_bytes(string, safe)

830

831

def quote_plus(string, safe='', encoding=None, errors=None):
    """Like quote(), but ' ' is also replaced with '+', as required for
    quoting HTML form values.  Plus signs in the original string are
    escaped unless they are included in safe.  Unlike quote(), safe does
    not default to '/'.
    """
    # string may be str or bytes.  When it holds no space at all, plain
    # quote() already produces the right answer.
    if isinstance(string, str):
        space_free = ' ' not in string
    elif isinstance(string, bytes):
        space_free = b' ' not in string
    else:
        space_free = False
    if space_free:
        return quote(string, safe, encoding, errors)
    # Mark the space as safe so quote() leaves it alone, then swap every
    # surviving space for '+'.  The space literal must match safe's type
    # for the concatenation to work.
    space = ' ' if isinstance(safe, str) else b' '
    quoted = quote(string, safe + space, encoding, errors)
    return quoted.replace(' ', '+')

Guido van Rossum

2008-08-18 21:44:30 +0000

[diff] [blame]

847

848

def quote_from_bytes(bs, safe='/'):
    """Like quote(), but accepts a bytes object rather than a str, and does
    not perform string-to-bytes encoding.  It always returns an ASCII string.

    quote_from_bytes(b'abc def\x3f') -> 'abc%20def%3F'

    bs must be bytes or bytearray, otherwise TypeError is raised.  safe may
    be str or bytes; non-ASCII characters/bytes in it are ignored.
    """
    if not isinstance(bs, (bytes, bytearray)):
        raise TypeError("quote_from_bytes() expected bytes")
    if not bs:
        return ''
    if isinstance(safe, str):
        # Normalize 'safe' by converting to bytes and removing non-ASCII chars
        safe = safe.encode('ascii', 'ignore')
    else:
        safe = bytes([c for c in safe if c < 128])
    # Fast path: if stripping safe bytes leaves nothing, every byte is safe
    # and the input can be returned as text unchanged.
    if not bs.rstrip(_ALWAYS_SAFE_BYTES + safe):
        return bs.decode()
    # One Quoter lookup function is cached per distinct 'safe' value.
    try:
        quoter = _safe_quoters[safe]
    except KeyError:
        _safe_quoters[safe] = quoter = Quoter(safe).__getitem__
    return ''.join([quoter(char) for char in bs])

Jeremy Hylton

2008-06-18 20:49:58 +0000

[diff] [blame]

869

R David Murray

2015-05-17 20:44:50 -0400

[diff] [blame]

870

def urlencode(query, doseq=False, safe='', encoding=None, errors=None,
              quote_via=quote_plus):
    """Encode a dict or sequence of two-element tuples into a URL query string.

    If any values in the query arg are sequences and doseq is true, each
    sequence element is converted to a separate parameter.

    If the query arg is a sequence of two-element tuples, the order of the
    parameters in the output will match the order of parameters in the
    input.

    The components of a query arg may each be either a string or a bytes type.

    The safe, encoding, and errors parameters are passed down to the function
    specified by quote_via (encoding and errors only if a component is a str).

    Raises TypeError if query is neither a mapping (something with an
    items() method) nor a non-string sequence of tuples.
    """

    def quote_component(component):
        # bytes are quoted as they are; anything else is converted with
        # str() first, and only then do encoding/errors apply.
        if isinstance(component, bytes):
            return quote_via(component, safe)
        return quote_via(str(component), safe, encoding, errors)

    if hasattr(query, "items"):
        query = query.items()
    else:
        # It's a bother at times that strings and string-like objects are
        # sequences.
        try:
            # non-sequence items should not work with len()
            # non-empty strings will fail this
            if len(query) and not isinstance(query[0], tuple):
                raise TypeError
            # Zero-length sequences of all types will get here and succeed,
            # but that's a minor nit.  Since the original implementation
            # allowed empty dicts that type of behavior probably should be
            # preserved for consistency
        except TypeError as err:
            # Re-raise with a clearer message but the original traceback.
            raise TypeError("not a valid non-string sequence "
                            "or mapping object").with_traceback(
                                err.__traceback__)

    params = []
    for k, v in query:
        key = quote_component(k)
        if not doseq or isinstance(v, bytes):
            # Scalar mode, or a bytes value: exactly one parameter.
            params.append(key + '=' + quote_component(v))
        elif isinstance(v, str):
            # A str is a sequence too, but must stay a single parameter.
            params.append(key + '=' + quote_via(v, safe, encoding, errors))
        else:
            try:
                # Is this a sufficient test for sequence-ness?
                len(v)
            except TypeError:
                # not a sequence
                params.append(key + '=' + quote_component(v))
            else:
                # one parameter per element of the sequence
                for elt in v:
                    params.append(key + '=' + quote_component(elt))
    return '&'.join(params)

949

Cheryl Sabella

2018-04-25 16:51:54 -0700

[diff] [blame]

950

Georg Brandl

13e8946

2008-07-01 19:56:00 +0000

[diff] [blame]

951

def to_bytes(url):
    """Deprecated public alias of _to_bytes(): warn, then delegate."""
    message = "urllib.parse.to_bytes() is deprecated as of 3.8"
    warnings.warn(message, category=DeprecationWarning, stacklevel=2)
    return _to_bytes(url)


def _to_bytes(url):
    """to_bytes(u"URL") --> 'URL'.

    A str is returned after a round trip through ASCII; UnicodeError is
    raised if it contains non-ASCII characters.  Any other object is
    returned unchanged.
    """
    # Most URL schemes require ASCII. If that changes, the conversion
    # can be relaxed.
    # XXX get rid of to_bytes()
    if not isinstance(url, str):
        return url
    try:
        return url.encode("ASCII").decode()
    except UnicodeError:
        raise UnicodeError("URL " + repr(url) +
                           " contains non-ASCII characters")

969

Cheryl Sabella

2018-04-25 16:51:54 -0700

[diff] [blame]

970

Jeremy Hylton

2008-06-18 20:49:58 +0000

[diff] [blame]

971

def unwrap(url):
    """Deprecated public alias of _unwrap(): warn, then delegate."""
    message = "urllib.parse.unwrap() is deprecated as of 3.8"
    warnings.warn(message, category=DeprecationWarning, stacklevel=2)
    return _unwrap(url)


def _unwrap(url):
    """unwrap('<URL:type://host/path>') --> 'type://host/path'.

    The argument is converted with str().  Surrounding whitespace, one
    pair of enclosing angle brackets and a leading 'URL:' prefix are
    removed, in that order.
    """
    text = str(url).strip()
    if text.startswith('<') and text.endswith('>'):
        text = text[1:-1].strip()
    if text.startswith('URL:'):
        text = text[4:].strip()
    return text

984

Cheryl Sabella

2018-04-25 16:51:54 -0700

[diff] [blame]

985

Jeremy Hylton

2008-06-18 20:49:58 +0000

[diff] [blame]

986

def splittype(url):
    """Deprecated public alias of _splittype(): warn, then delegate."""
    message = ("urllib.parse.splittype() is deprecated as of 3.8, "
               "use urllib.parse.urlparse() instead")
    warnings.warn(message, category=DeprecationWarning, stacklevel=2)
    return _splittype(url)


# Compiled on first use by _splittype().
_typeprog = None


def _splittype(url):
    """splittype('type:opaquestring') --> 'type', 'opaquestring'.

    The scheme is lower-cased.  When url has no leading scheme (a run of
    characters other than '/' and ':' followed by ':'), (None, url) is
    returned.
    """
    global _typeprog
    if _typeprog is None:
        _typeprog = re.compile('([^/:]+):(.*)', re.DOTALL)

    found = _typeprog.match(url)
    if not found:
        return None, url
    scheme, rest = found.groups()
    return scheme.lower(), rest

1005

Cheryl Sabella

2018-04-25 16:51:54 -0700

[diff] [blame]

1006

Jeremy Hylton

2008-06-18 20:49:58 +0000

[diff] [blame]

1007

def splithost(url):
    """Deprecated public alias of _splithost(): warn, then delegate."""
    message = ("urllib.parse.splithost() is deprecated as of 3.8, "
               "use urllib.parse.urlparse() instead")
    warnings.warn(message, category=DeprecationWarning, stacklevel=2)
    return _splithost(url)


# Compiled on first use by _splithost().
_hostprog = None


def _splithost(url):
    """splithost('//host[:port]/path') --> 'host[:port]', '/path'.

    The host part ends at the first '/', '#' or '?'.  A non-empty
    remainder that does not start with '/' gets one prepended.  When url
    does not start with '//', (None, url) is returned.
    """
    global _hostprog
    if _hostprog is None:
        _hostprog = re.compile('//([^/#?]*)(.*)', re.DOTALL)

    found = _hostprog.match(url)
    if not found:
        return None, url
    host_port, path = found.groups()
    if path and not path.startswith('/'):
        path = '/' + path
    return host_port, path

1028

Cheryl Sabella

2018-04-25 16:51:54 -0700

[diff] [blame]

1029

Jeremy Hylton

2008-06-18 20:49:58 +0000

[diff] [blame]

1030

def splituser(host):
    """Deprecated public alias of _splituser(): warn, then delegate."""
    message = ("urllib.parse.splituser() is deprecated as of 3.8, "
               "use urllib.parse.urlparse() instead")
    warnings.warn(message, category=DeprecationWarning, stacklevel=2)
    return _splituser(host)


def _splituser(host):
    """splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'.

    The split happens at the last '@'.  Without any '@' the user part is
    None.
    """
    credentials, at_sign, location = host.rpartition('@')
    if not at_sign:
        return None, location
    return credentials, location

Jeremy Hylton

2008-06-18 20:49:58 +0000

[diff] [blame]

1041

Cheryl Sabella

2018-04-25 16:51:54 -0700

[diff] [blame]

1042

Jeremy Hylton

2008-06-18 20:49:58 +0000

[diff] [blame]

1043

def splitpasswd(user):
    """Deprecated public alias of _splitpasswd(): warn, then delegate."""
    message = ("urllib.parse.splitpasswd() is deprecated as of 3.8, "
               "use urllib.parse.urlparse() instead")
    warnings.warn(message, category=DeprecationWarning, stacklevel=2)
    return _splitpasswd(user)


def _splitpasswd(user):
    """splitpasswd('user:passwd') -> 'user', 'passwd'.

    The split happens at the first ':'.  Without any ':' the password
    part is None.
    """
    name, colon, secret = user.partition(':')
    if not colon:
        return name, None
    return name, secret

Jeremy Hylton

2008-06-18 20:49:58 +0000

[diff] [blame]

1054

Cheryl Sabella

2018-04-25 16:51:54 -0700

[diff] [blame]

1055

1056

def splitport(host):
    """Deprecated public alias of _splitport(): warn, then delegate."""
    warnings.warn("urllib.parse.splitport() is deprecated as of 3.8, "
                  "use urllib.parse.urlparse() instead",
                  DeprecationWarning, stacklevel=2)
    return _splitport(host)


# Compiled on first use by _splitport().
_portprog = None


def _splitport(host):
    """splitport('host:port') --> 'host', 'port'.

    The port is returned as a string of ASCII digits taken after the last
    ':'.  When there is no ':' or the text after it is not made of digits
    only, the input is returned unchanged with None as the port.  A
    trailing ':' with nothing after it is removed from the host and the
    port is None.
    """
    global _portprog
    if _portprog is None:
        _portprog = re.compile('(.*):([0-9]*)', re.DOTALL)

    # fullmatch() instead of match() with a trailing '$': '$' also matches
    # just before a final newline, which let 'host:80\n' parse as port '80'
    # and silently dropped the newline.
    match = _portprog.fullmatch(host)
    if match:
        host, port = match.groups()
        if port:
            return host, port
    return host, None

1077

Cheryl Sabella

2018-04-25 16:51:54 -0700

[diff] [blame]

1078

Jeremy Hylton

2008-06-18 20:49:58 +0000

[diff] [blame]

1079

def splitnport(host, defport=-1):
    """Deprecated public alias of _splitnport(): warn, then delegate."""
    warnings.warn("urllib.parse.splitnport() is deprecated as of 3.8, "
                  "use urllib.parse.urlparse() instead",
                  DeprecationWarning, stacklevel=2)
    return _splitnport(host, defport)


def _splitnport(host, defport=-1):
    """Split host and port, returning numeric port.

    Return given default port if no ':' found or nothing follows the
    last ':'; defaults to -1.
    Return numerical port if a valid number is found after ':'.
    Return None if ':' but not a valid number.
    """
    host, delim, port = host.rpartition(':')
    if not delim:
        # No ':' at all: rpartition() put the whole input in its last slot.
        host = port
    elif port:
        # Only plain ASCII digits form a valid port.  int() alone would
        # also accept signs, surrounding whitespace, underscores and
        # non-ASCII digits (e.g. '+80', ' 80', '8_0').
        if port.isdigit() and port.isascii():
            nport = int(port)
        else:
            nport = None
        return host, nport
    return host, defport

1101

Cheryl Sabella

2018-04-25 16:51:54 -0700

[diff] [blame]

1102

Jeremy Hylton

2008-06-18 20:49:58 +0000

[diff] [blame]

1103

def splitquery(url):
    """Deprecated public alias of _splitquery(): warn, then delegate."""
    message = ("urllib.parse.splitquery() is deprecated as of 3.8, "
               "use urllib.parse.urlparse() instead")
    warnings.warn(message, category=DeprecationWarning, stacklevel=2)
    return _splitquery(url)


def _splitquery(url):
    """splitquery('/path?query') --> '/path', 'query'.

    The split happens at the last '?'.  Without any '?' the result is
    (url, None).
    """
    before, question_mark, after = url.rpartition('?')
    if not question_mark:
        return url, None
    return before, after

1116

Cheryl Sabella

2018-04-25 16:51:54 -0700

[diff] [blame]

1117

Jeremy Hylton

2008-06-18 20:49:58 +0000

[diff] [blame]

1118

def splittag(url):
    """Deprecated public alias of _splittag(): warn, then delegate."""
    message = ("urllib.parse.splittag() is deprecated as of 3.8, "
               "use urllib.parse.urlparse() instead")
    warnings.warn(message, category=DeprecationWarning, stacklevel=2)
    return _splittag(url)


def _splittag(url):
    """splittag('/path#tag') --> '/path', 'tag'.

    The split happens at the last '#'.  Without any '#' the result is
    (url, None).
    """
    before, hash_mark, after = url.rpartition('#')
    if not hash_mark:
        return url, None
    return before, after

1131

Cheryl Sabella

2018-04-25 16:51:54 -0700

[diff] [blame]

1132

Jeremy Hylton

2008-06-18 20:49:58 +0000

[diff] [blame]

1133

def splitattr(url):
    """Deprecated public alias of _splitattr(): warn, then delegate."""
    message = ("urllib.parse.splitattr() is deprecated as of 3.8, "
               "use urllib.parse.urlparse() instead")
    warnings.warn(message, category=DeprecationWarning, stacklevel=2)
    return _splitattr(url)


def _splitattr(url):
    """splitattr('/path;attr1=value1;attr2=value2;...') ->
        '/path', ['attr1=value1', 'attr2=value2', ...].

    The attribute list is empty when url contains no ';'.
    """
    path, *attrs = url.split(';')
    return path, attrs

1145

Cheryl Sabella

2018-04-25 16:51:54 -0700

[diff] [blame]

1146

Jeremy Hylton

2008-06-18 20:49:58 +0000

[diff] [blame]

1147

def splitvalue(attr):
    """Deprecated public alias of _splitvalue(): warn, then delegate."""
    message = ("urllib.parse.splitvalue() is deprecated as of 3.8, "
               "use urllib.parse.parse_qsl() instead")
    warnings.warn(message, category=DeprecationWarning, stacklevel=2)
    return _splitvalue(attr)

1152

1153

1154

def _splitvalue(attr):

Jeremy Hylton

2008-06-18 20:49:58 +0000

[diff] [blame]

1155

"""splitvalue('attr=value') --> 'attr', 'value'."""

Serhiy Storchaka