Blame - Lib/urllib/parse.py - platform/external/python/cpython2

2008-06-18 20:49:58 +0000

[diff] [blame]

1

"""Parse (absolute and relative) URLs.

2

Senthil Kumaran

2010-04-17 14:44:14 +0000

[diff] [blame]

3

urlparse module is based upon the following RFC specifications.

4

5

RFC 3986 (STD66): "Uniform Resource Identifiers" by T. Berners-Lee, R. Fielding

6

and L. Masinter, January 2005.

7

8

RFC 2732: "Format for Literal IPv6 Addresses in URL's" by R. Hinden, B. Carpenter

9

and L.Masinter, December 1999.

10

Benjamin Peterson

d7c3ed5

2010-06-27 22:32:30 +0000

[diff] [blame]

11

RFC 2396: "Uniform Resource Identifiers (URI): Generic Syntax" by T.

Senthil Kumaran

2010-04-17 14:44:14 +0000

[diff] [blame]

12

Berners-Lee, R. Fielding, and L. Masinter, August 1998.

13

David Malcolm

ee25568

2010-12-02 16:41:00 +0000

[diff] [blame]

14

RFC 2368: "The mailto URL scheme", by P.Hoffman , L Masinter, J. Zawinski, July 1998.

Senthil Kumaran

2010-04-17 14:44:14 +0000

[diff] [blame]

15

16

RFC 1808: "Relative Uniform Resource Locators", by R. Fielding, UC Irvine, June

17

1995.

18

Benjamin Peterson

d7c3ed5

2010-06-27 22:32:30 +0000

[diff] [blame]

19

RFC 1738: "Uniform Resource Locators (URL)" by T. Berners-Lee, L. Masinter, M.

Senthil Kumaran

2010-04-17 14:44:14 +0000

[diff] [blame]

20

McCahill, December 1994

21

Benjamin Peterson

d7c3ed5

2010-06-27 22:32:30 +0000

[diff] [blame]

22

RFC 3986 is considered the current standard and any future changes to

23

urlparse module should conform with it. The urlparse module is

24

currently not entirely compliant with this RFC due to de facto

25

scenarios for parsing, and for backward compatibility purposes, some

26

parsing quirks from older RFCs are retained. The testcases in

Senthil Kumaran

2010-04-17 14:44:14 +0000

[diff] [blame]

27

test_urlparse.py provide a good indicator of parsing behavior.

Jeremy Hylton

2008-06-18 20:49:58 +0000

[diff] [blame]

28

"""

29

Facundo Batista

2ac5de2

2008-07-07 18:24:11 +0000

[diff] [blame]

30

import sys

Guido van Rossum

2008-08-18 21:44:30 +0000

[diff] [blame]

31

import collections

Facundo Batista

2ac5de2

2008-07-07 18:24:11 +0000

[diff] [blame]

32

Jeremy Hylton

2008-06-18 20:49:58 +0000

[diff] [blame]

33

__all__ = ["urlparse", "urlunparse", "urljoin", "urldefrag",

Senthil Kumaran

0256b2a

2010-10-25 16:36:20 +0000

[diff] [blame]

34

"urlsplit", "urlunsplit", "urlencode", "parse_qs",

35

"parse_qsl", "quote", "quote_plus", "quote_from_bytes",

Guido van Rossum

2008-08-18 21:44:30 +0000

[diff] [blame]

36

"unquote", "unquote_plus", "unquote_to_bytes"]

Jeremy Hylton

2008-06-18 20:49:58 +0000

[diff] [blame]

37

38

# Scheme classification tables.  The empty string '' stands for "no scheme
# given" and opts scheme-less URLs into the corresponding behaviour.

# Schemes for which relative references may be resolved (see urljoin).
uses_relative = [
    'ftp', 'http', 'gopher', 'nntp', 'imap',
    'wais', 'file', 'https', 'shttp', 'mms',
    'prospero', 'rtsp', 'rtspu', '', 'sftp',
]

# Schemes that carry a network location ("//host") component.
uses_netloc = [
    'ftp', 'http', 'gopher', 'nntp', 'telnet',
    'imap', 'wais', 'file', 'mms', 'https', 'shttp',
    'snews', 'prospero', 'rtsp', 'rtspu', 'rsync', '',
    'svn', 'svn+ssh', 'sftp', 'nfs', 'git', 'git+ssh',
]

non_hierarchical = [
    'gopher', 'hdl', 'mailto', 'news',
    'telnet', 'wais', 'imap', 'snews', 'sip', 'sips',
]

# Schemes whose last path segment may carry ';params' (see urlparse).
uses_params = [
    'ftp', 'hdl', 'prospero', 'http', 'imap',
    'https', 'shttp', 'rtsp', 'rtspu', 'sip', 'sips',
    'mms', '', 'sftp',
]

# Schemes for which '?query' is split off (see urlsplit).
uses_query = [
    'http', 'wais', 'imap', 'https', 'shttp', 'mms',
    'gopher', 'rtsp', 'rtspu', 'sip', 'sips', '',
]

# Schemes for which '#fragment' is split off (see urlsplit).
uses_fragment = [
    'ftp', 'hdl', 'http', 'gopher', 'news',
    'nntp', 'wais', 'https', 'shttp', 'snews',
    'file', 'prospero', '',
]

# Characters valid in scheme names
scheme_chars = (
    'abcdefghijklmnopqrstuvwxyz'
    'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    '0123456789'
    '+-.'
)

Nick Coghlan

2010-11-30 15:48:08 +0000

[diff] [blame]

63

# XXX: Consider replacing with functools.lru_cache

Jeremy Hylton

2008-06-18 20:49:58 +0000

[diff] [blame]

MAX_CACHE_SIZE = 20

_parse_cache = {}

def clear_cache():
    """Clear the parse cache and the quoters cache."""
    for cache in (_parse_cache, _safe_quoters):
        cache.clear()

Jeremy Hylton

2008-06-18 20:49:58 +0000

[diff] [blame]

71

72

Nick Coghlan

2010-11-30 15:48:08 +0000

[diff] [blame]

73

# Helpers for bytes handling

74

# For 3.2, we deliberately require applications that

75

# handle improperly quoted URLs to do their own

76

# decoding and encoding. If valid use cases are

77

# presented, we may relax this by using latin-1

78

# decoding internally for 3.3

79

_implicit_encoding = 'ascii'

80

_implicit_errors = 'strict'

def _noop(obj):

return obj

def _encode_result(obj, encoding=_implicit_encoding,
                   errors=_implicit_errors):
    """Encode *obj* via its own ``encode`` method (result coercion for bytes inputs)."""
    encoded = obj.encode(encoding, errors)
    return encoded

88

89

def _decode_args(args, encoding=_implicit_encoding,
                 errors=_implicit_errors):
    """Return a tuple with every item of *args* decoded to str.

    Falsy items (empty bytes, '', None) become the empty string.
    """
    decoded = []
    for item in args:
        decoded.append(item.decode(encoding, errors) if item else '')
    return tuple(decoded)

92

93

def _coerce_args(*args):
    """Normalise the arguments to str and pick a matching result coercion.

    Returns the (possibly decoded) arguments followed by one extra element:
    ``_noop`` when the first argument is a str, ``_encode_result`` otherwise.
    Raises TypeError when str and non-str arguments are mixed.
    """
    first_is_str = isinstance(args[0], str)
    # Empty values are exempt from the check so that the "scheme=''"
    # default argument of some functions works with bytes input too.
    mixed = any(extra and isinstance(extra, str) != first_is_str
                for extra in args[1:])
    if mixed:
        raise TypeError("Cannot mix str and non-str arguments")
    if not first_is_str:
        return _decode_args(args) + (_encode_result,)
    return args + (_noop,)

108

109

# Result objects are more helpful than simple tuples

110

class _ResultMixinStr(object):

111

"""Standard approach to encoding parsed results from str to bytes"""

112

__slots__ = ()

113

114

def encode(self, encoding='ascii', errors='strict'):

115

return self._encoded_counterpart(*(x.encode(encoding, errors) for x in self))

116

117

118

class _ResultMixinBytes(object):

119

"""Standard approach to decoding parsed results from bytes to str"""

120

__slots__ = ()

121

122

def decode(self, encoding='ascii', errors='strict'):

123

return self._decoded_counterpart(*(x.decode(encoding, errors) for x in self))

124

125

126

class _NetlocResultMixinBase(object):

127

"""Shared methods for the parsed result objects containing a netloc element"""

128

__slots__ = ()

Jeremy Hylton

2008-06-18 20:49:58 +0000

[diff] [blame]

129

130

@property

131

def username(self):

Nick Coghlan

2010-11-30 15:48:08 +0000

[diff] [blame]

132

return self._userinfo[0]

Jeremy Hylton

2008-06-18 20:49:58 +0000

[diff] [blame]

133

134

@property

135

def password(self):

Nick Coghlan

2010-11-30 15:48:08 +0000

[diff] [blame]

136

return self._userinfo[1]

Jeremy Hylton

2008-06-18 20:49:58 +0000

[diff] [blame]

137

138

@property

139

def hostname(self):

Nick Coghlan

2010-11-30 15:48:08 +0000

[diff] [blame]

140

hostname = self._hostinfo[0]

141

if not hostname:

142

hostname = None

143

elif hostname is not None:

144

hostname = hostname.lower()

145

return hostname

Jeremy Hylton

2008-06-18 20:49:58 +0000

[diff] [blame]

146

147

@property

148

def port(self):

Nick Coghlan

2010-11-30 15:48:08 +0000

[diff] [blame]

149

port = self._hostinfo[1]

if port is not None:

port = int(port, 10)

return port

class _NetlocResultMixinStr(_NetlocResultMixinBase, _ResultMixinStr):
    """Netloc helpers for str results: splits ``self.netloc`` on '@', ':' and brackets."""
    __slots__ = ()

    @property
    def _userinfo(self):
        """Return (username, password); either may be None."""
        userinfo, at_sign, _ = self.netloc.rpartition('@')
        if not at_sign:
            return None, None
        username, colon, password = userinfo.partition(':')
        return username, (password if colon else None)

    @property
    def _hostinfo(self):
        """Return (hostname, port) as text; port is None without a ':' separator."""
        hostinfo = self.netloc.rpartition('@')[2]
        _, open_bracket, bracketed = hostinfo.partition('[')
        if open_bracket:
            # Bracketed (IPv6) literal: the port follows the closing ']'.
            hostname, _, tail = bracketed.partition(']')
            _, colon, port = tail.partition(':')
        else:
            hostname, colon, port = hostinfo.partition(':')
        return hostname, (port if colon else None)

183

184

185

class _NetlocResultMixinBytes(_NetlocResultMixinBase, _ResultMixinBytes):
    """Netloc helpers for bytes results: splits ``self.netloc`` on b'@', b':' and brackets."""
    __slots__ = ()

    @property
    def _userinfo(self):
        """Return (username, password); either may be None."""
        userinfo, at_sign, _ = self.netloc.rpartition(b'@')
        if not at_sign:
            return None, None
        username, colon, password = userinfo.partition(b':')
        return username, (password if colon else None)

    @property
    def _hostinfo(self):
        """Return (hostname, port) as bytes; port is None without a b':' separator."""
        hostinfo = self.netloc.rpartition(b'@')[2]
        _, open_bracket, bracketed = hostinfo.partition(b'[')
        if open_bracket:
            # Bracketed (IPv6) literal: the port follows the closing b']'.
            hostname, _, tail = bracketed.partition(b']')
            _, colon, port = tail.partition(b':')
        else:
            hostname, colon, port = hostinfo.partition(b':')
        return hostname, (port if colon else None)

213

Jeremy Hylton

2008-06-18 20:49:58 +0000

[diff] [blame]

214

215

from collections import namedtuple

216

Nick Coghlan

2010-11-30 15:48:08 +0000

[diff] [blame]

217

_DefragResultBase = namedtuple('DefragResult', 'url fragment')

218

_SplitResultBase = namedtuple('SplitResult', 'scheme netloc path query fragment')

219

_ParseResultBase = namedtuple('ParseResult', 'scheme netloc path params query fragment')

Jeremy Hylton

2008-06-18 20:49:58 +0000

[diff] [blame]

220

Nick Coghlan

2010-11-30 15:48:08 +0000

[diff] [blame]

221

# For backwards compatibility, alias _NetlocResultMixinStr

222

# ResultBase is no longer part of the documented API, but it is

223

# retained since deprecating it isn't worth the hassle

224

ResultBase = _NetlocResultMixinStr

225

226

# Structured result objects for string data

227

class DefragResult(_DefragResultBase, _ResultMixinStr):
    """str result of urldefrag(): a (url, fragment) pair."""
    __slots__ = ()

    def geturl(self):
        """Return the URL with '#fragment' re-attached when the fragment is non-empty."""
        if not self.fragment:
            return self.url
        return self.url + '#' + self.fragment

Jeremy Hylton

2008-06-18 20:49:58 +0000

[diff] [blame]

234

Nick Coghlan

2010-11-30 15:48:08 +0000

[diff] [blame]

235

class SplitResult(_SplitResultBase, _NetlocResultMixinStr):
    """str result of urlsplit(): (scheme, netloc, path, query, fragment)."""
    __slots__ = ()

    def geturl(self):
        """Reassemble the components into a URL string via urlunsplit()."""
        reassembled = urlunsplit(self)
        return reassembled

239

Nick Coghlan

2010-11-30 15:48:08 +0000

[diff] [blame]

240

class ParseResult(_ParseResultBase, _NetlocResultMixinStr):
    """str result of urlparse(): (scheme, netloc, path, params, query, fragment)."""
    __slots__ = ()

    def geturl(self):
        """Reassemble the components into a URL string via urlunparse()."""
        reassembled = urlunparse(self)
        return reassembled

244

Nick Coghlan

2010-11-30 15:48:08 +0000

[diff] [blame]

245

# Structured result objects for bytes data

246

class DefragResultBytes(_DefragResultBase, _ResultMixinBytes):
    """bytes result of urldefrag(): a (url, fragment) pair."""
    __slots__ = ()

    def geturl(self):
        """Return the URL with b'#fragment' re-attached when the fragment is non-empty."""
        if not self.fragment:
            return self.url
        return self.url + b'#' + self.fragment

class SplitResultBytes(_SplitResultBase, _NetlocResultMixinBytes):
    """bytes result of urlsplit(): (scheme, netloc, path, query, fragment)."""
    __slots__ = ()

    def geturl(self):
        """Reassemble the components into a URL via urlunsplit()."""
        reassembled = urlunsplit(self)
        return reassembled

258

259

class ParseResultBytes(_ParseResultBase, _NetlocResultMixinBytes):
    """bytes result of urlparse(): (scheme, netloc, path, params, query, fragment)."""
    __slots__ = ()

    def geturl(self):
        """Reassemble the components into a URL via urlunparse()."""
        reassembled = urlunparse(self)
        return reassembled

263

264

# Set up the encode/decode result pairs

265

def _fix_result_transcoding():
    """Cross-link each str result class with its bytes twin.

    Sets ``_encoded_counterpart`` on the str class and
    ``_decoded_counterpart`` on the bytes class, which the encode()/decode()
    mixin methods rely on.
    """
    str_classes = (DefragResult, SplitResult, ParseResult)
    bytes_classes = (DefragResultBytes, SplitResultBytes, ParseResultBytes)
    for str_cls, bytes_cls in zip(str_classes, bytes_classes):
        str_cls._encoded_counterpart = bytes_cls
        bytes_cls._decoded_counterpart = str_cls

# Run once at import time, then drop the helper from the module namespace.
_fix_result_transcoding()
del _fix_result_transcoding

Jeremy Hylton

2008-06-18 20:49:58 +0000

[diff] [blame]

277

278

def urlparse(url, scheme='', allow_fragments=True):
    """Parse a URL into 6 components:
    <scheme>://<netloc>/<path>;<params>?<query>#<fragment>

    Return a 6-tuple: (scheme, netloc, path, params, query, fragment).
    The components are not broken up into smaller bits (e.g. netloc is a
    single string) and % escapes are not expanded."""
    url, scheme, _coerce_result = _coerce_args(url, scheme)
    scheme, netloc, path, query, fragment = urlsplit(url, scheme,
                                                     allow_fragments)
    params = ''
    # Only some schemes define ';params' on the last path segment.
    if scheme in uses_params and ';' in path:
        path, params = _splitparams(path)
    parsed = ParseResult(scheme, netloc, path, params, query, fragment)
    return _coerce_result(parsed)

Jeremy Hylton

2008-06-18 20:49:58 +0000

[diff] [blame]

293

294

def _splitparams(url):

295

if '/' in url:

296

i = url.find(';', url.rfind('/'))

if i < 0:

return url, ''

else:

i = url.find(';')

return url[:i], url[i+1:]

302

303

def _splitnetloc(url, start=0):

304

delim = len(url) # position of end of domain part of url, default is end

305

for c in '/?#': # look for delimiters; the order is NOT important

306

wdelim = url.find(c, start) # find first of this delim

307

if wdelim >= 0: # if found

308

delim = min(delim, wdelim) # use earliest delim position

309

return url[start:delim], url[delim:] # return (domain, rest)

310

311

def urlsplit(url, scheme='', allow_fragments=True):
    """Parse a URL into 5 components:
    <scheme>://<netloc>/<path>?<query>#<fragment>

    Return a 5-tuple: (scheme, netloc, path, query, fragment).
    The components are not broken up into smaller bits (e.g. netloc is a
    single string) and % escapes are not expanded.

    Raises ValueError("Invalid IPv6 URL") when the netloc has an unmatched
    '[' or ']'."""
    url, scheme, _coerce_result = _coerce_args(url, scheme)
    allow_fragments = bool(allow_fragments)
    cache_key = (url, scheme, allow_fragments, type(url), type(scheme))
    hit = _parse_cache.get(cache_key)
    if hit:
        return _coerce_result(hit)
    if len(_parse_cache) >= MAX_CACHE_SIZE:  # avoid runaway growth
        clear_cache()
    netloc = query = fragment = ''
    colon = url.find(':')
    if colon > 0:
        prefix = url[:colon]
        if prefix == 'http':  # optimize the common case
            scheme = prefix.lower()
            url = url[colon + 1:]
            if url[:2] == '//':
                netloc, url = _splitnetloc(url, 2)
                # Exactly one of '[' / ']' present -> malformed IPv6 literal.
                if ('[' in netloc) != (']' in netloc):
                    raise ValueError("Invalid IPv6 URL")
            if allow_fragments and '#' in url:
                url, fragment = url.split('#', 1)
            if '?' in url:
                url, query = url.split('?', 1)
            parsed = SplitResult(scheme, netloc, url, query, fragment)
            _parse_cache[cache_key] = parsed
            return _coerce_result(parsed)
        # A digit right after the colon is taken as "path:port", not a scheme.
        if url.endswith(':') or not url[colon + 1].isdigit():
            if all(char in scheme_chars for char in prefix):
                scheme, url = prefix.lower(), url[colon + 1:]
    if url[:2] == '//':
        netloc, url = _splitnetloc(url, 2)
        if ('[' in netloc) != (']' in netloc):
            raise ValueError("Invalid IPv6 URL")
    if allow_fragments and scheme in uses_fragment and '#' in url:
        url, fragment = url.split('#', 1)
    if scheme in uses_query and '?' in url:
        url, query = url.split('?', 1)
    parsed = SplitResult(scheme, netloc, url, query, fragment)
    _parse_cache[cache_key] = parsed
    return _coerce_result(parsed)

Jeremy Hylton

2008-06-18 20:49:58 +0000

[diff] [blame]

361

362

def urlunparse(components):
    """Put a parsed URL back together again.  This may result in a
    slightly different, but equivalent URL, if the URL that was parsed
    originally had redundant delimiters, e.g. a ? with an empty query
    (the draft states that these are equivalent).

    *components* is a six-item iterable:
    (scheme, netloc, path, params, query, fragment)."""
    coerced = _coerce_args(*components)
    scheme, netloc, url, params, query, fragment, _coerce_result = coerced
    if params:
        url = "%s;%s" % (url, params)
    joined = urlunsplit((scheme, netloc, url, query, fragment))
    return _coerce_result(joined)

Jeremy Hylton

2008-06-18 20:49:58 +0000

[diff] [blame]

372

373

def urlunsplit(components):
    """Combine the elements of a tuple as returned by urlsplit() into a
    complete URL as a string. The components argument can be any five-item
    iterable: (scheme, netloc, path, query, fragment).

    This may result in a slightly different, but equivalent URL, if the URL
    that was parsed originally had unnecessary delimiters (for example, a ?
    with an empty query; the RFC states that these are equivalent)."""
    coerced = _coerce_args(*components)
    scheme, netloc, url, query, fragment, _coerce_result = coerced
    needs_authority = netloc or (
        scheme and scheme in uses_netloc and url[:2] != '//')
    if needs_authority:
        # A non-empty path must be absolute once an authority precedes it.
        if url and url[:1] != '/':
            url = '/' + url
        url = '//' + (netloc or '') + url
    if scheme:
        url = scheme + ':' + url
    if query:
        url += '?' + query
    if fragment:
        url += '#' + fragment
    return _coerce_result(url)

Jeremy Hylton

2008-06-18 20:49:58 +0000

[diff] [blame]

391

392

def urljoin(base, url, allow_fragments=True):
    """Join a base URL and a possibly relative URL to form an absolute
    interpretation of the latter."""
    if not base:
        return url
    if not url:
        return base
    base, url, _coerce_result = _coerce_args(base, url)
    (bscheme, bnetloc, bpath,
     bparams, bquery, bfragment) = urlparse(base, '', allow_fragments)
    (scheme, netloc, path,
     params, query, fragment) = urlparse(url, bscheme, allow_fragments)

    def _rebuild(final_path):
        # Reads the enclosing variables at call time, so later rebinding of
        # netloc / params / query is picked up.
        return _coerce_result(urlunparse((scheme, netloc, final_path,
                                          params, query, fragment)))

    if scheme != bscheme or scheme not in uses_relative:
        return _coerce_result(url)
    if scheme in uses_netloc:
        if netloc:
            return _rebuild(path)
        netloc = bnetloc
    if path[:1] == '/':
        return _rebuild(path)
    if not path and not params:
        # Empty reference path: inherit path and params (and the query,
        # unless the reference supplies its own) from the base.
        params = bparams
        if not query:
            query = bquery
        return _rebuild(bpath)
    segments = bpath.split('/')[:-1] + path.split('/')
    # XXX The stuff below is bogus in various ways...
    if segments[-1] == '.':
        segments[-1] = ''
    segments = [segment for segment in segments if segment != '.']
    # Repeatedly collapse the first "name/.." pair until none is left; the
    # first and last segments are never treated as the '..' element.
    while True:
        for index in range(1, len(segments) - 1):
            if (segments[index] == '..'
                    and segments[index - 1] not in ('', '..')):
                del segments[index - 1:index + 1]
                break
        else:
            break
    if segments == ['', '..']:
        segments[-1] = ''
    elif len(segments) >= 2 and segments[-1] == '..':
        segments[-2:] = ['']
    return _rebuild('/'.join(segments))

Jeremy Hylton

2008-06-18 20:49:58 +0000

[diff] [blame]

444

445

def urldefrag(url):
    """Removes any existing fragment from URL.

    Returns a tuple of the defragmented URL and the fragment.  If
    the URL contained no fragments, the second element is the
    empty string.
    """
    url, _coerce_result = _coerce_args(url)
    defragmented, fragment = url, ''
    if '#' in url:
        scheme, netloc, path, params, query, fragment = urlparse(url)
        defragmented = urlunparse((scheme, netloc, path, params, query, ''))
    return _coerce_result(DefragResult(defragmented, fragment))

Jeremy Hylton

2008-06-18 20:49:58 +0000

[diff] [blame]

460

Guido van Rossum

2008-08-18 21:44:30 +0000

[diff] [blame]

461

def unquote_to_bytes(string):
    """unquote_to_bytes('abc%20def') -> b'abc def'.

    *string* may be str or bytes; the result is bytes.  A '%' that is not
    followed by exactly two hexadecimal digits is left untouched.
    """
    # Note: strings are encoded as UTF-8. This is only an issue if it contains
    # unescaped non-ASCII characters, which URIs should not.
    if not string:
        # Is it a string-like object?  (Raises AttributeError otherwise.)
        string.split
        return b''
    if isinstance(string, str):
        string = string.encode('utf-8')
    pieces = string.split(b'%')
    if len(pieces) == 1:
        return string
    hex_digits = b'0123456789ABCDEFabcdef'
    chunks = [pieces[0]]
    for item in pieces[1:]:
        code = item[:2]
        # int(x, 16) alone is too lenient: it also accepts a single digit,
        # surrounding whitespace and a sign, so '%a' or '% 1' would be
        # mis-decoded.  Require two real hex digits instead.
        if (len(code) == 2 and code[0] in hex_digits
                and code[1] in hex_digits):
            chunks.append(bytes([int(code, 16)]))
            chunks.append(item[2:])
        else:
            chunks.append(b'%')
            chunks.append(item)
    # A single join avoids the quadratic cost of repeated bytes +=.
    return b''.join(chunks)

Jeremy Hylton

2008-06-18 20:49:58 +0000

[diff] [blame]

481

Guido van Rossum

2008-08-18 21:44:30 +0000

[diff] [blame]

482

def unquote(string, encoding='utf-8', errors='replace'):
    """Replace %xx escapes by their single-character equivalent. The optional
    encoding and errors parameters specify how to decode percent-encoded
    sequences into Unicode characters, as accepted by the bytes.decode()
    method.
    By default, percent-encoded sequences are decoded with UTF-8, and invalid
    sequences are replaced by a placeholder character.

    A '%' that is not followed by exactly two hexadecimal digits is kept
    verbatim.

    unquote('abc%20def') -> 'abc def'.
    """
    if string == '':
        return string
    pieces = string.split('%')
    if len(pieces) == 1:
        return string
    if encoding is None:
        encoding = 'utf-8'
    if errors is None:
        errors = 'replace'
    hex_digits = '0123456789ABCDEFabcdef'
    chunks = [pieces[0]]
    # pending: contiguous run of percent-encoded bytes not yet decoded
    pending = bytearray()
    for item in pieces[1:]:
        code = item[:2]
        # Explicit validation: bytes.fromhex() skips whitespace, so
        # '%  ' would decode to nothing and silently drop text.
        if (len(code) == 2 and code[0] in hex_digits
                and code[1] in hex_digits):
            pending.append(int(code, 16))
            rest = item[2:]
            if not rest:
                # This segment was just a single percent-encoded character.
                # May be part of a sequence of code units, so delay decoding.
                continue
        else:
            rest = '%' + item
        # Encountered non-percent-encoded characters. Flush the pending run.
        chunks.append(pending.decode(encoding, errors))
        chunks.append(rest)
        pending = bytearray()
    if pending:
        # Flush the final run
        chunks.append(pending.decode(encoding, errors))
    return ''.join(chunks)

Guido van Rossum

2008-08-18 21:44:30 +0000

[diff] [blame]

525

Georg Brandl

3d6575d

2009-09-16 14:36:22 +0000

[diff] [blame]

526

def parse_qs(qs, keep_blank_values=False, strict_parsing=False):
    """Parse a query given as a string argument.

    Arguments:

    qs: percent-encoded query string to be parsed

    keep_blank_values: flag indicating whether blank values in
        percent-encoded queries should be treated as blank strings.
        A true value indicates that blanks should be retained as
        blank strings.  The default false value indicates that
        blank values are to be ignored and treated as if they were
        not included.

    strict_parsing: flag indicating what to do with parsing errors.
        If false (the default), errors are silently ignored.
        If true, errors raise a ValueError exception.

    Returns a dict mapping each name to the list of its values, in order
    of appearance.
    """
    parsed = {}
    for name, value in parse_qsl(qs, keep_blank_values, strict_parsing):
        parsed.setdefault(name, []).append(value)
    return parsed

Georg Brandl

2009-09-16 14:36:22 +0000

[diff] [blame]

552

def parse_qsl(qs, keep_blank_values=False, strict_parsing=False):
    """Parse a query given as a string argument.

    Arguments:

    qs: percent-encoded query string to be parsed

    keep_blank_values: flag indicating whether blank values in
        percent-encoded queries should be treated as blank strings.  A
        true value indicates that blanks should be retained as blank
        strings.  The default false value indicates that blank values
        are to be ignored and treated as if they were not included.

    strict_parsing: flag indicating what to do with parsing errors. If
        false (the default), errors are silently ignored. If true,
        errors raise a ValueError exception.

    Returns a list of (name, value) pairs.
    """
    qs, _coerce_result = _coerce_args(qs)
    result = []
    # Fields are separated by '&' and, within those, by ';'.
    for ampersand_part in qs.split('&'):
        for field in ampersand_part.split(';'):
            if not field and not strict_parsing:
                continue
            name, equals, value = field.partition('=')
            if not equals:
                if strict_parsing:
                    raise ValueError("bad query field: %r" % (field,))
                # Handle case of a control-name with no equal sign
                if not keep_blank_values:
                    continue
            if value or keep_blank_values:
                name = _coerce_result(unquote(name.replace('+', ' ')))
                value = _coerce_result(unquote(value.replace('+', ' ')))
                result.append((name, value))
    return result

591

Guido van Rossum

2008-08-18 21:44:30 +0000

[diff] [blame]

592

def unquote_plus(string, encoding='utf-8', errors='replace'):
    """Like unquote(), but also replace plus signs by spaces, as required for
    unquoting HTML form values.

    unquote_plus('%7e/abc+def') -> '~/abc def'
    """
    # Pluses are translated first, so an escaped '%2B' still yields '+'.
    return unquote(string.replace('+', ' '), encoding, errors)

600

601

_ALWAYS_SAFE = frozenset(b'ABCDEFGHIJKLMNOPQRSTUVWXYZ'

602

b'abcdefghijklmnopqrstuvwxyz'

603

b'0123456789'

604

b'_.-')

Florent Xicluna

2010-05-17 17:33:07 +0000

[diff] [blame]

605

_ALWAYS_SAFE_BYTES = bytes(_ALWAYS_SAFE)

606

_safe_quoters = {}

Guido van Rossum

2008-08-06 19:31:34 +0000

[diff] [blame]

607

Guido van Rossum

2008-08-18 21:44:30 +0000

[diff] [blame]

608

class Quoter(collections.defaultdict):
    """A mapping from bytes (in range(0,256)) to strings.

    String values are percent-encoded byte values, unless the key < 128, and
    in the "safe" set (either the specified safe set, or default set).
    """
    # Doubles as its own cache: __missing__ stores each computed value, so
    # repeated lookups of the same byte never re-enter Python code.

    def __init__(self, safe):
        """safe: bytes object."""
        self.safe = _ALWAYS_SAFE.union(safe)

    def __repr__(self):
        # Without this, will just display as a defaultdict
        return "<Quoter %r>" % dict(self)

    def __missing__(self, b):
        # Cache miss: compute the quoted form, remember it, return it.
        if b in self.safe:
            quoted = chr(b)
        else:
            quoted = '%{:02X}'.format(b)
        self[b] = quoted
        return quoted

def quote(string, safe='/', encoding=None, errors=None):
    """quote('abc def') -> 'abc%20def'

    Each part of a URL, e.g. the path info, the query, etc., has a
    different set of reserved characters that must be quoted.

    RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists
    the following reserved characters.

    reserved    = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
                  "$" | ","

    Each of these characters is reserved in some component of a URL,
    but not necessarily in all of them.

    By default, the quote function is intended for quoting the path
    section of a URL.  Thus, it will not encode '/'.  This character
    is reserved, but in typical usage the quote function is being
    called on a path where the existing slash characters are used as
    reserved characters.

    string and safe may be either str or bytes objects.  encoding and
    errors must not be specified if string is a bytes object; doing so
    raises TypeError.

    The optional encoding and errors parameters specify how to deal with
    non-ASCII characters, as accepted by the str.encode method.
    By default, encoding='utf-8' (characters are encoded with UTF-8), and
    errors='strict' (unsupported characters raise a UnicodeEncodeError).
    """
    if isinstance(string, str):
        # An empty str is returned unchanged (no encoding round-trip).
        if not string:
            return string
        if encoding is None:
            encoding = 'utf-8'
        if errors is None:
            errors = 'strict'
        string = string.encode(encoding, errors)
    else:
        # Non-str input is passed through as raw bytes, so text-encoding
        # options are meaningless and rejected explicitly.
        if encoding is not None:
            raise TypeError("quote() doesn't support 'encoding' for bytes")
        if errors is not None:
            raise TypeError("quote() doesn't support 'errors' for bytes")
    return quote_from_bytes(string, safe)

673

674

def quote_plus(string, safe='', encoding=None, errors=None):
    """Like quote(), but also replace ' ' with '+', as required for quoting
    HTML form values. Plus signs in the original string are escaped unless
    they are included in safe. It also does not have safe default to '/'.
    """
    # Check if ' ' in string, where string may either be a str or bytes.  If
    # there are no spaces, the regular quote will produce the right answer.
    if ((isinstance(string, str) and ' ' not in string) or
            (isinstance(string, bytes) and b' ' not in string)):
        return quote(string, safe, encoding, errors)
    # Temporarily mark the space as safe so quote() leaves it alone; the
    # literal must have the same type as 'safe' for the concatenation.
    if isinstance(safe, str):
        space = ' '
    else:
        space = b' '
    string = quote(string, safe + space, encoding, errors)
    # quote() always returns str, so a str replace is correct here.
    return string.replace(' ', '+')

Guido van Rossum

2008-08-18 21:44:30 +0000

[diff] [blame]

690

691

def quote_from_bytes(bs, safe='/'):
    """Like quote(), but accepts a bytes object rather than a str, and does
    not perform string-to-bytes encoding.  It always returns an ASCII string.
    quote_from_bytes(b'abc def\xab') -> 'abc%20def%AB'

    Raises TypeError if bs is neither bytes nor bytearray.
    """
    if not isinstance(bs, (bytes, bytearray)):
        raise TypeError("quote_from_bytes() expected bytes")
    if not bs:
        return ''
    if isinstance(safe, str):
        # Normalize 'safe' by converting to bytes and removing non-ASCII chars
        safe = safe.encode('ascii', 'ignore')
    else:
        safe = bytes([c for c in safe if c < 128])
    # Fast path: if stripping every safe byte from the right leaves nothing,
    # the input consists solely of safe bytes and needs no quoting.
    if not bs.rstrip(_ALWAYS_SAFE_BYTES + safe):
        return bs.decode()
    # Reuse one Quoter lookup function per distinct 'safe' value.
    try:
        quoter = _safe_quoters[safe]
    except KeyError:
        _safe_quoters[safe] = quoter = Quoter(safe).__getitem__
    return ''.join([quoter(char) for char in bs])

Jeremy Hylton

2008-06-18 20:49:58 +0000

[diff] [blame]

712

Senthil Kumaran

2010-07-03 17:48:22 +0000

[diff] [blame]

713

def urlencode(query, doseq=False, safe='', encoding=None, errors=None):
    """Encode a sequence of two-element tuples or dictionary into a URL query string.

    If any values in the query arg are sequences and doseq is true, each
    sequence element is converted to a separate parameter.

    If the query arg is a sequence of two-element tuples, the order of the
    parameters in the output will match the order of parameters in the
    input.

    Each key and value may be either a str or a bytes object; anything else
    is converted with str() first.  The safe parameter is always passed to
    quote_plus(); the encoding and errors parameters are passed only when
    the component being quoted is not bytes.

    Raises TypeError if query is neither a mapping nor a sequence whose
    first element is a tuple (for example, a non-empty string).
    """

    if hasattr(query, "items"):
        query = query.items()
    else:
        # It's a bother at times that strings and string-like objects are
        # sequences.
        try:
            # non-sequence items should not work with len()
            # non-empty strings will fail this
            if len(query) and not isinstance(query[0], tuple):
                raise TypeError
            # Zero-length sequences of all types will get here and succeed,
            # but that's a minor nit.  Since the original implementation
            # allowed empty dicts that type of behavior probably should be
            # preserved for consistency
        except TypeError as err:
            # Re-raise with a clearer message while keeping the traceback of
            # the failing len()/indexing operation.
            raise TypeError("not a valid non-string sequence "
                            "or mapping object").with_traceback(
                                err.__traceback__)

    def quote_component(value):
        # bytes are quoted as they are (quote() rejects encoding/errors for
        # bytes); everything else is stringified and then encoded.
        if isinstance(value, bytes):
            return quote_plus(value, safe)
        return quote_plus(str(value), safe, encoding, errors)

    pairs = []
    for k, v in query:
        k = quote_component(k)
        if not doseq or isinstance(v, bytes):
            # Scalar handling: either sequences are not expanded at all, or
            # the value is bytes, which is never treated as a sequence.
            pairs.append(k + '=' + quote_component(v))
        elif isinstance(v, str):
            # A str is a sequence too, but must be emitted as one value.
            pairs.append(k + '=' + quote_plus(v, safe, encoding, errors))
        else:
            try:
                # Is this a sufficient test for sequence-ness?
                len(v)
            except TypeError:
                # not a sequence
                pairs.append(k + '=' + quote_component(v))
            else:
                # loop over the sequence
                for elt in v:
                    pairs.append(k + '=' + quote_component(elt))
    return '&'.join(pairs)

790

791

# Utilities to parse URLs (most of these return None for missing parts):

792

# unwrap('<URL:type://host/path>') --> 'type://host/path'

793

# splittype('type:opaquestring') --> 'type', 'opaquestring'

794

# splithost('//host[:port]/path') --> 'host[:port]', '/path'

795

# splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'

796

# splitpasswd('user:passwd') -> 'user', 'passwd'

797

# splitport('host:port') --> 'host', 'port'

798

# splitquery('/path?query') --> '/path', 'query'

799

# splittag('/path#tag') --> '/path', 'tag'

800

# splitattr('/path;attr1=value1;attr2=value2;...') ->

801

# '/path', ['attr1=value1', 'attr2=value2', ...]

802

# splitvalue('attr=value') --> 'attr', 'value'

803

# urllib.parse.unquote('abc%20def') -> 'abc def'

804

# quote('abc def') -> 'abc%20def'

805

Georg Brandl

13e8946

2008-07-01 19:56:00 +0000

[diff] [blame]

806

def to_bytes(url):
    """to_bytes(u"URL") --> 'URL'.

    Despite the name, this returns a str: a str argument is only checked
    for being pure ASCII and handed back unchanged.  Arguments that are
    not str are returned untouched.

    Raises UnicodeError if a str url contains non-ASCII characters.
    """
    # Most URL schemes require ASCII. If that changes, the conversion
    # can be relaxed.
    # XXX get rid of to_bytes()
    if isinstance(url, str):
        try:
            # The encode/decode round-trip is purely a validity check.
            url = url.encode("ASCII").decode()
        except UnicodeError:
            raise UnicodeError("URL " + repr(url) +
                               " contains non-ASCII characters")
    return url

def unwrap(url):
    """unwrap('<URL:type://host/path>') --> 'type://host/path'.

    The argument is converted with str() and stripped of surrounding
    whitespace; an enclosing '<...>' pair and a leading 'URL:' prefix are
    then removed, each independently of the other.
    """
    url = str(url).strip()
    # Slices (rather than indexing) keep this safe for the empty string.
    if url[:1] == '<' and url[-1:] == '>':
        url = url[1:-1].strip()
    # The prefix is checked whether or not angle brackets were present.
    if url[:4] == 'URL:':
        url = url[4:].strip()
    return url

# Compiled lazily on the first splittype() call.
_typeprog = None
def splittype(url):
    """splittype('type:opaquestring') --> 'type', 'opaquestring'.

    The scheme is returned lower-cased.  If url does not start with one or
    more characters other than '/' and ':' followed by a colon, the result
    is (None, url).
    """
    global _typeprog
    if _typeprog is None:
        import re
        _typeprog = re.compile(r'^([^/:]+):')

    match = _typeprog.match(url)
    if match:
        scheme = match.group(1)
        # Skip the scheme and the ':' separator that follows it.
        return scheme.lower(), url[len(scheme) + 1:]
    return None, url

# Compiled lazily on the first splithost() call.
_hostprog = None
def splithost(url):
    """splithost('//host[:port]/path') --> 'host[:port]', '/path'.

    The host part ends at the first '/', '?' or '#'.  A non-empty remainder
    that does not begin with '/' gets one prepended.  If url does not start
    with '//', the result is (None, url).
    """
    global _hostprog
    if _hostprog is None:
        import re
        # '#' must terminate the authority as well as '/' and '?', otherwise
        # '//good#@evil/' would report 'good#@evil' as the host.  DOTALL
        # lets the remainder contain newlines instead of failing the match.
        _hostprog = re.compile(r'//([^/#?]*)(.*)', re.DOTALL)

    match = _hostprog.match(url)
    if match:
        host_port = match.group(1)
        path = match.group(2)
        if path and not path.startswith('/'):
            path = '/' + path
        return host_port, path
    return None, url

# Compiled lazily on the first splituser() call.
_userprog = None
def splituser(host):
    """splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'.

    The split happens at the last '@'.  If host contains no '@', the result
    is (None, host).
    """
    global _userprog
    if _userprog is None:
        import re
        # The greedy first group makes the *last* '@' the delimiter.  DOTALL
        # keeps a newline anywhere in the input from defeating the split or
        # from being dropped off the end of the host part.
        _userprog = re.compile(r'(.*)@(.*)', re.DOTALL)

    match = _userprog.match(host)
    if match:
        return match.group(1, 2)
    return None, host

_passwdprog = None

def splitpasswd(user):

872

"""splitpasswd('user:passwd') -> 'user', 'passwd'."""

873

global _passwdprog

874

if _passwdprog is None:

875

import re

Senthil Kumaran

eaaec27

2009-03-30 21:54:41 +0000

[diff] [blame]

876

_passwdprog = re.compile('^([^:]*):(.*)$',re.S)

Jeremy Hylton