blob: 92170ad0a2435907ab06f6e3404dccdad1986869 [file] [log] [blame]
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001"""Parse (absolute and relative) URLs.
2
Senthil Kumaranfd41e082010-04-17 14:44:14 +00003urlparse module is based upon the following RFC specifications.
4
5RFC 3986 (STD66): "Uniform Resource Identifiers" by T. Berners-Lee, R. Fielding
6and L. Masinter, January 2005.
7
RFC 2732: "Format for Literal IPv6 Addresses in URL's" by R. Hinden, B. Carpenter
and L. Masinter, December 1999.
10
Benjamin Petersond7c3ed52010-06-27 22:32:30 +000011RFC 2396: "Uniform Resource Identifiers (URI)": Generic Syntax by T.
Senthil Kumaranfd41e082010-04-17 14:44:14 +000012Berners-Lee, R. Fielding, and L. Masinter, August 1998.
13
David Malcolmee255682010-12-02 16:41:00 +000014RFC 2368: "The mailto URL scheme", by P.Hoffman , L Masinter, J. Zawinski, July 1998.
Senthil Kumaranfd41e082010-04-17 14:44:14 +000015
16RFC 1808: "Relative Uniform Resource Locators", by R. Fielding, UC Irvine, June
171995.
18
Benjamin Petersond7c3ed52010-06-27 22:32:30 +000019RFC 1738: "Uniform Resource Locators (URL)" by T. Berners-Lee, L. Masinter, M.
Senthil Kumaranfd41e082010-04-17 14:44:14 +000020McCahill, December 1994
21
RFC 3986 is considered the current standard and any future changes to
urlparse module should conform with it.  The urlparse module is
currently not entirely compliant with this RFC due to de facto
scenarios for parsing, and for backward compatibility purposes, some
parsing quirks from older RFCs are retained. The test cases in
test_urlparse.py provide a good indicator of parsing behavior.
Jeremy Hylton1afc1692008-06-18 20:49:58 +000028"""
29
Facundo Batista2ac5de22008-07-07 18:24:11 +000030import sys
Guido van Rossum52dbbb92008-08-18 21:44:30 +000031import collections
Facundo Batista2ac5de22008-07-07 18:24:11 +000032
# Public API of this module, grouped by purpose.
__all__ = [
    # Splitting, joining and reassembling URLs
    "urlparse", "urlunparse", "urljoin", "urldefrag",
    "urlsplit", "urlunsplit",
    # Query strings
    "urlencode", "parse_qs", "parse_qsl",
    # Percent-encoding and decoding
    "quote", "quote_plus", "quote_from_bytes",
    "unquote", "unquote_plus", "unquote_to_bytes",
]
Jeremy Hylton1afc1692008-06-18 20:49:58 +000037
# A classification of schemes ('' means apply by default)

# Schemes for which urljoin() resolves relative references.
uses_relative = [
    'ftp', 'http', 'gopher', 'nntp', 'imap',
    'wais', 'file', 'https', 'shttp', 'mms',
    'prospero', 'rtsp', 'rtspu', '', 'sftp',
    'svn', 'svn+ssh',
]

# Schemes whose URLs carry a "//netloc" component.
uses_netloc = [
    'ftp', 'http', 'gopher', 'nntp', 'telnet',
    'imap', 'wais', 'file', 'mms', 'https', 'shttp',
    'snews', 'prospero', 'rtsp', 'rtspu', 'rsync', '',
    'svn', 'svn+ssh', 'sftp', 'nfs', 'git', 'git+ssh',
]

# Schemes for which urlparse() splits ";params" off the path.
uses_params = [
    'ftp', 'hdl', 'prospero', 'http', 'imap',
    'https', 'shttp', 'rtsp', 'rtspu', 'sip', 'sips',
    'mms', '', 'sftp',
]
Jeremy Hylton1afc1692008-06-18 20:49:58 +000050
# Characters valid in scheme names: ASCII letters, digits and "+-."
scheme_chars = ''.join((
    'abcdefghijklmnopqrstuvwxyz',
    'ABCDEFGHIJKLMNOPQRSTUVWXYZ',
    '0123456789',
    '+-.',
))
56
# XXX: Consider replacing with functools.lru_cache
# Number of entries at which urlsplit() empties the cache before adding more.
MAX_CACHE_SIZE = 20
# Maps (url, scheme, allow_fragments, type(url), type(scheme)) -> SplitResult.
_parse_cache = {}
60
def clear_cache():
    """Clear the parse cache and the quoters cache."""
    for cache in (_parse_cache, _safe_quoters):
        cache.clear()
Jeremy Hylton1afc1692008-06-18 20:49:58 +000065
66
# Helpers for bytes handling
# For 3.2, we deliberately require applications that
# handle improperly quoted URLs to do their own
# decoding and encoding. If valid use cases are
# presented, we may relax this by using latin-1
# decoding internally for 3.3
#
# Default codec and error handler used by _encode_result() and
# _decode_args() when bytes arguments are coerced to str and back.
_implicit_encoding = 'ascii'
_implicit_errors = 'strict'
75
def _noop(obj):
    """Identity result coercion: hand *obj* back untouched (str inputs)."""
    unchanged = obj
    return unchanged
78
def _encode_result(obj, encoding=_implicit_encoding,
                        errors=_implicit_errors):
    """Result coercion for bytes inputs: return obj.encode(encoding, errors).

    *obj* may be a plain str or one of the str result objects, which
    provide their own encode() method.
    """
    encoded = obj.encode(encoding, errors)
    return encoded
82
def _decode_args(args, encoding=_implicit_encoding,
                       errors=_implicit_errors):
    """Decode every item of *args* to str and return them as a tuple.

    Falsy items (empty bytes, '', None) are replaced by the empty str
    instead of being decoded.
    """
    decoded = []
    for item in args:
        if item:
            decoded.append(item.decode(encoding, errors))
        else:
            decoded.append('')
    return tuple(decoded)
86
def _coerce_args(*args):
    """Return *args* as str values followed by a result-coercion function.

    The type of the first argument decides the mode:
      - str input: arguments are returned unchanged, followed by _noop
      - otherwise: arguments are decoded, followed by _encode_result

    Raises TypeError when a later, non-empty argument disagrees with the
    first one about being a str.
    """
    first_is_str = isinstance(args[0], str)
    # Empty values are exempt from the check so that the "scheme=''"
    # default argument of some functions also works with bytes input.
    mixed = any(
        other and isinstance(other, str) != first_is_str
        for other in args[1:]
    )
    if mixed:
        raise TypeError("Cannot mix str and non-str arguments")
    if not first_is_str:
        return _decode_args(args) + (_encode_result,)
    return args + (_noop,)
102
103# Result objects are more helpful than simple tuples
class _ResultMixinStr(object):
    """Standard approach to encoding parsed results from str to bytes"""
    __slots__ = ()

    def encode(self, encoding='ascii', errors='strict'):
        """Encode every field and build the bytes counterpart of this result.

        The target class is looked up as self._encoded_counterpart.
        """
        encoded_fields = [field.encode(encoding, errors) for field in self]
        return self._encoded_counterpart(*encoded_fields)
110
111
class _ResultMixinBytes(object):
    """Standard approach to decoding parsed results from bytes to str"""
    __slots__ = ()

    def decode(self, encoding='ascii', errors='strict'):
        """Decode every field and build the str counterpart of this result.

        The target class is looked up as self._decoded_counterpart.
        """
        decoded_fields = [field.decode(encoding, errors) for field in self]
        return self._decoded_counterpart(*decoded_fields)
118
119
class _NetlocResultMixinBase(object):
    """Shared methods for the parsed result objects containing a netloc element

    Subclasses supply two properties:
      _userinfo -> (username, password)
      _hostinfo -> (hostname, port)
    """
    __slots__ = ()

    @property
    def username(self):
        """First item of self._userinfo."""
        return self._userinfo[0]

    @property
    def password(self):
        """Second item of self._userinfo."""
        return self._userinfo[1]

    @property
    def hostname(self):
        """Lower-cased host name, or None when it is missing or empty."""
        hostname = self._hostinfo[0]
        if not hostname:
            return None
        return hostname.lower()

    @property
    def port(self):
        """Port as an int, or None when no port is given.

        An empty port ("host:") is valid per RFC 3986 and is reported as
        None instead of failing inside int().  A non-numeric port still
        raises ValueError.
        """
        port = self._hostinfo[1]
        if not port:
            # None (no ':' in the netloc) or an empty str/bytes value.
            return None
        return int(port, 10)
147
148
class _NetlocResultMixinStr(_NetlocResultMixinBase, _ResultMixinStr):
    """netloc helpers for results whose fields are str."""
    __slots__ = ()

    @property
    def _userinfo(self):
        """Return (username, password) taken from before the last '@'.

        Both are None without an '@'; password is None without a ':'.
        """
        credentials, at_sign, _ = self.netloc.rpartition('@')
        if not at_sign:
            return None, None
        username, colon, password = credentials.partition(':')
        if not colon:
            password = None
        return username, password

    @property
    def _hostinfo(self):
        """Return (hostname, port) taken from after the last '@'.

        A bracketed host ("[...]") is returned without the brackets.
        port is None when no ':' introduces one.
        """
        hostport = self.netloc.rpartition('@')[2]
        _, open_bracket, after_bracket = hostport.partition('[')
        if open_bracket:
            # The port, if any, follows the closing bracket.
            hostname, _, tail = after_bracket.partition(']')
            _, colon, port = tail.partition(':')
        else:
            hostname, colon, port = hostport.partition(':')
        return hostname, (port if colon else None)
177
178
class _NetlocResultMixinBytes(_NetlocResultMixinBase, _ResultMixinBytes):
    """netloc helpers for results whose fields are bytes."""
    __slots__ = ()

    @property
    def _userinfo(self):
        """Return (username, password) taken from before the last b'@'.

        Both are None without a b'@'; password is None without a b':'.
        """
        credentials, at_sign, _ = self.netloc.rpartition(b'@')
        if not at_sign:
            return None, None
        username, colon, password = credentials.partition(b':')
        if not colon:
            password = None
        return username, password

    @property
    def _hostinfo(self):
        """Return (hostname, port) taken from after the last b'@'.

        A bracketed host (b"[...]") is returned without the brackets.
        port is None when no b':' introduces one.
        """
        hostport = self.netloc.rpartition(b'@')[2]
        _, open_bracket, after_bracket = hostport.partition(b'[')
        if open_bracket:
            # The port, if any, follows the closing bracket.
            hostname, _, tail = after_bracket.partition(b']')
            _, colon, port = tail.partition(b':')
        else:
            hostname, colon, port = hostport.partition(b':')
        return hostname, (port if colon else None)
207
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000208
209from collections import namedtuple
210
# Plain tuple layouts shared by the str and bytes result classes.
_DefragResultBase = namedtuple(
    'DefragResult', ['url', 'fragment'])
_SplitResultBase = namedtuple(
    'SplitResult', ['scheme', 'netloc', 'path', 'query', 'fragment'])
_ParseResultBase = namedtuple(
    'ParseResult',
    ['scheme', 'netloc', 'path', 'params', 'query', 'fragment'])
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000214
# For backwards compatibility, alias _NetlocResultMixinStr.
# ResultBase is no longer part of the documented API, but it is
# retained since deprecating it isn't worth the hassle.
ResultBase = _NetlocResultMixinStr
219
220# Structured result objects for string data
class DefragResult(_DefragResultBase, _ResultMixinStr):
    """(url, fragment) pair of str values."""
    __slots__ = ()

    def geturl(self):
        """Return url, with '#' + fragment appended when fragment is non-empty."""
        return (self.url + '#' + self.fragment) if self.fragment else self.url
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000228
class SplitResult(_SplitResultBase, _NetlocResultMixinStr):
    """(scheme, netloc, path, query, fragment) tuple of str values."""
    __slots__ = ()

    def geturl(self):
        """Reassemble the fields into a URL by calling urlunsplit()."""
        rebuilt = urlunsplit(self)
        return rebuilt
233
class ParseResult(_ParseResultBase, _NetlocResultMixinStr):
    """(scheme, netloc, path, params, query, fragment) tuple of str values."""
    __slots__ = ()

    def geturl(self):
        """Reassemble the fields into a URL by calling urlunparse()."""
        rebuilt = urlunparse(self)
        return rebuilt
238
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000239# Structured result objects for bytes data
class DefragResultBytes(_DefragResultBase, _ResultMixinBytes):
    """(url, fragment) pair of bytes values."""
    __slots__ = ()

    def geturl(self):
        """Return url, with b'#' + fragment appended when fragment is non-empty."""
        return (self.url + b'#' + self.fragment) if self.fragment else self.url
247
class SplitResultBytes(_SplitResultBase, _NetlocResultMixinBytes):
    """(scheme, netloc, path, query, fragment) tuple of bytes values."""
    __slots__ = ()

    def geturl(self):
        """Reassemble the fields into a URL by calling urlunsplit()."""
        rebuilt = urlunsplit(self)
        return rebuilt
252
class ParseResultBytes(_ParseResultBase, _NetlocResultMixinBytes):
    """(scheme, netloc, path, params, query, fragment) tuple of bytes values."""
    __slots__ = ()

    def geturl(self):
        """Reassemble the fields into a URL by calling urlunparse()."""
        rebuilt = urlunparse(self)
        return rebuilt
257
258# Set up the encode/decode result pairs
def _fix_result_transcoding():
    """Link each str result class with its bytes twin, in both directions.

    Sets _encoded_counterpart on the str class and _decoded_counterpart
    on the bytes class; the encode()/decode() mixin methods rely on them.
    """
    str_classes = (DefragResult, SplitResult, ParseResult)
    bytes_classes = (DefragResultBytes, SplitResultBytes, ParseResultBytes)
    for str_cls, bytes_cls in zip(str_classes, bytes_classes):
        str_cls._encoded_counterpart = bytes_cls
        bytes_cls._decoded_counterpart = str_cls

# Run once at import time, then drop the helper from the module namespace.
_fix_result_transcoding()
del _fix_result_transcoding
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000271
def urlparse(url, scheme='', allow_fragments=True):
    """Parse a URL into 6 components:
    <scheme>://<netloc>/<path>;<params>?<query>#<fragment>
    Return a 6-tuple: (scheme, netloc, path, params, query, fragment).
    Note that we don't break the components up in smaller bits
    (e.g. netloc is a single string) and we don't expand % escapes."""
    url, scheme, _coerce_result = _coerce_args(url, scheme)
    scheme, netloc, path, query, fragment = urlsplit(
        url, scheme, allow_fragments)
    params = ''
    # Parameters are only split off for schemes that declare them.
    if scheme in uses_params and ';' in path:
        path, params = _splitparams(path)
    return _coerce_result(
        ParseResult(scheme, netloc, path, params, query, fragment))
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000287
def _splitparams(url):
    """Split *url* into (path, params) at the ';' of its last path segment.

    Only a ';' located at or after the last '/' counts.  When there is no
    such ';' the whole input is returned as the path with empty params,
    including the case of an input with neither '/' nor ';' (which used
    to yield a truncated path because of the -1 returned by find()).
    """
    if '/' in url:
        i = url.find(';', url.rfind('/'))
    else:
        i = url.find(';')
    if i < 0:
        return url, ''
    return url[:i], url[i+1:]
296
def _splitnetloc(url, start=0):
    """Return (netloc, rest) where netloc is url[start:] up to the first
    of '/', '?' or '#' and rest is everything from that delimiter on.

    Without any delimiter the netloc runs to the end and rest is empty.
    """
    hits = [pos for pos in (url.find(ch, start) for ch in '/?#') if pos >= 0]
    end = min(hits) if hits else len(url)
    return url[start:end], url[end:]
304
def urlsplit(url, scheme='', allow_fragments=True):
    """Parse a URL into 5 components:
    <scheme>://<netloc>/<path>?<query>#<fragment>
    Return a 5-tuple: (scheme, netloc, path, query, fragment).
    Note that we don't break the components up in smaller bits
    (e.g. netloc is a single string) and we don't expand % escapes.

    Raises ValueError("Invalid IPv6 URL") when the netloc contains only
    one of '[' and ']'."""
    url, scheme, _coerce_result = _coerce_args(url, scheme)
    allow_fragments = bool(allow_fragments)
    key = url, scheme, allow_fragments, type(url), type(scheme)
    cached = _parse_cache.get(key, None)
    if cached:
        return _coerce_result(cached)
    if len(_parse_cache) >= MAX_CACHE_SIZE:  # avoid runaway growth
        clear_cache()

    # Step 1: peel a scheme off the front, if there is one.
    colon = url.find(':')
    if colon > 0:
        prefix = url[:colon]
        if prefix == 'http':
            # Common case: accepted without looking at what follows.
            scheme = prefix.lower()
            url = url[colon + 1:]
        elif all(ch in scheme_chars for ch in prefix):
            # make sure "url" is not actually a port number (in which case
            # "scheme" is really part of the path)
            rest = url[colon + 1:]
            if not rest or any(ch not in '0123456789' for ch in rest):
                # not a port number
                scheme, url = prefix.lower(), rest

    # Step 2: netloc, fragment and query, in that order.
    netloc = query = fragment = ''
    if url[:2] == '//':
        netloc, url = _splitnetloc(url, 2)
        if ('[' in netloc) != (']' in netloc):
            raise ValueError("Invalid IPv6 URL")
    if allow_fragments and '#' in url:
        url, fragment = url.split('#', 1)
    if '?' in url:
        url, query = url.split('?', 1)

    result = SplitResult(scheme, netloc, url, query, fragment)
    _parse_cache[key] = result
    return _coerce_result(result)
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000360
def urlunparse(components):
    """Put a parsed URL back together again.  This may result in a
    slightly different, but equivalent URL, if the URL that was parsed
    originally had redundant delimiters, e.g. a ? with an empty query
    (the draft states that these are equivalent).

    *components* is unpacked into six items:
    (scheme, netloc, path, params, query, fragment)."""
    *fields, _coerce_result = _coerce_args(*components)
    scheme, netloc, path, params, query, fragment = fields
    if params:
        # Fold the parameters back into the path before delegating.
        path = "%s;%s" % (path, params)
    joined = urlunsplit((scheme, netloc, path, query, fragment))
    return _coerce_result(joined)
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000371
def urlunsplit(components):
    """Combine the elements of a tuple as returned by urlsplit() into a
    complete URL as a string. The data argument can be any five-item iterable.
    This may result in a slightly different, but equivalent URL, if the URL that
    was parsed originally had unnecessary delimiters (for example, a ? with an
    empty query; the RFC states that these are equivalent)."""
    *fields, _coerce_result = _coerce_args(*components)
    scheme, netloc, url, query, fragment = fields

    # A "//" authority marker is written when there is a netloc, or when
    # the scheme is a netloc-using one and the path lacks a leading "//".
    wants_authority = netloc or (
        scheme and scheme in uses_netloc and url[:2] != '//')
    if wants_authority:
        if url and url[:1] != '/':
            url = '/' + url
        url = '//' + (netloc or '') + url
    if scheme:
        url = scheme + ':' + url
    if query:
        url += '?' + query
    if fragment:
        url += '#' + fragment
    return _coerce_result(url)
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000390
def urljoin(base, url, allow_fragments=True):
    """Join a base URL and a possibly relative URL to form an absolute
    interpretation of the latter.

    If either argument is empty the other one is returned unchanged.
    *url* is also returned as given when its scheme differs from the
    base scheme or is not listed in uses_relative."""
    if not base:
        return url
    if not url:
        return base
    base, url, _coerce_result = _coerce_args(base, url)
    bscheme, bnetloc, bpath, bparams, bquery, bfragment = \
            urlparse(base, '', allow_fragments)
    # The base scheme is the default scheme for the reference.
    scheme, netloc, path, params, query, fragment = \
            urlparse(url, bscheme, allow_fragments)
    if scheme != bscheme or scheme not in uses_relative:
        return _coerce_result(url)
    if scheme in uses_netloc:
        # A reference with its own netloc replaces everything after the scheme.
        if netloc:
            return _coerce_result(urlunparse((scheme, netloc, path,
                                              params, query, fragment)))
        netloc = bnetloc
    # Absolute path: keep it as is, on the base's netloc.
    if path[:1] == '/':
        return _coerce_result(urlunparse((scheme, netloc, path,
                                          params, query, fragment)))
    # No path and no params: inherit both from the base, and the query
    # too unless the reference carries one.
    if not path and not params:
        path = bpath
        params = bparams
        if not query:
            query = bquery
        return _coerce_result(urlunparse((scheme, netloc, path,
                                          params, query, fragment)))
    # Relative path: drop the last base segment and append the reference.
    segments = bpath.split('/')[:-1] + path.split('/')
    # XXX The stuff below is bogus in various ways...
    # A trailing '.' becomes an empty segment, which keeps the final '/'.
    if segments[-1] == '.':
        segments[-1] = ''
    while '.' in segments:
        segments.remove('.')
    # Repeatedly remove the first interior "name/.." pair until a full
    # scan finds none; the last segment is never examined here.
    while 1:
        i = 1
        n = len(segments) - 1
        while i < n:
            if (segments[i] == '..'
                and segments[i-1] not in ('', '..')):
                del segments[i-1:i+1]
                break
            i = i+1
        else:
            break
    # Handle a '..' left in the final position.
    if segments == ['', '..']:
        segments[-1] = ''
    elif len(segments) >= 2 and segments[-1] == '..':
        segments[-2:] = ['']
    return _coerce_result(urlunparse((scheme, netloc, '/'.join(segments),
                                      params, query, fragment)))
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000443
def urldefrag(url):
    """Removes any existing fragment from URL.

    Returns a tuple of the defragmented URL and the fragment.  If
    the URL contained no fragments, the second element is the
    empty string.
    """
    url, _coerce_result = _coerce_args(url)
    if '#' not in url:
        # Nothing to strip: the URL is handed back as given.
        return _coerce_result(DefragResult(url, ''))
    scheme, netloc, path, params, query, frag = urlparse(url)
    stripped = urlunparse((scheme, netloc, path, params, query, ''))
    return _coerce_result(DefragResult(stripped, frag))
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000459
def unquote_to_bytes(string):
    """unquote_to_bytes('abc%20def') -> b'abc def'.

    *string* may be str or bytes.  A '%' that is not followed by exactly
    two hexadecimal digits is copied to the output unchanged.
    """
    # Note: strings are encoded as UTF-8. This is only an issue if it contains
    # unescaped non-ASCII characters, which URIs should not.
    if not string:
        # Is it a string-like object?
        string.split
        return b''
    if isinstance(string, str):
        string = string.encode('utf-8')
    head, *escaped = string.split(b'%')
    if not escaped:
        return string
    hexdigits = b'0123456789ABCDEFabcdef'
    chunks = [head]
    for item in escaped:
        pair = item[:2]
        # int() alone is too lenient here: it would also accept a single
        # digit, a sign or surrounding whitespace (e.g. b'a', b'+1', b' 1').
        if len(pair) == 2 and pair[0] in hexdigits and pair[1] in hexdigits:
            chunks.append(bytes([int(pair, 16)]))
            chunks.append(item[2:])
        else:
            chunks.append(b'%')
            chunks.append(item)
    # Join once instead of growing a bytes object piece by piece.
    return b''.join(chunks)
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000480
def unquote(string, encoding='utf-8', errors='replace'):
    """Replace %xx escapes by their single-character equivalent. The optional
    encoding and errors parameters specify how to decode percent-encoded
    sequences into Unicode characters, as accepted by the bytes.decode()
    method.
    By default, percent-encoded sequences are decoded with UTF-8, and invalid
    sequences are replaced by a placeholder character.

    A '%' that is not followed by exactly two hexadecimal digits is left
    in the result unchanged.

    unquote('abc%20def') -> 'abc def'.
    """
    if string == '':
        return string
    head, *escaped = string.split('%')
    if not escaped:
        return string
    if encoding is None:
        encoding = 'utf-8'
    if errors is None:
        errors = 'replace'
    hexdigits = '0123456789ABCDEFabcdef'
    pieces = [head]
    # pending: contiguous run of percent-encoded bytes, not yet decoded.
    # Decoding is delayed because one character may span several escapes.
    pending = bytearray()
    for item in escaped:
        pair = item[:2]
        # Checked explicitly: bytes.fromhex() skips whitespace, so it would
        # accept e.g. '  ' as "no bytes" and the '%' would silently vanish.
        if len(pair) == 2 and pair[0] in hexdigits and pair[1] in hexdigits:
            pending.append(int(pair, 16))
            rest = item[2:]
            if not rest:
                # Nothing but the escape: the run may continue.
                continue
        else:
            rest = '%' + item
        # Encountered non-percent-encoded characters. Flush the current run.
        if pending:
            pieces.append(pending.decode(encoding, errors))
            pending = bytearray()
        pieces.append(rest)
    if pending:
        # Flush the final run.
        pieces.append(pending.decode(encoding, errors))
    return ''.join(pieces)
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000524
def parse_qs(qs, keep_blank_values=False, strict_parsing=False,
             encoding='utf-8', errors='replace'):
    """Parse a query given as a string argument.

        Arguments:

        qs: percent-encoded query string to be parsed

        keep_blank_values: flag indicating whether blank values in
            percent-encoded queries should be treated as blank strings.
            A true value indicates that blanks should be retained as
            blank strings.  The default false value indicates that
            blank values are to be ignored and treated as if they were
            not included.

        strict_parsing: flag indicating what to do with parsing errors.
            If false (the default), errors are silently ignored.
            If true, errors raise a ValueError exception.

        encoding and errors: specify how to decode percent-encoded sequences
            into Unicode characters, as accepted by the bytes.decode() method.

        Returns a dictionary mapping each name to the list of its values,
        in order of appearance.
    """
    grouped = {}
    for name, value in parse_qsl(qs, keep_blank_values, strict_parsing,
                                 encoding=encoding, errors=errors):
        grouped.setdefault(name, []).append(value)
    return grouped
556
def parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
              encoding='utf-8', errors='replace'):
    """Parse a query given as a string argument.

    Arguments:

    qs: percent-encoded query string to be parsed

    keep_blank_values: flag indicating whether blank values in
        percent-encoded queries should be treated as blank strings.  A
        true value indicates that blanks should be retained as blank
        strings.  The default false value indicates that blank values
        are to be ignored and treated as if they were  not included.

    strict_parsing: flag indicating what to do with parsing errors. If
        false (the default), errors are silently ignored. If true,
        errors raise a ValueError exception.

    encoding and errors: specify how to decode percent-encoded sequences
        into Unicode characters, as accepted by the bytes.decode() method.

    Returns a list of (name, value) pairs.
    """
    qs, _coerce_result = _coerce_args(qs)

    def _decode(text):
        # '+' stands for a space; then undo the percent-encoding.
        plain = unquote(text.replace('+', ' '),
                        encoding=encoding, errors=errors)
        return _coerce_result(plain)

    # Fields are separated by '&' as well as by ';'.
    fields = []
    for chunk in qs.split('&'):
        fields.extend(chunk.split(';'))

    result = []
    for field in fields:
        if not field and not strict_parsing:
            continue
        name, equals, value = field.partition('=')
        if not equals:
            if strict_parsing:
                raise ValueError("bad query field: %r" % (field,))
            # Handle case of a control-name with no equal sign
            if not keep_blank_values:
                continue
        if value or keep_blank_values:
            decoded_name = _decode(name)
            result.append((decoded_name, _decode(value)))
    return result
604
def unquote_plus(string, encoding='utf-8', errors='replace'):
    """Like unquote(), but also replace plus signs by spaces, as required for
    unquoting HTML form values.

    unquote_plus('%7e/abc+def') -> '~/abc def'
    """
    # Pluses are translated first, so an escaped '%2B' still yields '+'.
    spaced = string.replace('+', ' ')
    return unquote(spaced, encoding, errors)
613
# Byte values that are never percent-encoded: ASCII letters, digits, "_.-"
_ALWAYS_SAFE = frozenset(b''.join((
    b'ABCDEFGHIJKLMNOPQRSTUVWXYZ',
    b'abcdefghijklmnopqrstuvwxyz',
    b'0123456789',
    b'_.-',
)))
# The same values as a bytes object.
_ALWAYS_SAFE_BYTES = bytes(_ALWAYS_SAFE)
# Cache of quoting callables, keyed by the normalized 'safe' bytes.
_safe_quoters = {}
Guido van Rossumdf9f1ec2008-08-06 19:31:34 +0000620
class Quoter(collections.defaultdict):
    """A mapping from bytes (in range(0,256)) to strings.

    String values are percent-encoded byte values, unless the key < 128, and
    in the "safe" set (either the specified safe set, or default set).
    """
    # Keeps a cache internally, using defaultdict, for efficiency (lookups
    # of cached keys don't call Python code at all).
    def __init__(self, safe):
        """safe: bytes object."""
        self.safe = _ALWAYS_SAFE.union(safe)

    def __repr__(self):
        # Without this, will just display as a defaultdict
        cached = dict(self)
        return "<Quoter %r>" % cached

    def __missing__(self, b):
        # Handle a cache miss. Store quoted string in cache and return.
        if b in self.safe:
            quoted = chr(b)
        else:
            quoted = '%{:02X}'.format(b)
        self[b] = quoted
        return quoted
642
def quote(string, safe='/', encoding=None, errors=None):
    """quote('abc def') -> 'abc%20def'

    Each part of a URL, e.g. the path info, the query, etc., has a
    different set of reserved characters that must be quoted.

    RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists
    the following reserved characters.

    reserved    = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
                  "$" | ","

    Each of these characters is reserved in some component of a URL,
    but not necessarily in all of them.

    By default, the quote function is intended for quoting the path
    section of a URL.  Thus, it will not encode '/'.  This character
    is reserved, but in typical usage the quote function is being
    called on a path where the existing slash characters are used as
    reserved characters.

    string and safe may be either str or bytes objects. encoding and
    errors must not be specified if string is not a str (TypeError is
    raised otherwise).

    The optional encoding and errors parameters specify how to deal with
    non-ASCII characters, as accepted by the str.encode method.
    By default, encoding='utf-8' (characters are encoded with UTF-8), and
    errors='strict' (unsupported characters raise a UnicodeEncodeError).
    """
    if not isinstance(string, str):
        # Already bytes: there is nothing to encode, so reject the options.
        if encoding is not None:
            raise TypeError("quote() doesn't support 'encoding' for bytes")
        if errors is not None:
            raise TypeError("quote() doesn't support 'errors' for bytes")
        return quote_from_bytes(string, safe)
    if not string:
        return string
    raw = string.encode('utf-8' if encoding is None else encoding,
                        'strict' if errors is None else errors)
    return quote_from_bytes(raw, safe)
686
def quote_plus(string, safe='', encoding=None, errors=None):
    """Like quote(), but also replace ' ' with '+', as required for quoting
    HTML form values. Plus signs in the original string are escaped unless
    they are included in safe. It also does not have safe default to '/'.
    """
    # Check if ' ' in string, where string may either be a str or bytes.  If
    # there are no spaces, the regular quote will produce the right answer.
    if isinstance(string, str):
        may_have_space = ' ' in string
    elif isinstance(string, bytes):
        may_have_space = b' ' in string
    else:
        # Any other type goes through the space-preserving path below.
        may_have_space = True
    if not may_have_space:
        return quote(string, safe, encoding, errors)
    # Keep spaces unquoted for now, then turn them into '+'.
    space = ' ' if isinstance(safe, str) else b' '
    quoted = quote(string, safe + space, encoding, errors)
    return quoted.replace(' ', '+')
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000703
def quote_from_bytes(bs, safe='/'):
    """Percent-quote a bytes (or bytearray) object and return a str.

    Works like quote() but performs no str-to-bytes encoding step.
    Example: quote_from_bytes(b'abc def\xab') -> 'abc%20def%AB'

    safe may be str or bytes; non-ASCII entries in it are dropped.
    Raises TypeError if bs is neither bytes nor bytearray.
    """
    if not isinstance(bs, (bytes, bytearray)):
        raise TypeError("quote_from_bytes() expected bytes")
    if not bs:
        return ''

    # Normalize 'safe' to a bytes object holding ASCII code points only.
    if isinstance(safe, str):
        safe = safe.encode('ascii', 'ignore')
    else:
        safe = bytes(code for code in safe if code < 128)

    # Fast path: stripping every safe byte from the right leaves nothing
    # when the whole input consists of safe bytes, so no quoting is needed.
    if not bs.rstrip(_ALWAYS_SAFE_BYTES + safe):
        return bs.decode()

    # Reuse the per-'safe' quoting callable, creating it on first use.
    try:
        quoter = _safe_quoters[safe]
    except KeyError:
        _safe_quoters[safe] = quoter = Quoter(safe).__getitem__
    return ''.join(map(quoter, bs))
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000725
def _quote_query_part(part, safe, encoding, errors):
    """Quote a single query key or value with quote_plus().

    bytes are quoted as they are (encoding/errors do not apply to them);
    any other object is converted with str() and then quoted using
    encoding and errors.
    """
    if isinstance(part, bytes):
        return quote_plus(part, safe)
    return quote_plus(str(part), safe, encoding, errors)

def urlencode(query, doseq=False, safe='', encoding=None, errors=None):
    """Encode a mapping or a sequence of two-element tuples into a URL
    query string.

    If any values in the query arg are sequences and doseq is true, each
    sequence element is converted to a separate parameter.

    If the query arg is a sequence of two-element tuples, the order of the
    parameters in the output will match the order of parameters in the
    input.

    The keys and values of the query arg may each be either str or bytes
    (other objects are converted with str()).  The safe, encoding and
    errors parameters are passed on to quote_plus(); encoding and errors
    are only used for components that are not bytes.

    Raises TypeError if query is a non-empty sequence whose first element
    is not a tuple (for example a plain string), or if it does not support
    len() and indexing at all.
    """

    if hasattr(query, "items"):
        query = query.items()
    else:
        # It's a bother at times that strings and string-like objects are
        # sequences.
        try:
            # non-sequence items should not work with len()
            # non-empty strings will fail this
            if len(query) and not isinstance(query[0], tuple):
                raise TypeError
            # Zero-length sequences of all types will get here and succeed,
            # but that's a minor nit.  Since the original implementation
            # allowed empty dicts that type of behavior probably should be
            # preserved for consistency
        except TypeError as err:
            # Re-raise with a clearer message while keeping the traceback
            # of the original failure.
            raise TypeError("not a valid non-string sequence "
                            "or mapping object").with_traceback(
                                err.__traceback__)

    pairs = []
    if not doseq:
        # Every value is treated as a scalar, sequences included.
        for k, v in query:
            k = _quote_query_part(k, safe, encoding, errors)
            v = _quote_query_part(v, safe, encoding, errors)
            pairs.append(k + '=' + v)
    else:
        for k, v in query:
            k = _quote_query_part(k, safe, encoding, errors)

            # bytes and str are sequences too, but must stay one parameter.
            if isinstance(v, bytes):
                pairs.append(k + '=' + quote_plus(v, safe))
            elif isinstance(v, str):
                pairs.append(k + '=' + quote_plus(v, safe, encoding, errors))
            else:
                try:
                    # Is this a sufficient test for sequence-ness?
                    len(v)
                except TypeError:
                    # not a sequence
                    v = _quote_query_part(v, safe, encoding, errors)
                    pairs.append(k + '=' + v)
                else:
                    # loop over the sequence: one parameter per element
                    for elt in v:
                        elt = _quote_query_part(elt, safe, encoding, errors)
                        pairs.append(k + '=' + elt)
    return '&'.join(pairs)
803
804# Utilities to parse URLs (most of these return None for missing parts):
805# unwrap('<URL:type://host/path>') --> 'type://host/path'
806# splittype('type:opaquestring') --> 'type', 'opaquestring'
807# splithost('//host[:port]/path') --> 'host[:port]', '/path'
808# splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'
809# splitpasswd('user:passwd') -> 'user', 'passwd'
810# splitport('host:port') --> 'host', 'port'
811# splitquery('/path?query') --> '/path', 'query'
812# splittag('/path#tag') --> '/path', 'tag'
813# splitattr('/path;attr1=value1;attr2=value2;...') ->
814# '/path', ['attr1=value1', 'attr2=value2', ...]
815# splitvalue('attr=value') --> 'attr', 'value'
816# urllib.parse.unquote('abc%20def') -> 'abc def'
# quote('abc def') -> 'abc%20def'
818
def to_bytes(url):
    """Check that a str URL is pure ASCII and return it; to_bytes("URL") --> 'URL'.

    Despite the name, a str argument comes back as a str (it is encoded to
    ASCII and decoded again).  Arguments that are not str are returned
    untouched.

    Raises UnicodeError if a str argument contains non-ASCII characters.
    """
    # Most URL schemes require ASCII. If that changes, the conversion
    # can be relaxed.
    # XXX get rid of to_bytes()
    if not isinstance(url, str):
        return url
    try:
        return url.encode("ASCII").decode()
    except UnicodeError:
        raise UnicodeError("URL " + repr(url) +
                           " contains non-ASCII characters")
831
def unwrap(url):
    """Strip '<...>' and 'URL:' wrappers from url.

    unwrap('<URL:type://host/path>') --> 'type://host/path'

    The argument is converted with str() first; surrounding whitespace is
    removed at each stage.
    """
    text = str(url).strip()
    # Angle brackets are only removed as a matching pair.
    if text.startswith('<') and text.endswith('>'):
        text = text[1:-1].strip()
    if text.startswith('URL:'):
        text = text[4:].strip()
    return text
839
_typeprog = None
def splittype(url):
    """Split a leading scheme off url.

    splittype('type:opaquestring') --> 'type', 'opaquestring'

    The scheme is returned lower-cased.  If url has no scheme (no ':'
    before the first '/'), the result is (None, url).
    """
    global _typeprog
    if _typeprog is None:
        # Compile lazily and cache the pattern in the module global.
        import re
        _typeprog = re.compile('^([^/:]+):')

    found = _typeprog.match(url)
    if found is None:
        return None, url
    # The pattern is anchored at 0, so found.end() is just past the ':'.
    return found.group(1).lower(), url[found.end():]
853
_hostprog = None
def splithost(url):
    """Split the network location off a URL that starts with '//'.

    splithost('//host[:port]/path') --> 'host[:port]', '/path'

    The host part ends at the first '/', '?' or '#'.  Whatever follows is
    returned as the path; if it is non-empty and does not begin with '/',
    a '/' is prepended.  If url does not start with '//', the result is
    (None, url).
    """
    global _hostprog
    if _hostprog is None:
        # Compile lazily and cache the pattern in the module global.
        import re
        # '#' must terminate the host as well: otherwise a fragment such as
        # in '//127.0.0.1#@evil.com/' is taken as part of the authority and
        # a later split on '@' selects the wrong host.
        _hostprog = re.compile('^//([^/#?]*)(.*)$')

    match = _hostprog.match(url)
    if match:
        host_port = match.group(1)
        path = match.group(2)
        if path and not path.startswith('/'):
            path = '/' + path
        return host_port, path
    return None, url
870
_userprog = None
def splituser(host):
    """Split the user information off a host string.

    splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'

    The split happens at the last '@'.  Without a match the result is
    (None, host).
    """
    global _userprog
    if _userprog is None:
        # Compile lazily and cache the pattern in the module global.
        import re
        _userprog = re.compile('^(.*)@(.*)$')

    found = _userprog.match(host)
    if found is None:
        return None, host
    return found.groups()
882
_passwdprog = None
def splitpasswd(user):
    """Split a password off a user string.

    splitpasswd('user:passwd') -> 'user', 'passwd'

    The split happens at the first ':'; the password may itself contain
    ':' and newlines.  Without a ':' the result is (user, None).
    """
    global _passwdprog
    if _passwdprog is None:
        # Compile lazily and cache the pattern in the module global.
        import re
        _passwdprog = re.compile('^([^:]*):(.*)$',re.S)

    found = _passwdprog.match(user)
    if found is None:
        return user, None
    return found.groups()
894
# splitport('host:port') --> 'host', 'port'
_portprog = None
def splitport(host):
    """Split a trailing numeric port off a host string.

    splitport('host:port') --> 'host', 'port'

    The port is returned as a string of digits, not as an int.  If host
    does not end in ':' followed by at least one digit, the result is
    (host, None).
    """
    global _portprog
    if _portprog is None:
        # Compile lazily and cache the pattern in the module global.
        import re
        _portprog = re.compile('^(.*):([0-9]+)$')

    found = _portprog.match(host)
    if found is None:
        return host, None
    return found.groups()
907
_nportprog = None
def splitnport(host, defport=-1):
    """Split host and port, returning the port as a number.

    Return (host, defport) if host contains no ':'; defport defaults to -1.
    Return (host, port-as-int) if what follows the last ':' converts
    with int().
    Return (host, None) if there is a ':' but what follows it is empty or
    not a valid number."""
    global _nportprog
    if _nportprog is None:
        # Compile lazily and cache the pattern in the module global.
        import re
        _nportprog = re.compile('^(.*):(.*)$')

    found = _nportprog.match(host)
    if found is None:
        return host, defport

    host, port_text = found.groups()
    try:
        # An empty port counts as "not a number", same as unparsable text.
        nport = int(port_text) if port_text else None
    except ValueError:
        nport = None
    return host, nport
929
_queryprog = None
def splitquery(url):
    """Split the query string off a URL.

    splitquery('/path?query') --> '/path', 'query'

    The split happens at the last '?'.  Without a '?' the result is
    (url, None).
    """
    global _queryprog
    if _queryprog is None:
        # Compile lazily and cache the pattern in the module global.
        import re
        # Raw string: '\?' is a regex escape, not a Python string escape,
        # and an unknown backslash escape in a plain literal is deprecated.
        _queryprog = re.compile(r'^(.*)\?([^?]*)$')

    match = _queryprog.match(url)
    if match: return match.group(1, 2)
    return url, None
941
_tagprog = None
def splittag(url):
    """Split the fragment ("tag") off a URL.

    splittag('/path#tag') --> '/path', 'tag'

    The split happens at the last '#'.  Without a '#' the result is
    (url, None).
    """
    global _tagprog
    if _tagprog is None:
        # Compile lazily and cache the pattern in the module global.
        import re
        _tagprog = re.compile('^(.*)#([^#]*)$')

    found = _tagprog.match(url)
    if found is None:
        return url, None
    return found.groups()
953
def splitattr(url):
    """Split ';'-separated attributes off a URL.

    splitattr('/path;attr1=value1;attr2=value2;...') ->
        '/path', ['attr1=value1', 'attr2=value2', ...]

    The attribute list is empty when url contains no ';'.
    """
    path, *attrs = url.split(';')
    return path, attrs
959
_valueprog = None
def splitvalue(attr):
    """Split an attribute into its name and value.

    splitvalue('attr=value') --> 'attr', 'value'

    The split happens at the first '='.  Without a '=' the result is
    (attr, None).
    """
    global _valueprog
    if _valueprog is None:
        # Compile lazily and cache the pattern in the module global.
        import re
        _valueprog = re.compile('^([^=]*)=(.*)$')

    found = _valueprog.match(attr)
    if found is None:
        return attr, None
    return found.groups()