blob: 9a3e42ee786714d26e267fa65176890553f7a3df [file] [log] [blame]
Jeremy Hylton1afc1692008-06-18 20:49:58 +00001"""Parse (absolute and relative) URLs.
2
Senthil Kumaranfd41e082010-04-17 14:44:14 +00003urlparse module is based upon the following RFC specifications.
4
5RFC 3986 (STD66): "Uniform Resource Identifiers" by T. Berners-Lee, R. Fielding
6and L. Masinter, January 2005.
7
RFC 2732: "Format for Literal IPv6 Addresses in URL's" by R. Hinden,
B. Carpenter and L. Masinter, December 1999.
10
Benjamin Petersond7c3ed52010-06-27 22:32:30 +000011RFC 2396: "Uniform Resource Identifiers (URI)": Generic Syntax by T.
Senthil Kumaranfd41e082010-04-17 14:44:14 +000012Berners-Lee, R. Fielding, and L. Masinter, August 1998.
13
David Malcolmee255682010-12-02 16:41:00 +000014RFC 2368: "The mailto URL scheme", by P.Hoffman , L Masinter, J. Zawinski, July 1998.
Senthil Kumaranfd41e082010-04-17 14:44:14 +000015
16RFC 1808: "Relative Uniform Resource Locators", by R. Fielding, UC Irvine, June
171995.
18
Benjamin Petersond7c3ed52010-06-27 22:32:30 +000019RFC 1738: "Uniform Resource Locators (URL)" by T. Berners-Lee, L. Masinter, M.
Senthil Kumaranfd41e082010-04-17 14:44:14 +000020McCahill, December 1994
21
RFC 3986 is considered the current standard and any future changes to
urlparse module should conform with it.  The urlparse module is
currently not entirely compliant with this RFC due to de facto
scenarios for parsing, and for backward compatibility purposes, some
parsing quirks from older RFCs are retained. The test cases in
test_urlparse.py provide a good indicator of parsing behavior.
Jeremy Hylton1afc1692008-06-18 20:49:58 +000028"""
29
Facundo Batista2ac5de22008-07-07 18:24:11 +000030import sys
Guido van Rossum52dbbb92008-08-18 21:44:30 +000031import collections
Facundo Batista2ac5de22008-07-07 18:24:11 +000032
# Public names exported by a star-import of this module.
__all__ = [
    "urlparse", "urlunparse", "urljoin", "urldefrag", "urlsplit",
    "urlunsplit", "urlencode", "parse_qs", "parse_qsl", "quote",
    "quote_plus", "quote_from_bytes", "unquote", "unquote_plus",
    "unquote_to_bytes",
]
Jeremy Hylton1afc1692008-06-18 20:49:58 +000037
# Scheme classification tables.  The empty string '' stands for "no
# scheme given", i.e. the behaviour applied by default.  These stay
# plain lists (not tuples or sets) so they remain mutable module globals.
uses_relative = [
    'ftp', 'http', 'gopher', 'nntp', 'imap', 'wais', 'file', 'https',
    'shttp', 'mms', 'prospero', 'rtsp', 'rtspu', '', 'sftp',
]
uses_netloc = [
    'ftp', 'http', 'gopher', 'nntp', 'telnet', 'imap', 'wais', 'file',
    'mms', 'https', 'shttp', 'snews', 'prospero', 'rtsp', 'rtspu',
    'rsync', '', 'svn', 'svn+ssh', 'sftp', 'nfs', 'git', 'git+ssh',
]
non_hierarchical = [
    'gopher', 'hdl', 'mailto', 'news', 'telnet', 'wais', 'imap',
    'snews', 'sip', 'sips',
]
uses_params = [
    'ftp', 'hdl', 'prospero', 'http', 'imap', 'https', 'shttp', 'rtsp',
    'rtspu', 'sip', 'sips', 'mms', '', 'sftp',
]
uses_query = [
    'http', 'wais', 'imap', 'https', 'shttp', 'mms', 'gopher', 'rtsp',
    'rtspu', 'sip', 'sips', '',
]
uses_fragment = [
    'ftp', 'hdl', 'http', 'gopher', 'news', 'nntp', 'wais', 'https',
    'shttp', 'snews', 'file', 'prospero', '',
]

# Every character that may appear in a scheme name.
scheme_chars = (
    'abcdefghijklmnopqrstuvwxyz'
    'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    '0123456789'
    '+-.'
)

# XXX: Consider replacing with functools.lru_cache
# Upper bound on the number of memoized split results, and the memo itself.
MAX_CACHE_SIZE = 20
_parse_cache = {}
66
def clear_cache():
    """Empty both module-level caches: the parse cache and the quoters cache."""
    for cache in (_parse_cache, _safe_quoters):
        cache.clear()
Jeremy Hylton1afc1692008-06-18 20:49:58 +000071
72
# Helpers for bytes handling.
# For 3.2, applications that have to handle improperly quoted URLs are
# deliberately required to do their own decoding and encoding.  If valid
# use cases are presented, this may be relaxed for 3.3 by decoding with
# latin-1 internally.
_implicit_encoding, _implicit_errors = 'ascii', 'strict'
81
def _noop(obj):
    """Return obj unchanged; the identity result-coercion function."""
    return obj
84
def _encode_result(obj, encoding=_implicit_encoding,
                   errors=_implicit_errors):
    """Return obj.encode(encoding, errors).

    The defaults are the module's implicit encoding and error handler.
    """
    encoded = obj.encode(encoding, errors)
    return encoded
88
def _decode_args(args, encoding=_implicit_encoding,
                 errors=_implicit_errors):
    """Return a tuple with every item of args decoded to str.

    Falsy items (empty bytes, None, ...) are not decoded; each one is
    replaced by the empty str.
    """
    decoded = []
    for item in args:
        decoded.append(item.decode(encoding, errors) if item else '')
    return tuple(decoded)
92
def _coerce_args(*args):
    """Coerce the arguments to str and pick the matching result coercion.

    Returns the (possibly decoded) arguments as a tuple with one extra
    trailing element, the function to apply to the result:
      - _noop when the first argument is a str,
      - _encode_result otherwise (the arguments are decoded first).

    Raises TypeError when a non-empty later argument is not of the same
    kind (str / non-str) as the first argument.
    """
    first, others = args[0], args[1:]
    expect_str = isinstance(first, str)
    for other in others:
        # Empty values are accepted alongside either kind; this supports
        # the "scheme=''" default argument of some functions.
        if not other:
            continue
        if isinstance(other, str) != expect_str:
            raise TypeError("Cannot mix str and non-str arguments")
    if not expect_str:
        return _decode_args(args) + (_encode_result,)
    return args + (_noop,)
108
109# Result objects are more helpful than simple tuples
class _ResultMixinStr(object):
    """Standard approach to encoding parsed results from str to bytes"""
    __slots__ = ()

    def encode(self, encoding='ascii', errors='strict'):
        """Encode every field and build the _encoded_counterpart from them."""
        encoded_fields = [field.encode(encoding, errors) for field in self]
        return self._encoded_counterpart(*encoded_fields)
116
117
class _ResultMixinBytes(object):
    """Standard approach to decoding parsed results from bytes to str"""
    __slots__ = ()

    def decode(self, encoding='ascii', errors='strict'):
        """Decode every field and build the _decoded_counterpart from them."""
        decoded_fields = [field.decode(encoding, errors) for field in self]
        return self._decoded_counterpart(*decoded_fields)
124
125
class _NetlocResultMixinBase(object):
    """Shared methods for the parsed result objects containing a netloc element

    Subclasses supply two properties, each returning a 2-tuple:
    _userinfo -> (username, password) and _hostinfo -> (hostname, port).
    """
    __slots__ = ()

    @property
    def username(self):
        """First element of _userinfo."""
        return self._userinfo[0]

    @property
    def password(self):
        """Second element of _userinfo."""
        return self._userinfo[1]

    @property
    def hostname(self):
        """Lower-cased host name, or None when the host part is empty."""
        hostname = self._hostinfo[0]
        if not hostname:
            return None
        return hostname.lower()

    @property
    def port(self):
        """Port as an int, or None when no port is given.

        An empty port (a netloc such as 'host:') is reported as None
        instead of failing on int('').  A non-numeric port still raises
        ValueError.
        """
        port = self._hostinfo[1]
        if not port:
            # Covers both "no colon at all" (None) and "colon with nothing
            # after it" (empty str/bytes).
            return None
        return int(port, 10)
153
154
class _NetlocResultMixinStr(_NetlocResultMixinBase, _ResultMixinStr):
    """Netloc splitting for results whose netloc is a str."""
    __slots__ = ()

    @property
    def _userinfo(self):
        """Return (username, password) taken from before the last '@'.

        Both are None when the netloc has no '@'; password is None when
        the user info has no ':'.
        """
        userinfo, at_sign, _ = self.netloc.rpartition('@')
        if not at_sign:
            return None, None
        username, colon, password = userinfo.partition(':')
        return username, (password if colon else None)

    @property
    def _hostinfo(self):
        """Return (hostname, port) taken from after the last '@'.

        A bracketed host ('[...]') is returned without its brackets.
        port is the text after the ':' separator, or None without one.
        """
        hostinfo = self.netloc.rpartition('@')[2]
        _, open_bracket, bracketed = hostinfo.partition('[')
        if open_bracket:
            hostname, _, trailer = bracketed.partition(']')
            _, colon, port = trailer.partition(':')
        else:
            hostname, colon, port = hostinfo.partition(':')
        return hostname, (port if colon else None)
183
184
class _NetlocResultMixinBytes(_NetlocResultMixinBase, _ResultMixinBytes):
    """Netloc splitting for results whose netloc is a bytes object."""
    __slots__ = ()

    @property
    def _userinfo(self):
        """Return (username, password) taken from before the last b'@'.

        Both are None when the netloc has no b'@'; password is None when
        the user info has no b':'.
        """
        userinfo, at_sign, _ = self.netloc.rpartition(b'@')
        if not at_sign:
            return None, None
        username, colon, password = userinfo.partition(b':')
        return username, (password if colon else None)

    @property
    def _hostinfo(self):
        """Return (hostname, port) taken from after the last b'@'.

        A bracketed host (b'[...]') is returned without its brackets.
        port is the bytes after the b':' separator, or None without one.
        """
        hostinfo = self.netloc.rpartition(b'@')[2]
        _, open_bracket, bracketed = hostinfo.partition(b'[')
        if open_bracket:
            hostname, _, trailer = bracketed.partition(b']')
            _, colon, port = trailer.partition(b':')
        else:
            hostname, colon, port = hostinfo.partition(b':')
        return hostname, (port if colon else None)
213
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000214
215from collections import namedtuple
216
# Plain namedtuple bases; the public result classes add behaviour on top.
_DefragResultBase = namedtuple('DefragResult', ['url', 'fragment'])
_SplitResultBase = namedtuple(
    'SplitResult', ['scheme', 'netloc', 'path', 'query', 'fragment'])
_ParseResultBase = namedtuple(
    'ParseResult',
    ['scheme', 'netloc', 'path', 'params', 'query', 'fragment'])
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000220
# Backwards-compatibility alias for _NetlocResultMixinStr.  ResultBase
# is no longer part of the documented API, but the name is kept because
# deprecating it isn't worth the hassle.
ResultBase = _NetlocResultMixinStr
225
226# Structured result objects for string data
class DefragResult(_DefragResultBase, _ResultMixinStr):
    """(url, fragment) result for str data."""
    __slots__ = ()

    def geturl(self):
        """Return url, with '#' + fragment appended when fragment is non-empty."""
        if not self.fragment:
            return self.url
        return self.url + '#' + self.fragment
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000234
class SplitResult(_SplitResultBase, _NetlocResultMixinStr):
    """(scheme, netloc, path, query, fragment) result for str data."""
    __slots__ = ()

    def geturl(self):
        """Recombine the five components by passing self to urlunsplit()."""
        return urlunsplit(self)
239
class ParseResult(_ParseResultBase, _NetlocResultMixinStr):
    """(scheme, netloc, path, params, query, fragment) result for str data."""
    __slots__ = ()

    def geturl(self):
        """Recombine the six components by passing self to urlunparse()."""
        return urlunparse(self)
244
Nick Coghlan9fc443c2010-11-30 15:48:08 +0000245# Structured result objects for bytes data
class DefragResultBytes(_DefragResultBase, _ResultMixinBytes):
    """(url, fragment) result for bytes data."""
    __slots__ = ()

    def geturl(self):
        """Return url, with b'#' + fragment appended when fragment is non-empty."""
        if not self.fragment:
            return self.url
        return self.url + b'#' + self.fragment
253
class SplitResultBytes(_SplitResultBase, _NetlocResultMixinBytes):
    """(scheme, netloc, path, query, fragment) result for bytes data."""
    __slots__ = ()

    def geturl(self):
        """Recombine the five components by passing self to urlunsplit()."""
        return urlunsplit(self)
258
class ParseResultBytes(_ParseResultBase, _NetlocResultMixinBytes):
    """(scheme, netloc, path, params, query, fragment) result for bytes data."""
    __slots__ = ()

    def geturl(self):
        """Recombine the six components by passing self to urlunparse()."""
        return urlunparse(self)
263
264# Set up the encode/decode result pairs
def _fix_result_transcoding():
    """Point each str result class at its bytes twin, and vice versa.

    Sets _encoded_counterpart on the str classes (used by encode()) and
    _decoded_counterpart on the bytes classes (used by decode()).
    """
    str_classes = (DefragResult, SplitResult, ParseResult)
    bytes_classes = (DefragResultBytes, SplitResultBytes, ParseResultBytes)
    for str_cls, bytes_cls in zip(str_classes, bytes_classes):
        str_cls._encoded_counterpart = bytes_cls
        bytes_cls._decoded_counterpart = str_cls

# Run once at import time, then drop the helper from the namespace.
_fix_result_transcoding()
del _fix_result_transcoding
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000277
def urlparse(url, scheme='', allow_fragments=True):
    """Parse a URL into 6 components:
    <scheme>://<netloc>/<path>;<params>?<query>#<fragment>

    Return a ParseResult 6-tuple (scheme, netloc, path, params, query,
    fragment), coerced back to the bytes variant for non-str input.
    The components are not broken up further (e.g. netloc is a single
    string) and % escapes are not expanded.

    params is split off the path only when the scheme is listed in
    uses_params and the path contains ';'; otherwise it is ''.
    """
    url, scheme, _coerce_result = _coerce_args(url, scheme)
    scheme, netloc, path, query, fragment = urlsplit(
        url, scheme, allow_fragments)
    params = ''
    if scheme in uses_params and ';' in path:
        path, params = _splitparams(path)
    parsed = ParseResult(scheme, netloc, path, params, query, fragment)
    return _coerce_result(parsed)
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000293
def _splitparams(url):
    """Split url at a ';' into (path, params).

    When url contains '/', only a ';' at or after the last '/' is used,
    and (url, '') is returned if there is none.  Without any '/', the
    first ';' is used; the caller is expected to pass a url that
    contains one.
    """
    last_slash = url.rfind('/')
    if last_slash >= 0:
        semi = url.find(';', last_slash)
        if semi < 0:
            return url, ''
    else:
        semi = url.find(';')
    return url[:semi], url[semi + 1:]
302
def _splitnetloc(url, start=0):
    """Return (netloc, rest) for the url, with the netloc beginning at start.

    The netloc ends at the earliest '/', '?' or '#' found at or after
    start, or at the end of url when none of them occurs.
    """
    hits = []
    for delimiter in '/?#':
        position = url.find(delimiter, start)
        if position >= 0:
            hits.append(position)
    # A found delimiter always lies before len(url), so the earliest hit
    # wins whenever there is one.
    end = min(hits) if hits else len(url)
    return url[start:end], url[end:]
310
def urlsplit(url, scheme='', allow_fragments=True):
    """Parse a URL into 5 components:
    <scheme>://<netloc>/<path>?<query>#<fragment>

    Return a SplitResult 5-tuple (scheme, netloc, path, query, fragment),
    coerced back to the bytes variant for non-str input.  The components
    are not broken up further (e.g. netloc is a single string) and
    % escapes are not expanded.

    Results are memoized in _parse_cache; the caches are cleared once
    MAX_CACHE_SIZE entries have accumulated.

    Raises ValueError("Invalid IPv6 URL") when the netloc has a '[' or a
    ']' without its partner.
    """
    url, scheme, _coerce_result = _coerce_args(url, scheme)
    allow_fragments = bool(allow_fragments)
    key = url, scheme, allow_fragments, type(url), type(scheme)
    cached = _parse_cache.get(key, None)
    if cached:
        return _coerce_result(cached)
    if len(_parse_cache) >= MAX_CACHE_SIZE:
        # avoid runaway growth
        clear_cache()

    def take_netloc(text):
        # Split a leading '//netloc' off text and sanity-check brackets.
        if text[:2] != '//':
            return '', text
        host, remainder = _splitnetloc(text, 2)
        if ('[' in host) != (']' in host):
            raise ValueError("Invalid IPv6 URL")
        return host, remainder

    query = fragment = ''
    colon = url.find(':')
    if colon > 0:
        prefix = url[:colon]
        if prefix == 'http':
            # Fast path for the common case: http always takes a
            # fragment (if allowed) and a query, so the scheme tables
            # are not consulted.
            scheme = prefix.lower()
            netloc, url = take_netloc(url[colon + 1:])
            if allow_fragments and '#' in url:
                url, _, fragment = url.partition('#')
            if '?' in url:
                url, _, query = url.partition('?')
            result = SplitResult(scheme, netloc, url, query, fragment)
            _parse_cache[key] = result
            return _coerce_result(result)
        # A digit right after the colon is not treated as the end of a
        # scheme (it looks like "host:port").
        if url.endswith(':') or not url[colon + 1].isdigit():
            if all(char in scheme_chars for char in prefix):
                scheme, url = prefix.lower(), url[colon + 1:]
    netloc, url = take_netloc(url)
    if allow_fragments and scheme in uses_fragment and '#' in url:
        url, _, fragment = url.partition('#')
    if scheme in uses_query and '?' in url:
        url, _, query = url.partition('?')
    result = SplitResult(scheme, netloc, url, query, fragment)
    _parse_cache[key] = result
    return _coerce_result(result)
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000361
def urlunparse(components):
    """Put a parsed URL back together again.

    components is a six-item iterable (scheme, netloc, path, params,
    query, fragment).  The result may be slightly different from, but
    equivalent to, the URL that was parsed if that URL had redundant
    delimiters, e.g. a ? with an empty query.
    """
    (scheme, netloc, url, params, query, fragment,
     _coerce_result) = _coerce_args(*components)
    if params:
        # Re-attach the parameters to the path.
        url = "%s;%s" % (url, params)
    joined = urlunsplit((scheme, netloc, url, query, fragment))
    return _coerce_result(joined)
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000372
def urlunsplit(components):
    """Combine the elements of a tuple as returned by urlsplit() into a
    complete URL.

    components can be any five-item iterable (scheme, netloc, path,
    query, fragment).  The result may be slightly different from, but
    equivalent to, the URL that was parsed if that URL had unnecessary
    delimiters (for example, a ? with an empty query).
    """
    (scheme, netloc, url, query, fragment,
     _coerce_result) = _coerce_args(*components)
    # Emit '//netloc' when there is a netloc, or when the scheme is one
    # that uses a netloc and the path does not already start with '//'.
    if netloc or (scheme and scheme in uses_netloc and url[:2] != '//'):
        if url and url[:1] != '/':
            url = '/' + url
        url = '//' + (netloc or '') + url
    if scheme:
        url = scheme + ':' + url
    if query:
        url += '?' + query
    if fragment:
        url += '#' + fragment
    return _coerce_result(url)
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000391
def urljoin(base, url, allow_fragments=True):
    """Join a base URL and a possibly relative URL to form an absolute
    interpretation of the latter.

    An empty base returns url unchanged and an empty url returns base
    unchanged.  url is also returned as given when its scheme differs
    from the base's or is not listed in uses_relative.
    """
    if not base:
        return url
    if not url:
        return base
    base, url, _coerce_result = _coerce_args(base, url)
    bscheme, bnetloc, bpath, bparams, bquery, bfragment = urlparse(
        base, '', allow_fragments)
    scheme, netloc, path, params, query, fragment = urlparse(
        url, bscheme, allow_fragments)

    def assemble(final_path):
        # Reads the enclosing variables as they stand when it is called.
        return _coerce_result(urlunparse((scheme, netloc, final_path,
                                          params, query, fragment)))

    if scheme != bscheme or scheme not in uses_relative:
        return _coerce_result(url)
    if scheme in uses_netloc:
        if netloc:
            return assemble(path)
        netloc = bnetloc
    if path[:1] == '/':
        return assemble(path)
    if not path and not params:
        # Nothing but a query and/or fragment: inherit from the base.
        params = bparams
        if not query:
            query = bquery
        return assemble(bpath)

    # Merge the base directory with the relative path, then resolve the
    # '.' and '..' segments.
    segments = bpath.split('/')[:-1] + path.split('/')
    # XXX The stuff below is bogus in various ways...
    if segments[-1] == '.':
        segments[-1] = ''
    segments = [segment for segment in segments if segment != '.']
    while True:
        # Remove one "<name>/.." pair per pass until none is left.  The
        # first and last segments are never the '..' being collapsed.
        for i in range(1, len(segments) - 1):
            if segments[i] == '..' and segments[i - 1] not in ('', '..'):
                del segments[i - 1:i + 1]
                break
        else:
            break
    if segments == ['', '..']:
        segments[-1] = ''
    elif len(segments) >= 2 and segments[-1] == '..':
        segments[-2:] = ['']
    return assemble('/'.join(segments))
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000444
def urldefrag(url):
    """Remove any existing fragment from URL.

    Returns a DefragResult holding the defragmented URL and the
    fragment (coerced back to the bytes variant for non-str input).
    If the URL contains no '#', the URL is returned as given and the
    fragment is the empty string.
    """
    url, _coerce_result = _coerce_args(url)
    if '#' not in url:
        return _coerce_result(DefragResult(url, ''))
    scheme, netloc, path, params, query, frag = urlparse(url)
    defrag = urlunparse((scheme, netloc, path, params, query, ''))
    return _coerce_result(DefragResult(defrag, frag))
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000460
def unquote_to_bytes(string):
    """unquote_to_bytes('abc%20def') -> b'abc def'.

    string may be a str or a bytes-like object.  A str is encoded as
    UTF-8 first; this only matters if it contains unescaped non-ASCII
    characters, which URIs should not.

    Only a '%' followed by exactly two hexadecimal digits is treated as
    an escape.  Anything else ('%', '%1', '% 1', '%+f', '%zz') is copied
    to the output unchanged.
    """
    if not string:
        # Is it a string-like object?  (Raises AttributeError for None.)
        string.split
        return b''
    if isinstance(string, str):
        string = string.encode('utf-8')
    chunks = string.split(b'%')
    if len(chunks) == 1:
        return string
    # int(x, 16) is too lenient for validation (it accepts a single
    # digit, surrounding whitespace and a sign), so check the two digits
    # explicitly.
    hexdigits = b'0123456789ABCDEFabcdef'
    pieces = [chunks[0]]
    for item in chunks[1:]:
        code = item[:2]
        if (len(code) == 2 and code[0] in hexdigits
                and code[1] in hexdigits):
            pieces.append(bytes([int(code, 16)]))
            pieces.append(item[2:])
        else:
            # Not a valid escape: keep the '%' and what follows it.
            pieces.append(b'%')
            pieces.append(item)
    return b''.join(pieces)
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000481
def unquote(string, encoding='utf-8', errors='replace'):
    """Replace %xx escapes by their single-character equivalent.

    The optional encoding and errors parameters specify how to decode
    percent-encoded sequences into Unicode characters, as accepted by
    the bytes.decode() method.  By default, percent-encoded sequences
    are decoded with UTF-8, and invalid sequences are replaced by a
    placeholder character.  Passing None for either selects its default.

    Only a '%' followed by exactly two hexadecimal digits is treated as
    an escape; anything else (including '%' followed by whitespace) is
    copied to the output unchanged.

    unquote('abc%20def') -> 'abc def'.
    """
    if string == '':
        return string
    chunks = string.split('%')
    if len(chunks) == 1:
        return string
    if encoding is None:
        encoding = 'utf-8'
    if errors is None:
        errors = 'replace'
    # bytes.fromhex() skips whitespace, so it cannot be used to validate
    # an escape; check the two digits explicitly instead.
    hexdigits = '0123456789ABCDEFabcdef'
    pieces = [chunks[0]]
    # pct_sequence: contiguous run of percent-encoded bytes, not yet
    # decoded (a multi-byte character may span several escapes).
    pct_sequence = bytearray()
    for item in chunks[1:]:
        code = item[:2]
        if (len(code) == 2 and code[0] in hexdigits
                and code[1] in hexdigits):
            pct_sequence.append(int(code, 16))
            rest = item[2:]
            if not rest:
                # This segment was just a single percent-encoded byte.
                # It may be part of a longer sequence, so delay decoding.
                continue
        else:
            # Not a valid escape: keep the '%' and what follows it.
            rest = '%' + item
        # Encountered non-percent-encoded characters.  Flush the current
        # pct_sequence.
        pieces.append(pct_sequence.decode(encoding, errors))
        pieces.append(rest)
        pct_sequence = bytearray()
    if pct_sequence:
        # Flush the final pct_sequence.
        pieces.append(pct_sequence.decode(encoding, errors))
    return ''.join(pieces)
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000525
def parse_qs(qs, keep_blank_values=False, strict_parsing=False,
             encoding='utf-8', errors='replace'):
    """Parse a query given as a string argument.

    Arguments:

    qs: percent-encoded query string to be parsed

    keep_blank_values: flag indicating whether blank values in
        percent-encoded queries should be treated as blank strings.
        A true value indicates that blanks should be retained as
        blank strings.  The default false value indicates that
        blank values are to be ignored and treated as if they were
        not included.

    strict_parsing: flag indicating what to do with parsing errors.
        If false (the default), errors are silently ignored.
        If true, errors raise a ValueError exception.

    encoding and errors: specify how to decode percent-encoded sequences
        into Unicode characters, as accepted by the bytes.decode() method.

    Returns a dict mapping each name to the list of its values, in the
    order parse_qsl() produced them.
    """
    parsed = {}
    for name, value in parse_qsl(qs, keep_blank_values, strict_parsing,
                                 encoding=encoding, errors=errors):
        parsed.setdefault(name, []).append(value)
    return parsed
557
def parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
              encoding='utf-8', errors='replace'):
    """Parse a query given as a string argument.

    Arguments:

    qs: percent-encoded query string to be parsed

    keep_blank_values: flag indicating whether blank values in
        percent-encoded queries should be treated as blank strings.  A
        true value indicates that blanks should be retained as blank
        strings.  The default false value indicates that blank values
        are to be ignored and treated as if they were not included.

    strict_parsing: flag indicating what to do with parsing errors.  If
        false (the default), errors are silently ignored.  If true,
        errors raise a ValueError exception.

    encoding and errors: specify how to decode percent-encoded sequences
        into Unicode characters, as accepted by the bytes.decode() method.

    Returns a list of (name, value) pairs.  Fields are separated by
    '&' or ';', and '+' is turned into a space before unquoting.
    """
    qs, _coerce_result = _coerce_args(qs)
    # Split the whole query first, as '&' groups and then ';' fields.
    fields = []
    for group in qs.split('&'):
        fields.extend(group.split(';'))

    def decode(text):
        # '+' means space in form data; then undo the percent-encoding.
        unquoted = unquote(text.replace('+', ' '),
                           encoding=encoding, errors=errors)
        return _coerce_result(unquoted)

    result = []
    for field in fields:
        if not field and not strict_parsing:
            continue
        raw_name, equals, raw_value = field.partition('=')
        if not equals:
            if strict_parsing:
                raise ValueError("bad query field: %r" % (field,))
            # Handle case of a control-name with no equal sign
            if not keep_blank_values:
                continue
        if not (raw_value or keep_blank_values):
            continue
        name = decode(raw_name)
        value = decode(raw_value)
        result.append((name, value))
    return result
605
def unquote_plus(string, encoding='utf-8', errors='replace'):
    """Like unquote(), but also replace plus signs by spaces, as required
    for unquoting HTML form values.

    unquote_plus('%7e/abc+def') -> '~/abc def'
    """
    with_spaces = string.replace('+', ' ')
    return unquote(with_spaces, encoding, errors)
614
# Byte values that are never percent-encoded: ASCII letters, digits and
# the three marks '_', '.', '-'.
_ALWAYS_SAFE = frozenset(
    b'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    b'abcdefghijklmnopqrstuvwxyz'
    b'0123456789'
    b'_.-'
)
# The same values as a bytes object.
_ALWAYS_SAFE_BYTES = bytes(_ALWAYS_SAFE)
# Cache of quoter callables, filled in lazily.
_safe_quoters = {}
Guido van Rossumdf9f1ec2008-08-06 19:31:34 +0000621
class Quoter(collections.defaultdict):
    """A mapping from bytes (in range(0,256)) to strings.

    String values are percent-encoded byte values, unless the key is in
    the "safe" set (the always-safe bytes plus those given to __init__).
    """
    # The defaultdict base serves as the cache: once a key has been
    # stored, looking it up again doesn't call Python code at all.

    def __init__(self, safe):
        """safe: bytes object."""
        self.safe = _ALWAYS_SAFE.union(safe)

    def __repr__(self):
        # Without this, would just display as a defaultdict
        return "<Quoter %r>" % dict(self)

    def __missing__(self, b):
        # Handle a cache miss: compute the quoted form of byte value b,
        # store it in the cache and return it.
        if b in self.safe:
            res = chr(b)
        else:
            res = '%{:02X}'.format(b)
        self[b] = res
        return res
643
def quote(string, safe='/', encoding=None, errors=None):
    """quote('abc def') -> 'abc%20def'

    Each part of a URL, e.g. the path info, the query, etc., has a
    different set of reserved characters that must be quoted.

    RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists
    the following reserved characters.

    reserved    = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
                  "$" | ","

    Each of these characters is reserved in some component of a URL,
    but not necessarily in all of them.

    By default, the quote function is intended for quoting the path
    section of a URL.  Thus, it will not encode '/'.  This character
    is reserved, but in typical usage the quote function is being
    called on a path where the existing slash characters are used as
    reserved characters.

    string and safe may be either str or bytes objects.

    The optional encoding and errors parameters specify how a str is
    turned into bytes, as accepted by the str.encode method.  By
    default, encoding='utf-8' and errors='strict' (unsupported
    characters raise a UnicodeEncodeError).  Neither may be given when
    string is not a str: a TypeError is raised in that case.
    """
    if not isinstance(string, str):
        # Already bytes: there is nothing to encode.
        if encoding is not None:
            raise TypeError("quote() doesn't support 'encoding' for bytes")
        if errors is not None:
            raise TypeError("quote() doesn't support 'errors' for bytes")
        return quote_from_bytes(string, safe)
    if not string:
        return string
    encoded = string.encode('utf-8' if encoding is None else encoding,
                            'strict' if errors is None else errors)
    return quote_from_bytes(encoded, safe)
687
def quote_plus(string, safe='', encoding=None, errors=None):
    """Like quote(), but also replace ' ' with '+', as required for quoting
    HTML form values.  Plus signs in the original string are escaped
    unless they are included in safe.  It also does not have safe
    default to '/'.
    """
    # Look for a space in string, which may be a str or a bytes object.
    # Without any space, the regular quote produces the right answer.
    if isinstance(string, str):
        space_free = ' ' not in string
    elif isinstance(string, bytes):
        space_free = b' ' not in string
    else:
        space_free = False
    if space_free:
        return quote(string, safe, encoding, errors)
    # Let spaces through quote() untouched, then turn them into '+'.
    space = ' ' if isinstance(safe, str) else b' '
    quoted = quote(string, safe + space, encoding, errors)
    return quoted.replace(' ', '+')
Guido van Rossum52dbbb92008-08-18 21:44:30 +0000704
def quote_from_bytes(bs, safe='/'):
    """Percent-quote a bytes (or bytearray) object and return a str.

    Works like quote() but skips the str-to-bytes encoding step; the
    result is always an ASCII string.

        quote_from_bytes(b'abc def\xab') -> 'abc%20def%AB'

    Raises TypeError when *bs* is neither bytes nor bytearray.
    """
    if not isinstance(bs, (bytes, bytearray)):
        raise TypeError("quote_from_bytes() expected bytes")
    if not bs:
        return ''

    # Normalise 'safe' to a bytes object holding only ASCII code points.
    if not isinstance(safe, str):
        safe = bytes(c for c in safe if c < 128)
    else:
        safe = safe.encode('ascii', 'ignore')

    # If stripping every safe byte from the right leaves nothing, the
    # whole input consists of safe bytes and can be returned verbatim.
    if not bs.rstrip(_ALWAYS_SAFE_BYTES + safe):
        return bs.decode()

    # Reuse the per-'safe' quoting callable when one is already cached.
    try:
        quoter = _safe_quoters[safe]
    except KeyError:
        quoter = Quoter(safe).__getitem__
        _safe_quoters[safe] = quoter
    return ''.join(map(quoter, bs))
Jeremy Hylton1afc1692008-06-18 20:49:58 +0000726
def urlencode(query, doseq=False, safe='', encoding=None, errors=None):
    """Encode a mapping or a sequence of two-element tuples as a query string.

    If *query* has an ``items`` attribute it is treated as a mapping;
    otherwise it must be a sequence whose elements are tuples.  When it is
    a sequence of pairs, the parameters appear in the output in input
    order.

    If *doseq* is true and a value is a sequence (other than str or
    bytes), every element of that sequence becomes its own ``key=elt``
    parameter.

    Keys and values that are bytes are passed to quote_plus() together
    with *safe* only.  Everything else is converted with str() (str
    values under *doseq* are used as-is) and passed to quote_plus() with
    *safe*, *encoding* and *errors*.

    Raises TypeError if *query* is neither a mapping nor a valid
    non-string sequence.
    """

    def _quote(item):
        # bytes go through untouched; any other object is stringified.
        if isinstance(item, bytes):
            return quote_plus(item, safe)
        return quote_plus(str(item), safe, encoding, errors)

    if hasattr(query, "items"):
        query = query.items()
    else:
        # It's a bother at times that strings and string-like objects are
        # sequences.
        try:
            # Non-sequence items should not work with len(), and a
            # non-empty string fails the tuple check on its first element.
            if len(query) and not isinstance(query[0], tuple):
                raise TypeError
            # Zero-length sequences of all types get here and succeed;
            # that is kept for consistency with the original behaviour
            # of accepting empty dicts.
        except TypeError:
            tb = sys.exc_info()[2]
            raise TypeError("not a valid non-string sequence "
                            "or mapping object").with_traceback(tb)

    one_value_per_key = not doseq
    pairs = []
    for k, v in query:
        key = _quote(k)

        if one_value_per_key:
            pairs.append(key + '=' + _quote(v))
            continue

        if isinstance(v, bytes):
            pairs.append(key + '=' + quote_plus(v, safe))
        elif isinstance(v, str):
            pairs.append(key + '=' + quote_plus(v, safe, encoding, errors))
        else:
            try:
                # Is this a sufficient test for sequence-ness?
                len(v)
            except TypeError:
                # Not a sequence: encode it as a single value.
                pairs.append(key + '=' + _quote(v))
            else:
                # A sequence: emit one parameter per element.
                for elt in v:
                    pairs.append(key + '=' + _quote(elt))
    return '&'.join(pairs)
804
805# Utilities to parse URLs (most of these return None for missing parts):
806# unwrap('<URL:type://host/path>') --> 'type://host/path'
807# splittype('type:opaquestring') --> 'type', 'opaquestring'
808# splithost('//host[:port]/path') --> 'host[:port]', '/path'
809# splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'
810# splitpasswd('user:passwd') -> 'user', 'passwd'
811# splitport('host:port') --> 'host', 'port'
812# splitquery('/path?query') --> '/path', 'query'
813# splittag('/path#tag') --> '/path', 'tag'
814# splitattr('/path;attr1=value1;attr2=value2;...') ->
815# '/path', ['attr1=value1', 'attr2=value2', ...]
816# splitvalue('attr=value') --> 'attr', 'value'
817# urllib.parse.unquote('abc%20def') -> 'abc def'
# quote('abc def') -> 'abc%20def'
819
def to_bytes(url):
    """Check that a str URL is pure ASCII and hand it back.

    Despite the name, a str argument is returned as a str (after an ASCII
    encode/decode round trip); an argument that is not a str is returned
    untouched.

    Raises UnicodeError if a str *url* contains non-ASCII characters.
    """
    # Most URL schemes require ASCII.  If that changes, the conversion
    # can be relaxed.
    # XXX get rid of to_bytes()
    if not isinstance(url, str):
        return url
    try:
        return url.encode("ASCII").decode()
    except UnicodeError:
        raise UnicodeError("URL " + repr(url) +
                           " contains non-ASCII characters")
832
def unwrap(url):
    """unwrap('<URL:type://host/path>') --> 'type://host/path'.

    The argument is converted with str() and stripped of surrounding
    whitespace.  An enclosing '<...>' pair and a leading 'URL:' prefix
    are then removed, stripping whitespace again after each removal.
    """
    text = str(url).strip()
    if text.startswith('<') and text.endswith('>'):
        text = text[1:-1].strip()
    if text.startswith('URL:'):
        text = text[4:].strip()
    return text
840
# Compiled lazily on the first splittype() call.
_typeprog = None
def splittype(url):
    """splittype('type:opaquestring') --> 'type', 'opaquestring'.

    The scheme is returned lower-cased.  When *url* does not start with
    a non-empty run of characters other than '/' and ':' followed by a
    ':', the result is (None, url).
    """
    global _typeprog
    if _typeprog is None:
        import re
        _typeprog = re.compile('^([^/:]+):')

    found = _typeprog.match(url)
    if found is None:
        return None, url
    # found.end() is the index just past the ':' that ends the scheme.
    return found.group(1).lower(), url[found.end():]
854
# Compiled lazily on the first splithost() call.
_hostprog = None
def splithost(url):
    """splithost('//host[:port]/path') --> 'host[:port]', '/path'.

    The host part ends at the first '/', '?' or '#'.  Whatever follows
    is returned as the path, with a '/' prepended when it is non-empty
    and does not already start with one.  If *url* does not begin with
    '//', the result is (None, url).
    """
    global _hostprog
    if _hostprog is None:
        import re
        # '#' must end the authority just like '/' and '?'; otherwise
        # '//127.0.0.1#@evil.com' reports 'evil.com' as part of the host.
        # DOTALL lets the path contain newlines instead of failing the
        # match or silently losing a trailing newline.
        _hostprog = re.compile('^//([^/#?]*)(.*)$', re.DOTALL)

    match = _hostprog.match(url)
    if match:
        host_port, path = match.groups()
        if path and not path.startswith('/'):
            path = '/' + path
        return host_port, path
    return None, url
871
# Compiled lazily on the first splituser() call.
_userprog = None
def splituser(host):
    """splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'.

    The split happens at the last '@'.  Without a match the result is
    (None, host).
    """
    global _userprog
    if _userprog is None:
        import re
        _userprog = re.compile('^(.*)@(.*)$')

    found = _userprog.match(host)
    if found is None:
        return None, host
    return found.group(1, 2)
883
# Compiled lazily on the first splitpasswd() call.
_passwdprog = None
def splitpasswd(user):
    """splitpasswd('user:passwd') -> 'user', 'passwd'.

    The split happens at the first ':'; the password part may contain
    further colons and newlines.  Without a ':' the result is
    (user, None).
    """
    global _passwdprog
    if _passwdprog is None:
        import re
        _passwdprog = re.compile('^([^:]*):(.*)$',re.S)

    found = _passwdprog.match(user)
    if found is None:
        return user, None
    return found.group(1, 2)
895
# splitport('host:port') --> 'host', 'port'
# Compiled lazily on the first splitport() call.
_portprog = None
def splitport(host):
    """splitport('host:port') --> 'host', 'port'.

    The port is returned as a string and is only recognised when the
    text after the last ':' consists of one or more ASCII digits.
    Otherwise the result is (host, None).
    """
    global _portprog
    if _portprog is None:
        import re
        _portprog = re.compile('^(.*):([0-9]+)$')

    found = _portprog.match(host)
    if found is None:
        return host, None
    return found.group(1, 2)
908
# Compiled lazily on the first splitnport() call.
_nportprog = None
def splitnport(host, defport=-1):
    """Split host and port, returning the port as a number.

    Return (host, defport) if no ':' is found; defport defaults to -1.
    Return (host, port) with an int port if a valid number follows the
    last ':'.
    Return (host, None) if there is a ':' but no valid number after it.
    """
    global _nportprog
    if _nportprog is None:
        import re
        _nportprog = re.compile('^(.*):(.*)$')

    found = _nportprog.match(host)
    if found is None:
        return host, defport

    host, port_text = found.group(1, 2)
    nport = None
    if port_text:
        try:
            nport = int(port_text)
        except ValueError:
            # Not a number: leave nport as None.
            pass
    return host, nport
930
# Compiled lazily on the first splitquery() call.
_queryprog = None
def splitquery(url):
    """splitquery('/path?query') --> '/path', 'query'.

    The split happens at the last '?'.  Without a '?' the result is
    (url, None).
    """
    global _queryprog
    if _queryprog is None:
        import re
        # Raw string: '\?' is a regex escape, not a valid string escape,
        # and would trigger an invalid-escape-sequence warning otherwise.
        _queryprog = re.compile(r'^(.*)\?([^?]*)$')

    match = _queryprog.match(url)
    if match:
        return match.group(1, 2)
    return url, None
942
# Compiled lazily on the first splittag() call.
_tagprog = None
def splittag(url):
    """splittag('/path#tag') --> '/path', 'tag'.

    The split happens at the last '#'.  Without a '#' the result is
    (url, None).
    """
    global _tagprog
    if _tagprog is None:
        import re
        _tagprog = re.compile('^(.*)#([^#]*)$')

    found = _tagprog.match(url)
    if found is None:
        return url, None
    return found.group(1, 2)
954
def splitattr(url):
    """splitattr('/path;attr1=value1;attr2=value2;...') ->
        '/path', ['attr1=value1', 'attr2=value2', ...].

    The attribute list is empty when *url* contains no ';'.
    """
    path, *attrs = url.split(';')
    return path, attrs
960
# Compiled lazily on the first splitvalue() call.
_valueprog = None
def splitvalue(attr):
    """splitvalue('attr=value') --> 'attr', 'value'.

    The split happens at the first '='.  Without a match the result is
    (attr, None).
    """
    global _valueprog
    if _valueprog is None:
        import re
        _valueprog = re.compile('^([^=]*)=(.*)$')

    found = _valueprog.match(attr)
    if found is None:
        return attr, None
    return found.group(1, 2)