| """Parse (absolute and relative) URLs. | 
 |  | 
 | urlparse module is based upon the following RFC specifications. | 
 |  | 
 | RFC 3986 (STD66): "Uniform Resource Identifiers" by T. Berners-Lee, R. Fielding | 
 | and L.  Masinter, January 2005. | 
 |  | 
RFC 2732: "Format for Literal IPv6 Addresses in URL's" by R. Hinden, B. Carpenter
and L. Masinter, December 1999.
 |  | 
 | RFC 2396:  "Uniform Resource Identifiers (URI)": Generic Syntax by T. | 
 | Berners-Lee, R. Fielding, and L. Masinter, August 1998. | 
 |  | 
 | RFC 2368: "The mailto URL scheme", by P.Hoffman , L Masinter, J. Zawinski, July 1998. | 
 |  | 
 | RFC 1808: "Relative Uniform Resource Locators", by R. Fielding, UC Irvine, June | 
 | 1995. | 
 |  | 
 | RFC 1738: "Uniform Resource Locators (URL)" by T. Berners-Lee, L. Masinter, M. | 
 | McCahill, December 1994 | 
 |  | 
RFC 3986 is considered the current standard and any future changes to
the urlparse module should conform to it.  The urlparse module is
currently not entirely compliant with this RFC due to de facto
scenarios for parsing, and for backward compatibility purposes, some
parsing quirks from older RFCs are retained. The test cases in
test_urlparse.py provide a good indicator of parsing behavior.
 | """ | 
 |  | 
 | import sys | 
 | import collections | 
 |  | 
# Public API: the names exported by a star-import of this module.
__all__ = ["urlparse", "urlunparse", "urljoin", "urldefrag",
           "urlsplit", "urlunsplit", "urlencode", "parse_qs",
           "parse_qsl", "quote", "quote_plus", "quote_from_bytes",
           "unquote", "unquote_plus", "unquote_to_bytes"]
 |  | 
 | # A classification of schemes ('' means apply by default) | 
 | uses_relative = ['ftp', 'http', 'gopher', 'nntp', 'imap', | 
 |                  'wais', 'file', 'https', 'shttp', 'mms', | 
 |                  'prospero', 'rtsp', 'rtspu', '', 'sftp'] | 
 | uses_netloc = ['ftp', 'http', 'gopher', 'nntp', 'telnet', | 
 |                'imap', 'wais', 'file', 'mms', 'https', 'shttp', | 
 |                'snews', 'prospero', 'rtsp', 'rtspu', 'rsync', '', | 
 |                'svn', 'svn+ssh', 'sftp', 'nfs', 'git', 'git+ssh'] | 
 | non_hierarchical = ['gopher', 'hdl', 'mailto', 'news', | 
 |                     'telnet', 'wais', 'imap', 'snews', 'sip', 'sips'] | 
 | uses_params = ['ftp', 'hdl', 'prospero', 'http', 'imap', | 
 |                'https', 'shttp', 'rtsp', 'rtspu', 'sip', 'sips', | 
 |                'mms', '', 'sftp'] | 
 | uses_query = ['http', 'wais', 'imap', 'https', 'shttp', 'mms', | 
 |               'gopher', 'rtsp', 'rtspu', 'sip', 'sips', ''] | 
 | uses_fragment = ['ftp', 'hdl', 'http', 'gopher', 'news', | 
 |                  'nntp', 'wais', 'https', 'shttp', 'snews', | 
 |                  'file', 'prospero', ''] | 
 |  | 
 | # Characters valid in scheme names | 
 | scheme_chars = ('abcdefghijklmnopqrstuvwxyz' | 
 |                 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' | 
 |                 '0123456789' | 
 |                 '+-.') | 
 |  | 
 | # XXX: Consider replacing with functools.lru_cache | 
 | MAX_CACHE_SIZE = 20 | 
 | _parse_cache = {} | 
 |  | 
 | def clear_cache(): | 
 |     """Clear the parse cache and the quoters cache.""" | 
 |     _parse_cache.clear() | 
 |     _safe_quoters.clear() | 
 |  | 
 |  | 
 | # Helpers for bytes handling | 
 | # For 3.2, we deliberately require applications that | 
 | # handle improperly quoted URLs to do their own | 
 | # decoding and encoding. If valid use cases are | 
 | # presented, we may relax this by using latin-1 | 
 | # decoding internally for 3.3 | 
 | _implicit_encoding = 'ascii' | 
 | _implicit_errors = 'strict' | 
 |  | 
 | def _noop(obj): | 
 |     return obj | 
 |  | 
 | def _encode_result(obj, encoding=_implicit_encoding, | 
 |                         errors=_implicit_errors): | 
 |     return obj.encode(encoding, errors) | 
 |  | 
 | def _decode_args(args, encoding=_implicit_encoding, | 
 |                        errors=_implicit_errors): | 
 |     return tuple(x.decode(encoding, errors) if x else '' for x in args) | 
 |  | 
 | def _coerce_args(*args): | 
 |     # Invokes decode if necessary to create str args | 
 |     # and returns the coerced inputs along with | 
 |     # an appropriate result coercion function | 
 |     #   - noop for str inputs | 
 |     #   - encoding function otherwise | 
 |     str_input = isinstance(args[0], str) | 
 |     for arg in args[1:]: | 
 |         # We special-case the empty string to support the | 
 |         # "scheme=''" default argument to some functions | 
 |         if arg and isinstance(arg, str) != str_input: | 
 |             raise TypeError("Cannot mix str and non-str arguments") | 
 |     if str_input: | 
 |         return args + (_noop,) | 
 |     return _decode_args(args) + (_encode_result,) | 
 |  | 
 | # Result objects are more helpful than simple tuples | 
 | class _ResultMixinStr(object): | 
 |     """Standard approach to encoding parsed results from str to bytes""" | 
 |     __slots__ = () | 
 |  | 
 |     def encode(self, encoding='ascii', errors='strict'): | 
 |         return self._encoded_counterpart(*(x.encode(encoding, errors) for x in self)) | 
 |  | 
 |  | 
 | class _ResultMixinBytes(object): | 
 |     """Standard approach to decoding parsed results from bytes to str""" | 
 |     __slots__ = () | 
 |  | 
 |     def decode(self, encoding='ascii', errors='strict'): | 
 |         return self._decoded_counterpart(*(x.decode(encoding, errors) for x in self)) | 
 |  | 
 |  | 
 | class _NetlocResultMixinBase(object): | 
 |     """Shared methods for the parsed result objects containing a netloc element""" | 
 |     __slots__ = () | 
 |  | 
 |     @property | 
 |     def username(self): | 
 |         return self._userinfo[0] | 
 |  | 
 |     @property | 
 |     def password(self): | 
 |         return self._userinfo[1] | 
 |  | 
 |     @property | 
 |     def hostname(self): | 
 |         hostname = self._hostinfo[0] | 
 |         if not hostname: | 
 |             hostname = None | 
 |         elif hostname is not None: | 
 |             hostname = hostname.lower() | 
 |         return hostname | 
 |  | 
 |     @property | 
 |     def port(self): | 
 |         port = self._hostinfo[1] | 
 |         if port is not None: | 
 |             port = int(port, 10) | 
 |         return port | 
 |  | 
 |  | 
 | class _NetlocResultMixinStr(_NetlocResultMixinBase, _ResultMixinStr): | 
 |     __slots__ = () | 
 |  | 
 |     @property | 
 |     def _userinfo(self): | 
 |         netloc = self.netloc | 
 |         userinfo, have_info, hostinfo = netloc.rpartition('@') | 
 |         if have_info: | 
 |             username, have_password, password = userinfo.partition(':') | 
 |             if not have_password: | 
 |                 password = None | 
 |         else: | 
 |             username = password = None | 
 |         return username, password | 
 |  | 
 |     @property | 
 |     def _hostinfo(self): | 
 |         netloc = self.netloc | 
 |         _, _, hostinfo = netloc.rpartition('@') | 
 |         _, have_open_br, bracketed = hostinfo.partition('[') | 
 |         if have_open_br: | 
 |             hostname, _, port = bracketed.partition(']') | 
 |             _, have_port, port = port.partition(':') | 
 |         else: | 
 |             hostname, have_port, port = hostinfo.partition(':') | 
 |         if not have_port: | 
 |             port = None | 
 |         return hostname, port | 
 |  | 
 |  | 
 | class _NetlocResultMixinBytes(_NetlocResultMixinBase, _ResultMixinBytes): | 
 |     __slots__ = () | 
 |  | 
 |     @property | 
 |     def _userinfo(self): | 
 |         netloc = self.netloc | 
 |         userinfo, have_info, hostinfo = netloc.rpartition(b'@') | 
 |         if have_info: | 
 |             username, have_password, password = userinfo.partition(b':') | 
 |             if not have_password: | 
 |                 password = None | 
 |         else: | 
 |             username = password = None | 
 |         return username, password | 
 |  | 
 |     @property | 
 |     def _hostinfo(self): | 
 |         netloc = self.netloc | 
 |         _, _, hostinfo = netloc.rpartition(b'@') | 
 |         _, have_open_br, bracketed = hostinfo.partition(b'[') | 
 |         if have_open_br: | 
 |             hostname, _, port = bracketed.partition(b']') | 
 |             _, have_port, port = port.partition(b':') | 
 |         else: | 
 |             hostname, have_port, port = hostinfo.partition(b':') | 
 |         if not have_port: | 
 |             port = None | 
 |         return hostname, port | 
 |  | 
 |  | 
 | from collections import namedtuple | 
 |  | 
# Field layouts shared by the str and the bytes flavour of each result type.
_DefragResultBase = namedtuple('DefragResult', 'url fragment')
_SplitResultBase = namedtuple('SplitResult', 'scheme netloc path query fragment')
_ParseResultBase = namedtuple('ParseResult', 'scheme netloc path params query fragment')

# For backwards compatibility, alias _NetlocResultMixinStr
# ResultBase is no longer part of the documented API, but it is
# retained since deprecating it isn't worth the hassle
ResultBase = _NetlocResultMixinStr
 |  | 
 | # Structured result objects for string data | 
 | class DefragResult(_DefragResultBase, _ResultMixinStr): | 
 |     __slots__ = () | 
 |     def geturl(self): | 
 |         if self.fragment: | 
 |             return self.url + '#' + self.fragment | 
 |         else: | 
 |             return self.url | 
 |  | 
class SplitResult(_SplitResultBase, _NetlocResultMixinStr):
    """5-field str result (scheme, netloc, path, query, fragment) built by urlsplit()."""
    __slots__ = ()
    def geturl(self):
        """Reassemble the URL by handing this tuple to urlunsplit()."""
        return urlunsplit(self)
 |  | 
class ParseResult(_ParseResultBase, _NetlocResultMixinStr):
    """6-field str result (scheme, netloc, path, params, query, fragment) built by urlparse()."""
    __slots__ = ()
    def geturl(self):
        """Reassemble the URL by handing this tuple to urlunparse()."""
        return urlunparse(self)
 |  | 
 | # Structured result objects for bytes data | 
 | class DefragResultBytes(_DefragResultBase, _ResultMixinBytes): | 
 |     __slots__ = () | 
 |     def geturl(self): | 
 |         if self.fragment: | 
 |             return self.url + b'#' + self.fragment | 
 |         else: | 
 |             return self.url | 
 |  | 
class SplitResultBytes(_SplitResultBase, _NetlocResultMixinBytes):
    """5-field bytes result; paired with SplitResult for encode()/decode()."""
    __slots__ = ()
    def geturl(self):
        """Reassemble the URL by handing this tuple to urlunsplit()."""
        return urlunsplit(self)
 |  | 
class ParseResultBytes(_ParseResultBase, _NetlocResultMixinBytes):
    """6-field bytes result; paired with ParseResult for encode()/decode()."""
    __slots__ = ()
    def geturl(self):
        """Reassemble the URL by handing this tuple to urlunparse()."""
        return urlunparse(self)
 |  | 
 | # Set up the encode/decode result pairs | 
 | def _fix_result_transcoding(): | 
 |     _result_pairs = ( | 
 |         (DefragResult, DefragResultBytes), | 
 |         (SplitResult, SplitResultBytes), | 
 |         (ParseResult, ParseResultBytes), | 
 |     ) | 
 |     for _decoded, _encoded in _result_pairs: | 
 |         _decoded._encoded_counterpart = _encoded | 
 |         _encoded._decoded_counterpart = _decoded | 
 |  | 
 | _fix_result_transcoding() | 
 | del _fix_result_transcoding | 
 |  | 
 | def urlparse(url, scheme='', allow_fragments=True): | 
 |     """Parse a URL into 6 components: | 
 |     <scheme>://<netloc>/<path>;<params>?<query>#<fragment> | 
 |     Return a 6-tuple: (scheme, netloc, path, params, query, fragment). | 
 |     Note that we don't break the components up in smaller bits | 
 |     (e.g. netloc is a single string) and we don't expand % escapes.""" | 
 |     url, scheme, _coerce_result = _coerce_args(url, scheme) | 
 |     tuple = urlsplit(url, scheme, allow_fragments) | 
 |     scheme, netloc, url, query, fragment = tuple | 
 |     if scheme in uses_params and ';' in url: | 
 |         url, params = _splitparams(url) | 
 |     else: | 
 |         params = '' | 
 |     result = ParseResult(scheme, netloc, url, params, query, fragment) | 
 |     return _coerce_result(result) | 
 |  | 
 | def _splitparams(url): | 
 |     if '/'  in url: | 
 |         i = url.find(';', url.rfind('/')) | 
 |         if i < 0: | 
 |             return url, '' | 
 |     else: | 
 |         i = url.find(';') | 
 |     return url[:i], url[i+1:] | 
 |  | 
 | def _splitnetloc(url, start=0): | 
 |     delim = len(url)   # position of end of domain part of url, default is end | 
 |     for c in '/?#':    # look for delimiters; the order is NOT important | 
 |         wdelim = url.find(c, start)        # find first of this delim | 
 |         if wdelim >= 0:                    # if found | 
 |             delim = min(delim, wdelim)     # use earliest delim position | 
 |     return url[start:delim], url[delim:]   # return (domain, rest) | 
 |  | 
def urlsplit(url, scheme='', allow_fragments=True):
    """Parse a URL into 5 components:
    <scheme>://<netloc>/<path>?<query>#<fragment>
    Return a 5-tuple: (scheme, netloc, path, query, fragment).
    Note that we don't break the components up in smaller bits
    (e.g. netloc is a single string) and we don't expand % escapes.

    scheme is the value reported when the URL itself carries no scheme.
    When allow_fragments is false, '#' is not treated as a fragment
    delimiter and the fragment field stays empty.
    Raises ValueError("Invalid IPv6 URL") when the netloc contains '['
    without ']' or ']' without '['.
    Results are stored in the module-level _parse_cache."""
    url, scheme, _coerce_result = _coerce_args(url, scheme)
    allow_fragments = bool(allow_fragments)
    # The cache key covers every input that influences the result.
    key = url, scheme, allow_fragments, type(url), type(scheme)
    cached = _parse_cache.get(key, None)
    if cached:
        return _coerce_result(cached)
    if len(_parse_cache) >= MAX_CACHE_SIZE: # avoid runaway growth
        clear_cache()
    netloc = query = fragment = ''
    i = url.find(':')
    if i > 0:
        # Fast path: a literal lower-case 'http' prefix skips the scheme
        # validation and the uses_fragment / uses_query lookups below.
        if url[:i] == 'http': # optimize the common case
            scheme = url[:i].lower()
            url = url[i+1:]
            if url[:2] == '//':
                netloc, url = _splitnetloc(url, 2)
                if (('[' in netloc and ']' not in netloc) or
                        (']' in netloc and '[' not in netloc)):
                    raise ValueError("Invalid IPv6 URL")
            if allow_fragments and '#' in url:
                url, fragment = url.split('#', 1)
            if '?' in url:
                url, query = url.split('?', 1)
            v = SplitResult(scheme, netloc, url, query, fragment)
            _parse_cache[key] = v
            return _coerce_result(v)
        # The text before the first ':' is only considered as a scheme when
        # the ':' is not directly followed by a digit (presumably so that
        # "host:80"-style input is not mistaken for a scheme -- confirm).
        if url.endswith(':') or not url[i+1].isdigit():
            # for/else: the else branch runs only when no invalid scheme
            # character was found.
            for c in url[:i]:
                if c not in scheme_chars:
                    break
            else:
                scheme, url = url[:i].lower(), url[i+1:]
    if url[:2] == '//':
        netloc, url = _splitnetloc(url, 2)
        if (('[' in netloc and ']' not in netloc) or
                (']' in netloc and '[' not in netloc)):
            raise ValueError("Invalid IPv6 URL")
    # Unlike the http fast path, the generic path splits off fragment and
    # query only for schemes listed in uses_fragment / uses_query.
    if allow_fragments and scheme in uses_fragment and '#' in url:
        url, fragment = url.split('#', 1)
    if scheme in uses_query and '?' in url:
        url, query = url.split('?', 1)
    v = SplitResult(scheme, netloc, url, query, fragment)
    _parse_cache[key] = v
    return _coerce_result(v)
 |  | 
 | def urlunparse(components): | 
 |     """Put a parsed URL back together again.  This may result in a | 
 |     slightly different, but equivalent URL, if the URL that was parsed | 
 |     originally had redundant delimiters, e.g. a ? with an empty query | 
 |     (the draft states that these are equivalent).""" | 
 |     scheme, netloc, url, params, query, fragment, _coerce_result = ( | 
 |                                                   _coerce_args(*components)) | 
 |     if params: | 
 |         url = "%s;%s" % (url, params) | 
 |     return _coerce_result(urlunsplit((scheme, netloc, url, query, fragment))) | 
 |  | 
 | def urlunsplit(components): | 
 |     """Combine the elements of a tuple as returned by urlsplit() into a | 
 |     complete URL as a string. The data argument can be any five-item iterable. | 
 |     This may result in a slightly different, but equivalent URL, if the URL that | 
 |     was parsed originally had unnecessary delimiters (for example, a ? with an | 
 |     empty query; the RFC states that these are equivalent).""" | 
 |     scheme, netloc, url, query, fragment, _coerce_result = ( | 
 |                                           _coerce_args(*components)) | 
 |     if netloc or (scheme and scheme in uses_netloc and url[:2] != '//'): | 
 |         if url and url[:1] != '/': url = '/' + url | 
 |         url = '//' + (netloc or '') + url | 
 |     if scheme: | 
 |         url = scheme + ':' + url | 
 |     if query: | 
 |         url = url + '?' + query | 
 |     if fragment: | 
 |         url = url + '#' + fragment | 
 |     return _coerce_result(url) | 
 |  | 
def urljoin(base, url, allow_fragments=True):
    """Join a base URL and a possibly relative URL to form an absolute
    interpretation of the latter.

    If either argument is empty, the other one is returned unchanged.
    allow_fragments is passed through to urlparse() for both URLs."""
    if not base:
        return url
    if not url:
        return base
    base, url, _coerce_result = _coerce_args(base, url)
    bscheme, bnetloc, bpath, bparams, bquery, bfragment = \
            urlparse(base, '', allow_fragments)
    # The reference is parsed with the base scheme as its default scheme.
    scheme, netloc, path, params, query, fragment = \
            urlparse(url, bscheme, allow_fragments)
    # A different scheme, or one that does not support relative references:
    # the reference is returned as is.
    if scheme != bscheme or scheme not in uses_relative:
        return _coerce_result(url)
    if scheme in uses_netloc:
        # A reference with its own netloc only borrows the scheme.
        if netloc:
            return _coerce_result(urlunparse((scheme, netloc, path,
                                              params, query, fragment)))
        netloc = bnetloc
    # An absolute path replaces the base path entirely.
    if path[:1] == '/':
        return _coerce_result(urlunparse((scheme, netloc, path,
                                          params, query, fragment)))
    # No path and no params: keep the base path/params, and the base query
    # too unless the reference supplies its own.
    if not path and not params:
        path = bpath
        params = bparams
        if not query:
            query = bquery
        return _coerce_result(urlunparse((scheme, netloc, path,
                                          params, query, fragment)))
    # Relative path: drop the last segment of the base path and append the
    # segments of the reference.
    segments = bpath.split('/')[:-1] + path.split('/')
    # XXX The stuff below is bogus in various ways...
    # A trailing '.' becomes an empty segment (keeps the trailing slash);
    # every other '.' segment is removed.
    if segments[-1] == '.':
        segments[-1] = ''
    while '.' in segments:
        segments.remove('.')
    # Repeatedly delete a "<name>/.." pair; the scan restarts after each
    # deletion and stops once a full pass finds nothing to remove.  The
    # first and last segments are never examined as the '..' here.
    while 1:
        i = 1
        n = len(segments) - 1
        while i < n:
            if (segments[i] == '..'
                and segments[i-1] not in ('', '..')):
                del segments[i-1:i+1]
                break
            i = i+1
        else:
            break
    # Handle a '..' left in the final position.
    if segments == ['', '..']:
        segments[-1] = ''
    elif len(segments) >= 2 and segments[-1] == '..':
        segments[-2:] = ['']
    return _coerce_result(urlunparse((scheme, netloc, '/'.join(segments),
                                      params, query, fragment)))
 |  | 
 | def urldefrag(url): | 
 |     """Removes any existing fragment from URL. | 
 |  | 
 |     Returns a tuple of the defragmented URL and the fragment.  If | 
 |     the URL contained no fragments, the second element is the | 
 |     empty string. | 
 |     """ | 
 |     url, _coerce_result = _coerce_args(url) | 
 |     if '#' in url: | 
 |         s, n, p, a, q, frag = urlparse(url) | 
 |         defrag = urlunparse((s, n, p, a, q, '')) | 
 |     else: | 
 |         frag = '' | 
 |         defrag = url | 
 |     return _coerce_result(DefragResult(defrag, frag)) | 
 |  | 
 | def unquote_to_bytes(string): | 
 |     """unquote_to_bytes('abc%20def') -> b'abc def'.""" | 
 |     # Note: strings are encoded as UTF-8. This is only an issue if it contains | 
 |     # unescaped non-ASCII characters, which URIs should not. | 
 |     if not string: | 
 |         # Is it a string-like object? | 
 |         string.split | 
 |         return b'' | 
 |     if isinstance(string, str): | 
 |         string = string.encode('utf-8') | 
 |     res = string.split(b'%') | 
 |     if len(res) == 1: | 
 |         return string | 
 |     string = res[0] | 
 |     for item in res[1:]: | 
 |         try: | 
 |             string += bytes([int(item[:2], 16)]) + item[2:] | 
 |         except ValueError: | 
 |             string += b'%' + item | 
 |     return string | 
 |  | 
 | def unquote(string, encoding='utf-8', errors='replace'): | 
 |     """Replace %xx escapes by their single-character equivalent. The optional | 
 |     encoding and errors parameters specify how to decode percent-encoded | 
 |     sequences into Unicode characters, as accepted by the bytes.decode() | 
 |     method. | 
 |     By default, percent-encoded sequences are decoded with UTF-8, and invalid | 
 |     sequences are replaced by a placeholder character. | 
 |  | 
 |     unquote('abc%20def') -> 'abc def'. | 
 |     """ | 
 |     if string == '': | 
 |         return string | 
 |     res = string.split('%') | 
 |     if len(res) == 1: | 
 |         return string | 
 |     if encoding is None: | 
 |         encoding = 'utf-8' | 
 |     if errors is None: | 
 |         errors = 'replace' | 
 |     # pct_sequence: contiguous sequence of percent-encoded bytes, decoded | 
 |     pct_sequence = b'' | 
 |     string = res[0] | 
 |     for item in res[1:]: | 
 |         try: | 
 |             if not item: | 
 |                 raise ValueError | 
 |             pct_sequence += bytes.fromhex(item[:2]) | 
 |             rest = item[2:] | 
 |             if not rest: | 
 |                 # This segment was just a single percent-encoded character. | 
 |                 # May be part of a sequence of code units, so delay decoding. | 
 |                 # (Stored in pct_sequence). | 
 |                 continue | 
 |         except ValueError: | 
 |             rest = '%' + item | 
 |         # Encountered non-percent-encoded characters. Flush the current | 
 |         # pct_sequence. | 
 |         string += pct_sequence.decode(encoding, errors) + rest | 
 |         pct_sequence = b'' | 
 |     if pct_sequence: | 
 |         # Flush the final pct_sequence | 
 |         string += pct_sequence.decode(encoding, errors) | 
 |     return string | 
 |  | 
 | def parse_qs(qs, keep_blank_values=False, strict_parsing=False, | 
 |              encoding='utf-8', errors='replace'): | 
 |     """Parse a query given as a string argument. | 
 |  | 
 |         Arguments: | 
 |  | 
 |         qs: percent-encoded query string to be parsed | 
 |  | 
 |         keep_blank_values: flag indicating whether blank values in | 
 |             percent-encoded queries should be treated as blank strings. | 
 |             A true value indicates that blanks should be retained as | 
 |             blank strings.  The default false value indicates that | 
 |             blank values are to be ignored and treated as if they were | 
 |             not included. | 
 |  | 
 |         strict_parsing: flag indicating what to do with parsing errors. | 
 |             If false (the default), errors are silently ignored. | 
 |             If true, errors raise a ValueError exception. | 
 |  | 
 |         encoding and errors: specify how to decode percent-encoded sequences | 
 |             into Unicode characters, as accepted by the bytes.decode() method. | 
 |     """ | 
 |     dict = {} | 
 |     pairs = parse_qsl(qs, keep_blank_values, strict_parsing, | 
 |                       encoding=encoding, errors=errors) | 
 |     for name, value in pairs: | 
 |         if name in dict: | 
 |             dict[name].append(value) | 
 |         else: | 
 |             dict[name] = [value] | 
 |     return dict | 
 |  | 
 | def parse_qsl(qs, keep_blank_values=False, strict_parsing=False, | 
 |               encoding='utf-8', errors='replace'): | 
 |     """Parse a query given as a string argument. | 
 |  | 
 |     Arguments: | 
 |  | 
 |     qs: percent-encoded query string to be parsed | 
 |  | 
 |     keep_blank_values: flag indicating whether blank values in | 
 |         percent-encoded queries should be treated as blank strings.  A | 
 |         true value indicates that blanks should be retained as blank | 
 |         strings.  The default false value indicates that blank values | 
 |         are to be ignored and treated as if they were  not included. | 
 |  | 
 |     strict_parsing: flag indicating what to do with parsing errors. If | 
 |         false (the default), errors are silently ignored. If true, | 
 |         errors raise a ValueError exception. | 
 |  | 
 |     encoding and errors: specify how to decode percent-encoded sequences | 
 |         into Unicode characters, as accepted by the bytes.decode() method. | 
 |  | 
 |     Returns a list, as G-d intended. | 
 |     """ | 
 |     qs, _coerce_result = _coerce_args(qs) | 
 |     pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')] | 
 |     r = [] | 
 |     for name_value in pairs: | 
 |         if not name_value and not strict_parsing: | 
 |             continue | 
 |         nv = name_value.split('=', 1) | 
 |         if len(nv) != 2: | 
 |             if strict_parsing: | 
 |                 raise ValueError("bad query field: %r" % (name_value,)) | 
 |             # Handle case of a control-name with no equal sign | 
 |             if keep_blank_values: | 
 |                 nv.append('') | 
 |             else: | 
 |                 continue | 
 |         if len(nv[1]) or keep_blank_values: | 
 |             name = nv[0].replace('+', ' ') | 
 |             name = unquote(name, encoding=encoding, errors=errors) | 
 |             name = _coerce_result(name) | 
 |             value = nv[1].replace('+', ' ') | 
 |             value = unquote(value, encoding=encoding, errors=errors) | 
 |             value = _coerce_result(value) | 
 |             r.append((name, value)) | 
 |     return r | 
 |  | 
 | def unquote_plus(string, encoding='utf-8', errors='replace'): | 
 |     """Like unquote(), but also replace plus signs by spaces, as required for | 
 |     unquoting HTML form values. | 
 |  | 
 |     unquote_plus('%7e/abc+def') -> '~/abc def' | 
 |     """ | 
 |     string = string.replace('+', ' ') | 
 |     return unquote(string, encoding, errors) | 
 |  | 
 | _ALWAYS_SAFE = frozenset(b'ABCDEFGHIJKLMNOPQRSTUVWXYZ' | 
 |                          b'abcdefghijklmnopqrstuvwxyz' | 
 |                          b'0123456789' | 
 |                          b'_.-') | 
 | _ALWAYS_SAFE_BYTES = bytes(_ALWAYS_SAFE) | 
 | _safe_quoters = {} | 
 |  | 
 | class Quoter(collections.defaultdict): | 
 |     """A mapping from bytes (in range(0,256)) to strings. | 
 |  | 
 |     String values are percent-encoded byte values, unless the key < 128, and | 
 |     in the "safe" set (either the specified safe set, or default set). | 
 |     """ | 
 |     # Keeps a cache internally, using defaultdict, for efficiency (lookups | 
 |     # of cached keys don't call Python code at all). | 
 |     def __init__(self, safe): | 
 |         """safe: bytes object.""" | 
 |         self.safe = _ALWAYS_SAFE.union(safe) | 
 |  | 
 |     def __repr__(self): | 
 |         # Without this, will just display as a defaultdict | 
 |         return "<Quoter %r>" % dict(self) | 
 |  | 
 |     def __missing__(self, b): | 
 |         # Handle a cache miss. Store quoted string in cache and return. | 
 |         res = chr(b) if b in self.safe else '%{:02X}'.format(b) | 
 |         self[b] = res | 
 |         return res | 
 |  | 
 | def quote(string, safe='/', encoding=None, errors=None): | 
 |     """quote('abc def') -> 'abc%20def' | 
 |  | 
 |     Each part of a URL, e.g. the path info, the query, etc., has a | 
 |     different set of reserved characters that must be quoted. | 
 |  | 
 |     RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists | 
 |     the following reserved characters. | 
 |  | 
 |     reserved    = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | | 
 |                   "$" | "," | 
 |  | 
 |     Each of these characters is reserved in some component of a URL, | 
 |     but not necessarily in all of them. | 
 |  | 
 |     By default, the quote function is intended for quoting the path | 
 |     section of a URL.  Thus, it will not encode '/'.  This character | 
 |     is reserved, but in typical usage the quote function is being | 
 |     called on a path where the existing slash characters are used as | 
 |     reserved characters. | 
 |  | 
 |     string and safe may be either str or bytes objects. encoding must | 
 |     not be specified if string is a str. | 
 |  | 
 |     The optional encoding and errors parameters specify how to deal with | 
 |     non-ASCII characters, as accepted by the str.encode method. | 
 |     By default, encoding='utf-8' (characters are encoded with UTF-8), and | 
 |     errors='strict' (unsupported characters raise a UnicodeEncodeError). | 
 |     """ | 
 |     if isinstance(string, str): | 
 |         if not string: | 
 |             return string | 
 |         if encoding is None: | 
 |             encoding = 'utf-8' | 
 |         if errors is None: | 
 |             errors = 'strict' | 
 |         string = string.encode(encoding, errors) | 
 |     else: | 
 |         if encoding is not None: | 
 |             raise TypeError("quote() doesn't support 'encoding' for bytes") | 
 |         if errors is not None: | 
 |             raise TypeError("quote() doesn't support 'errors' for bytes") | 
 |     return quote_from_bytes(string, safe) | 
 |  | 
 | def quote_plus(string, safe='', encoding=None, errors=None): | 
 |     """Like quote(), but also replace ' ' with '+', as required for quoting | 
 |     HTML form values. Plus signs in the original string are escaped unless | 
 |     they are included in safe. It also does not have safe default to '/'. | 
 |     """ | 
 |     # Check if ' ' in string, where string may either be a str or bytes.  If | 
 |     # there are no spaces, the regular quote will produce the right answer. | 
 |     if ((isinstance(string, str) and ' ' not in string) or | 
 |         (isinstance(string, bytes) and b' ' not in string)): | 
 |         return quote(string, safe, encoding, errors) | 
 |     if isinstance(safe, str): | 
 |         space = ' ' | 
 |     else: | 
 |         space = b' ' | 
 |     string = quote(string, safe + space, encoding, errors) | 
 |     return string.replace(' ', '+') | 
 |  | 
 | def quote_from_bytes(bs, safe='/'): | 
 |     """Like quote(), but accepts a bytes object rather than a str, and does | 
 |     not perform string-to-bytes encoding.  It always returns an ASCII string. | 
 |     quote_from_bytes(b'abc def\xab') -> 'abc%20def%AB' | 
 |     """ | 
 |     if not isinstance(bs, (bytes, bytearray)): | 
 |         raise TypeError("quote_from_bytes() expected bytes") | 
 |     if not bs: | 
 |         return '' | 
 |     if isinstance(safe, str): | 
 |         # Normalize 'safe' by converting to bytes and removing non-ASCII chars | 
 |         safe = safe.encode('ascii', 'ignore') | 
 |     else: | 
 |         safe = bytes([c for c in safe if c < 128]) | 
 |     if not bs.rstrip(_ALWAYS_SAFE_BYTES + safe): | 
 |         return bs.decode() | 
 |     try: | 
 |         quoter = _safe_quoters[safe] | 
 |     except KeyError: | 
 |         _safe_quoters[safe] = quoter = Quoter(safe).__getitem__ | 
 |     return ''.join([quoter(char) for char in bs]) | 
 |  | 
 | def urlencode(query, doseq=False, safe='', encoding=None, errors=None): | 
 |     """Encode a sequence of two-element tuples or dictionary into a URL query string. | 
 |  | 
 |     If any values in the query arg are sequences and doseq is true, each | 
 |     sequence element is converted to a separate parameter. | 
 |  | 
 |     If the query arg is a sequence of two-element tuples, the order of the | 
 |     parameters in the output will match the order of parameters in the | 
 |     input. | 
 |  | 
 |     The query arg may be either a string or a bytes type. When query arg is a | 
 |     string, the safe, encoding and error parameters are sent the quote_plus for | 
 |     encoding. | 
 |     """ | 
 |  | 
 |     if hasattr(query, "items"): | 
 |         query = query.items() | 
 |     else: | 
 |         # It's a bother at times that strings and string-like objects are | 
 |         # sequences. | 
 |         try: | 
 |             # non-sequence items should not work with len() | 
 |             # non-empty strings will fail this | 
 |             if len(query) and not isinstance(query[0], tuple): | 
 |                 raise TypeError | 
 |             # Zero-length sequences of all types will get here and succeed, | 
 |             # but that's a minor nit.  Since the original implementation | 
 |             # allowed empty dicts that type of behavior probably should be | 
 |             # preserved for consistency | 
 |         except TypeError: | 
 |             ty, va, tb = sys.exc_info() | 
 |             raise TypeError("not a valid non-string sequence " | 
 |                             "or mapping object").with_traceback(tb) | 
 |  | 
 |     l = [] | 
 |     if not doseq: | 
 |         for k, v in query: | 
 |             if isinstance(k, bytes): | 
 |                 k = quote_plus(k, safe) | 
 |             else: | 
 |                 k = quote_plus(str(k), safe, encoding, errors) | 
 |  | 
 |             if isinstance(v, bytes): | 
 |                 v = quote_plus(v, safe) | 
 |             else: | 
 |                 v = quote_plus(str(v), safe, encoding, errors) | 
 |             l.append(k + '=' + v) | 
 |     else: | 
 |         for k, v in query: | 
 |             if isinstance(k, bytes): | 
 |                 k = quote_plus(k, safe) | 
 |             else: | 
 |                 k = quote_plus(str(k), safe, encoding, errors) | 
 |  | 
 |             if isinstance(v, bytes): | 
 |                 v = quote_plus(v, safe) | 
 |                 l.append(k + '=' + v) | 
 |             elif isinstance(v, str): | 
 |                 v = quote_plus(v, safe, encoding, errors) | 
 |                 l.append(k + '=' + v) | 
 |             else: | 
 |                 try: | 
 |                     # Is this a sufficient test for sequence-ness? | 
 |                     x = len(v) | 
 |                 except TypeError: | 
 |                     # not a sequence | 
 |                     v = quote_plus(str(v), safe, encoding, errors) | 
 |                     l.append(k + '=' + v) | 
 |                 else: | 
 |                     # loop over the sequence | 
 |                     for elt in v: | 
 |                         if isinstance(elt, bytes): | 
 |                             elt = quote_plus(elt, safe) | 
 |                         else: | 
 |                             elt = quote_plus(str(elt), safe, encoding, errors) | 
 |                         l.append(k + '=' + elt) | 
 |     return '&'.join(l) | 
 |  | 
 | # Utilities to parse URLs (most of these return None for missing parts): | 
 | # unwrap('<URL:type://host/path>') --> 'type://host/path' | 
 | # splittype('type:opaquestring') --> 'type', 'opaquestring' | 
 | # splithost('//host[:port]/path') --> 'host[:port]', '/path' | 
 | # splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]' | 
 | # splitpasswd('user:passwd') -> 'user', 'passwd' | 
 | # splitport('host:port') --> 'host', 'port' | 
 | # splitquery('/path?query') --> '/path', 'query' | 
 | # splittag('/path#tag') --> '/path', 'tag' | 
 | # splitattr('/path;attr1=value1;attr2=value2;...') -> | 
 | #   '/path', ['attr1=value1', 'attr2=value2', ...] | 
 | # splitvalue('attr=value') --> 'attr', 'value' | 
 | # urllib.parse.unquote('abc%20def') -> 'abc def' | 
# quote('abc def') -> 'abc%20def'
 |  | 
 | def to_bytes(url): | 
 |     """to_bytes(u"URL") --> 'URL'.""" | 
 |     # Most URL schemes require ASCII. If that changes, the conversion | 
 |     # can be relaxed. | 
 |     # XXX get rid of to_bytes() | 
 |     if isinstance(url, str): | 
 |         try: | 
 |             url = url.encode("ASCII").decode() | 
 |         except UnicodeError: | 
 |             raise UnicodeError("URL " + repr(url) + | 
 |                                " contains non-ASCII characters") | 
 |     return url | 
 |  | 
 | def unwrap(url): | 
 |     """unwrap('<URL:type://host/path>') --> 'type://host/path'.""" | 
 |     url = str(url).strip() | 
 |     if url[:1] == '<' and url[-1:] == '>': | 
 |         url = url[1:-1].strip() | 
 |     if url[:4] == 'URL:': url = url[4:].strip() | 
 |     return url | 
 |  | 
 | _typeprog = None | 
 | def splittype(url): | 
 |     """splittype('type:opaquestring') --> 'type', 'opaquestring'.""" | 
 |     global _typeprog | 
 |     if _typeprog is None: | 
 |         import re | 
 |         _typeprog = re.compile('^([^/:]+):') | 
 |  | 
 |     match = _typeprog.match(url) | 
 |     if match: | 
 |         scheme = match.group(1) | 
 |         return scheme.lower(), url[len(scheme) + 1:] | 
 |     return None, url | 
 |  | 
 | _hostprog = None | 
 | def splithost(url): | 
 |     """splithost('//host[:port]/path') --> 'host[:port]', '/path'.""" | 
 |     global _hostprog | 
 |     if _hostprog is None: | 
 |         import re | 
 |         _hostprog = re.compile('^//([^/?]*)(.*)$') | 
 |  | 
 |     match = _hostprog.match(url) | 
 |     if match: | 
 |         host_port = match.group(1) | 
 |         path = match.group(2) | 
 |         if path and not path.startswith('/'): | 
 |             path = '/' + path | 
 |         return host_port, path | 
 |     return None, url | 
 |  | 
 | _userprog = None | 
 | def splituser(host): | 
 |     """splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'.""" | 
 |     global _userprog | 
 |     if _userprog is None: | 
 |         import re | 
 |         _userprog = re.compile('^(.*)@(.*)$') | 
 |  | 
 |     match = _userprog.match(host) | 
 |     if match: return match.group(1, 2) | 
 |     return None, host | 
 |  | 
 | _passwdprog = None | 
 | def splitpasswd(user): | 
 |     """splitpasswd('user:passwd') -> 'user', 'passwd'.""" | 
 |     global _passwdprog | 
 |     if _passwdprog is None: | 
 |         import re | 
 |         _passwdprog = re.compile('^([^:]*):(.*)$',re.S) | 
 |  | 
 |     match = _passwdprog.match(user) | 
 |     if match: return match.group(1, 2) | 
 |     return user, None | 
 |  | 
# splitport('host:port') --> 'host', 'port'
 | _portprog = None | 
 | def splitport(host): | 
 |     """splitport('host:port') --> 'host', 'port'.""" | 
 |     global _portprog | 
 |     if _portprog is None: | 
 |         import re | 
 |         _portprog = re.compile('^(.*):([0-9]+)$') | 
 |  | 
 |     match = _portprog.match(host) | 
 |     if match: return match.group(1, 2) | 
 |     return host, None | 
 |  | 
 | _nportprog = None | 
 | def splitnport(host, defport=-1): | 
 |     """Split host and port, returning numeric port. | 
 |     Return given default port if no ':' found; defaults to -1. | 
 |     Return numerical port if a valid number are found after ':'. | 
 |     Return None if ':' but not a valid number.""" | 
 |     global _nportprog | 
 |     if _nportprog is None: | 
 |         import re | 
 |         _nportprog = re.compile('^(.*):(.*)$') | 
 |  | 
 |     match = _nportprog.match(host) | 
 |     if match: | 
 |         host, port = match.group(1, 2) | 
 |         try: | 
 |             if not port: raise ValueError("no digits") | 
 |             nport = int(port) | 
 |         except ValueError: | 
 |             nport = None | 
 |         return host, nport | 
 |     return host, defport | 
 |  | 
 | _queryprog = None | 
 | def splitquery(url): | 
 |     """splitquery('/path?query') --> '/path', 'query'.""" | 
 |     global _queryprog | 
 |     if _queryprog is None: | 
 |         import re | 
 |         _queryprog = re.compile('^(.*)\?([^?]*)$') | 
 |  | 
 |     match = _queryprog.match(url) | 
 |     if match: return match.group(1, 2) | 
 |     return url, None | 
 |  | 
 | _tagprog = None | 
 | def splittag(url): | 
 |     """splittag('/path#tag') --> '/path', 'tag'.""" | 
 |     global _tagprog | 
 |     if _tagprog is None: | 
 |         import re | 
 |         _tagprog = re.compile('^(.*)#([^#]*)$') | 
 |  | 
 |     match = _tagprog.match(url) | 
 |     if match: return match.group(1, 2) | 
 |     return url, None | 
 |  | 
 | def splitattr(url): | 
 |     """splitattr('/path;attr1=value1;attr2=value2;...') -> | 
 |         '/path', ['attr1=value1', 'attr2=value2', ...].""" | 
 |     words = url.split(';') | 
 |     return words[0], words[1:] | 
 |  | 
 | _valueprog = None | 
 | def splitvalue(attr): | 
 |     """splitvalue('attr=value') --> 'attr', 'value'.""" | 
 |     global _valueprog | 
 |     if _valueprog is None: | 
 |         import re | 
 |         _valueprog = re.compile('^([^=]*)=(.*)$') | 
 |  | 
 |     match = _valueprog.match(attr) | 
 |     if match: return match.group(1, 2) | 
 |     return attr, None |