| """Parse (absolute and relative) URLs. | 
 |  | 
 | See RFC 1808: "Relative Uniform Resource Locators", by R. Fielding, | 
 | UC Irvine, June 1995. | 
 | """ | 
 |  | 
 | import sys | 
 | import collections | 
 |  | 
 | __all__ = ["urlparse", "urlunparse", "urljoin", "urldefrag", | 
 |            "urlsplit", "urlunsplit", "parse_qs", "parse_qsl", | 
 |            "quote", "quote_plus", "quote_from_bytes", | 
 |            "unquote", "unquote_plus", "unquote_to_bytes"] | 
 |  | 
 | # A classification of schemes ('' means apply by default) | 
 | uses_relative = ['ftp', 'http', 'gopher', 'nntp', 'imap', | 
 |                  'wais', 'file', 'https', 'shttp', 'mms', | 
 |                  'prospero', 'rtsp', 'rtspu', '', 'sftp'] | 
 | uses_netloc = ['ftp', 'http', 'gopher', 'nntp', 'telnet', | 
 |                'imap', 'wais', 'file', 'mms', 'https', 'shttp', | 
 |                'snews', 'prospero', 'rtsp', 'rtspu', 'rsync', '', | 
 |                'svn', 'svn+ssh', 'sftp','nfs'] | 
 | non_hierarchical = ['gopher', 'hdl', 'mailto', 'news', | 
 |                     'telnet', 'wais', 'imap', 'snews', 'sip', 'sips'] | 
 | uses_params = ['ftp', 'hdl', 'prospero', 'http', 'imap', | 
 |                'https', 'shttp', 'rtsp', 'rtspu', 'sip', 'sips', | 
 |                'mms', '', 'sftp'] | 
 | uses_query = ['http', 'wais', 'imap', 'https', 'shttp', 'mms', | 
 |               'gopher', 'rtsp', 'rtspu', 'sip', 'sips', ''] | 
 | uses_fragment = ['ftp', 'hdl', 'http', 'gopher', 'news', | 
 |                  'nntp', 'wais', 'https', 'shttp', 'snews', | 
 |                  'file', 'prospero', ''] | 
 |  | 
 | # Characters valid in scheme names | 
 | scheme_chars = ('abcdefghijklmnopqrstuvwxyz' | 
 |                 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' | 
 |                 '0123456789' | 
 |                 '+-.') | 
 |  | 
 | MAX_CACHE_SIZE = 20 | 
 | _parse_cache = {} | 
 |  | 
 | def clear_cache(): | 
 |     """Clear the parse cache.""" | 
 |     _parse_cache.clear() | 
 |  | 
 |  | 
 | class ResultMixin(object): | 
 |     """Shared methods for the parsed result objects.""" | 
 |  | 
 |     @property | 
 |     def username(self): | 
 |         netloc = self.netloc | 
 |         if "@" in netloc: | 
 |             userinfo = netloc.rsplit("@", 1)[0] | 
 |             if ":" in userinfo: | 
 |                 userinfo = userinfo.split(":", 1)[0] | 
 |             return userinfo | 
 |         return None | 
 |  | 
 |     @property | 
 |     def password(self): | 
 |         netloc = self.netloc | 
 |         if "@" in netloc: | 
 |             userinfo = netloc.rsplit("@", 1)[0] | 
 |             if ":" in userinfo: | 
 |                 return userinfo.split(":", 1)[1] | 
 |         return None | 
 |  | 
 |     @property | 
 |     def hostname(self): | 
 |         netloc = self.netloc | 
 |         if "@" in netloc: | 
 |             netloc = netloc.rsplit("@", 1)[1] | 
 |         if ":" in netloc: | 
 |             netloc = netloc.split(":", 1)[0] | 
 |         return netloc.lower() or None | 
 |  | 
 |     @property | 
 |     def port(self): | 
 |         netloc = self.netloc | 
 |         if "@" in netloc: | 
 |             netloc = netloc.rsplit("@", 1)[1] | 
 |         if ":" in netloc: | 
 |             port = netloc.split(":", 1)[1] | 
 |             return int(port, 10) | 
 |         return None | 
 |  | 
 | from collections import namedtuple | 
 |  | 
 | class SplitResult(namedtuple('SplitResult', 'scheme netloc path query fragment'), ResultMixin): | 
 |  | 
 |     __slots__ = () | 
 |  | 
 |     def geturl(self): | 
 |         return urlunsplit(self) | 
 |  | 
 |  | 
 | class ParseResult(namedtuple('ParseResult', 'scheme netloc path params query fragment'), ResultMixin): | 
 |  | 
 |     __slots__ = () | 
 |  | 
 |     def geturl(self): | 
 |         return urlunparse(self) | 
 |  | 
 |  | 
 | def urlparse(url, scheme='', allow_fragments=True): | 
 |     """Parse a URL into 6 components: | 
 |     <scheme>://<netloc>/<path>;<params>?<query>#<fragment> | 
 |     Return a 6-tuple: (scheme, netloc, path, params, query, fragment). | 
    Note that we don't break the components up into smaller bits
    (e.g. netloc is a single string) and we don't expand % escapes."""
 |     tuple = urlsplit(url, scheme, allow_fragments) | 
 |     scheme, netloc, url, query, fragment = tuple | 
 |     if scheme in uses_params and ';' in url: | 
 |         url, params = _splitparams(url) | 
 |     else: | 
 |         params = '' | 
 |     return ParseResult(scheme, netloc, url, params, query, fragment) | 
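
# Illustrative use of urlparse (doctest-style sketch; the URL is arbitrary and
# the repr shown is the ParseResult named tuple defined above):
#   >>> urlparse('http://netloc/path;param?query=arg#frag')
#   ParseResult(scheme='http', netloc='netloc', path='/path', params='param',
#               query='query=arg', fragment='frag')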
 |  | 
 | def _splitparams(url): | 
    if '/' in url:
 |         i = url.find(';', url.rfind('/')) | 
 |         if i < 0: | 
 |             return url, '' | 
 |     else: | 
 |         i = url.find(';') | 
 |     return url[:i], url[i+1:] | 
 |  | 
 | def _splitnetloc(url, start=0): | 
 |     delim = len(url)   # position of end of domain part of url, default is end | 
 |     for c in '/?#':    # look for delimiters; the order is NOT important | 
 |         wdelim = url.find(c, start)        # find first of this delim | 
 |         if wdelim >= 0:                    # if found | 
 |             delim = min(delim, wdelim)     # use earliest delim position | 
 |     return url[start:delim], url[delim:]   # return (domain, rest) | 
 |  | 
 | def urlsplit(url, scheme='', allow_fragments=True): | 
 |     """Parse a URL into 5 components: | 
 |     <scheme>://<netloc>/<path>?<query>#<fragment> | 
 |     Return a 5-tuple: (scheme, netloc, path, query, fragment). | 
    Note that we don't break the components up into smaller bits
    (e.g. netloc is a single string) and we don't expand % escapes."""
 |     allow_fragments = bool(allow_fragments) | 
 |     key = url, scheme, allow_fragments, type(url), type(scheme) | 
 |     cached = _parse_cache.get(key, None) | 
 |     if cached: | 
 |         return cached | 
 |     if len(_parse_cache) >= MAX_CACHE_SIZE: # avoid runaway growth | 
 |         clear_cache() | 
 |     netloc = query = fragment = '' | 
 |     i = url.find(':') | 
 |     if i > 0: | 
 |         if url[:i] == 'http': # optimize the common case | 
 |             scheme = url[:i].lower() | 
 |             url = url[i+1:] | 
 |             if url[:2] == '//': | 
 |                 netloc, url = _splitnetloc(url, 2) | 
 |             if allow_fragments and '#' in url: | 
 |                 url, fragment = url.split('#', 1) | 
 |             if '?' in url: | 
 |                 url, query = url.split('?', 1) | 
 |             v = SplitResult(scheme, netloc, url, query, fragment) | 
 |             _parse_cache[key] = v | 
 |             return v | 
 |         for c in url[:i]: | 
 |             if c not in scheme_chars: | 
 |                 break | 
 |         else: | 
 |             scheme, url = url[:i].lower(), url[i+1:] | 
 |     if scheme in uses_netloc and url[:2] == '//': | 
 |         netloc, url = _splitnetloc(url, 2) | 
 |     if allow_fragments and scheme in uses_fragment and '#' in url: | 
 |         url, fragment = url.split('#', 1) | 
 |     if scheme in uses_query and '?' in url: | 
 |         url, query = url.split('?', 1) | 
 |     v = SplitResult(scheme, netloc, url, query, fragment) | 
 |     _parse_cache[key] = v | 
 |     return v | 
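
# Illustrative use of urlsplit and the ResultMixin properties (doctest-style
# sketch with an arbitrary URL; the netloc is assumed to be well formed):
#   >>> r = urlsplit('http://u:pw@example.com:8042/path?q=1#frag')
#   >>> r.netloc, r.path, r.query, r.fragment
#   ('u:pw@example.com:8042', '/path', 'q=1', 'frag')
#   >>> r.username, r.password, r.hostname, r.port
#   ('u', 'pw', 'example.com', 8042)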
 |  | 
 | def urlunparse(components): | 
 |     """Put a parsed URL back together again.  This may result in a | 
 |     slightly different, but equivalent URL, if the URL that was parsed | 
 |     originally had redundant delimiters, e.g. a ? with an empty query | 
 |     (the draft states that these are equivalent).""" | 
 |     scheme, netloc, url, params, query, fragment = components | 
 |     if params: | 
 |         url = "%s;%s" % (url, params) | 
 |     return urlunsplit((scheme, netloc, url, query, fragment)) | 
 |  | 
 | def urlunsplit(components): | 
 |     scheme, netloc, url, query, fragment = components | 
 |     if netloc or (scheme and scheme in uses_netloc and url[:2] != '//'): | 
 |         if url and url[:1] != '/': url = '/' + url | 
 |         url = '//' + (netloc or '') + url | 
 |     if scheme: | 
 |         url = scheme + ':' + url | 
 |     if query: | 
 |         url = url + '?' + query | 
 |     if fragment: | 
 |         url = url + '#' + fragment | 
 |     return url | 
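
# Round-tripping sketch: splitting and re-joining returns an equivalent URL,
# though redundant delimiters (here a '#' with an empty fragment) are dropped:
#   >>> urlunsplit(urlsplit('http://example.com/path?q#'))
#   'http://example.com/path?q'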
 |  | 
 | def urljoin(base, url, allow_fragments=True): | 
 |     """Join a base URL and a possibly relative URL to form an absolute | 
 |     interpretation of the latter.""" | 
 |     if not base: | 
 |         return url | 
 |     if not url: | 
 |         return base | 
 |     bscheme, bnetloc, bpath, bparams, bquery, bfragment = \ | 
 |             urlparse(base, '', allow_fragments) | 
 |     scheme, netloc, path, params, query, fragment = \ | 
 |             urlparse(url, bscheme, allow_fragments) | 
 |     if scheme != bscheme or scheme not in uses_relative: | 
 |         return url | 
 |     if scheme in uses_netloc: | 
 |         if netloc: | 
 |             return urlunparse((scheme, netloc, path, | 
 |                                params, query, fragment)) | 
 |         netloc = bnetloc | 
 |     if path[:1] == '/': | 
 |         return urlunparse((scheme, netloc, path, | 
 |                            params, query, fragment)) | 
 |     if not path: | 
 |         path = bpath | 
 |         if not params: | 
 |             params = bparams | 
 |         else: | 
 |             path = path[:-1] | 
 |             return urlunparse((scheme, netloc, path, | 
 |                                 params, query, fragment)) | 
 |         if not query: | 
 |             query = bquery | 
 |         return urlunparse((scheme, netloc, path, | 
 |                            params, query, fragment)) | 
 |     segments = bpath.split('/')[:-1] + path.split('/') | 
 |     # XXX The stuff below is bogus in various ways... | 
 |     if segments[-1] == '.': | 
 |         segments[-1] = '' | 
 |     while '.' in segments: | 
 |         segments.remove('.') | 
 |     while 1: | 
 |         i = 1 | 
 |         n = len(segments) - 1 | 
 |         while i < n: | 
 |             if (segments[i] == '..' | 
 |                 and segments[i-1] not in ('', '..')): | 
 |                 del segments[i-1:i+1] | 
 |                 break | 
 |             i = i+1 | 
 |         else: | 
 |             break | 
 |     if segments == ['', '..']: | 
 |         segments[-1] = '' | 
 |     elif len(segments) >= 2 and segments[-1] == '..': | 
 |         segments[-2:] = [''] | 
 |     return urlunparse((scheme, netloc, '/'.join(segments), | 
 |                        params, query, fragment)) | 
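
# Illustrative urljoin results against an arbitrary base (these mirror the
# RFC 1808 style examples in test_input near the end of this module):
#   >>> urljoin('http://a/b/c/d', 'g')
#   'http://a/b/c/g'
#   >>> urljoin('http://a/b/c/d', '../g')
#   'http://a/b/g'
#   >>> urljoin('http://a/b/c/d', '//g')
#   'http://g'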
 |  | 
 | def urldefrag(url): | 
 |     """Removes any existing fragment from URL. | 
 |  | 
    Returns a tuple of the defragmented URL and the fragment.  If
    the URL contained no fragment, the second element is the
    empty string.
 |     """ | 
 |     if '#' in url: | 
 |         s, n, p, a, q, frag = urlparse(url) | 
 |         defrag = urlunparse((s, n, p, a, q, '')) | 
 |         return defrag, frag | 
 |     else: | 
 |         return url, '' | 
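
# Illustrative urldefrag behavior (arbitrary URLs):
#   >>> urldefrag('http://example.com/page#section2')
#   ('http://example.com/page', 'section2')
#   >>> urldefrag('http://example.com/page')
#   ('http://example.com/page', '')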
 |  | 
 | def unquote_to_bytes(string): | 
 |     """unquote_to_bytes('abc%20def') -> b'abc def'.""" | 
 |     # Note: strings are encoded as UTF-8. This is only an issue if it contains | 
 |     # unescaped non-ASCII characters, which URIs should not. | 
 |     if isinstance(string, str): | 
 |         string = string.encode('utf-8') | 
 |     res = string.split(b'%') | 
 |     for i in range(1, len(res)): | 
 |         item = res[i] | 
 |         try: | 
 |             res[i] = bytes([int(item[:2], 16)]) + item[2:] | 
 |         except ValueError: | 
 |             res[i] = b'%' + item | 
 |     return b''.join(res) | 
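
# Illustrative unquote_to_bytes behavior; malformed escapes are passed through
# unchanged (the input here is arbitrary):
#   >>> unquote_to_bytes('abc%20def%2Gxyz')
#   b'abc def%2Gxyz'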
 |  | 
 | def unquote(string, encoding='utf-8', errors='replace'): | 
 |     """Replace %xx escapes by their single-character equivalent. The optional | 
 |     encoding and errors parameters specify how to decode percent-encoded | 
 |     sequences into Unicode characters, as accepted by the bytes.decode() | 
 |     method. | 
 |     By default, percent-encoded sequences are decoded with UTF-8, and invalid | 
 |     sequences are replaced by a placeholder character. | 
 |  | 
 |     unquote('abc%20def') -> 'abc def'. | 
 |     """ | 
 |     if encoding is None: encoding = 'utf-8' | 
 |     if errors is None: errors = 'replace' | 
 |     # pct_sequence: contiguous sequence of percent-encoded bytes, decoded | 
 |     # (list of single-byte bytes objects) | 
 |     pct_sequence = [] | 
 |     res = string.split('%') | 
 |     for i in range(1, len(res)): | 
 |         item = res[i] | 
 |         try: | 
 |             if not item: raise ValueError | 
 |             pct_sequence.append(bytes.fromhex(item[:2])) | 
 |             rest = item[2:] | 
 |         except ValueError: | 
 |             rest = '%' + item | 
 |         if not rest: | 
 |             # This segment was just a single percent-encoded character. | 
 |             # May be part of a sequence of code units, so delay decoding. | 
 |             # (Stored in pct_sequence). | 
 |             res[i] = '' | 
 |         else: | 
 |             # Encountered non-percent-encoded characters. Flush the current | 
 |             # pct_sequence. | 
 |             res[i] = b''.join(pct_sequence).decode(encoding, errors) + rest | 
 |             pct_sequence = [] | 
 |     if pct_sequence: | 
 |         # Flush the final pct_sequence | 
 |         # res[-1] will always be empty if pct_sequence != [] | 
 |         assert not res[-1], "string=%r, res=%r" % (string, res) | 
 |         res[-1] = b''.join(pct_sequence).decode(encoding, errors) | 
 |     return ''.join(res) | 
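
# Illustrative unquote behavior: contiguous %-escapes are decoded as one
# UTF-8 sequence by default:
#   >>> unquote('abc%20def')
#   'abc def'
#   >>> unquote('%E2%82%AC')
#   '€'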
 |  | 
 | def parse_qs(qs, keep_blank_values=0, strict_parsing=0): | 
 |     """Parse a query given as a string argument. | 
 |  | 
 |         Arguments: | 
 |  | 
 |         qs: URL-encoded query string to be parsed | 
 |  | 
 |         keep_blank_values: flag indicating whether blank values in | 
 |             URL encoded queries should be treated as blank strings. | 
 |             A true value indicates that blanks should be retained as | 
 |             blank strings.  The default false value indicates that | 
 |             blank values are to be ignored and treated as if they were | 
 |             not included. | 
 |  | 
 |         strict_parsing: flag indicating what to do with parsing errors. | 
 |             If false (the default), errors are silently ignored. | 
 |             If true, errors raise a ValueError exception. | 
 |     """ | 
    parsed_result = {}
    for name, value in parse_qsl(qs, keep_blank_values, strict_parsing):
        if name in parsed_result:
            parsed_result[name].append(value)
        else:
            parsed_result[name] = [value]
    return parsed_result
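
# Illustrative parse_qs behavior with repeated and blank values (arbitrary
# query string; dict display order assumes insertion-ordered dicts):
#   >>> parse_qs('a=1&a=2&b=')
#   {'a': ['1', '2']}
#   >>> parse_qs('a=1&a=2&b=', keep_blank_values=1)
#   {'a': ['1', '2'], 'b': ['']}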
 |  | 
 | def parse_qsl(qs, keep_blank_values=0, strict_parsing=0): | 
 |     """Parse a query given as a string argument. | 
 |  | 
 |     Arguments: | 
 |  | 
 |     qs: URL-encoded query string to be parsed | 
 |  | 
 |     keep_blank_values: flag indicating whether blank values in | 
 |         URL encoded queries should be treated as blank strings.  A | 
 |         true value indicates that blanks should be retained as blank | 
 |         strings.  The default false value indicates that blank values | 
 |         are to be ignored and treated as if they were  not included. | 
 |  | 
 |     strict_parsing: flag indicating what to do with parsing errors. If | 
 |         false (the default), errors are silently ignored. If true, | 
 |         errors raise a ValueError exception. | 
 |  | 
    Returns a list of (name, value) pairs.
 |     """ | 
 |     pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')] | 
 |     r = [] | 
 |     for name_value in pairs: | 
 |         if not name_value and not strict_parsing: | 
 |             continue | 
 |         nv = name_value.split('=', 1) | 
 |         if len(nv) != 2: | 
 |             if strict_parsing: | 
 |                 raise ValueError("bad query field: %r" % (name_value,)) | 
 |             # Handle case of a control-name with no equal sign | 
 |             if keep_blank_values: | 
 |                 nv.append('') | 
 |             else: | 
 |                 continue | 
 |         if len(nv[1]) or keep_blank_values: | 
 |             name = unquote(nv[0].replace('+', ' ')) | 
 |             value = unquote(nv[1].replace('+', ' ')) | 
 |             r.append((name, value)) | 
 |  | 
 |     return r | 
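
# Illustrative parse_qsl behavior; '+' decodes to a space, and both '&' and
# ';' act as pair separators here:
#   >>> parse_qsl('a=1;b=two+words&c=%7Bx%7D')
#   [('a', '1'), ('b', 'two words'), ('c', '{x}')]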
 |  | 
 | def unquote_plus(string, encoding='utf-8', errors='replace'): | 
 |     """Like unquote(), but also replace plus signs by spaces, as required for | 
 |     unquoting HTML form values. | 
 |  | 
 |     unquote_plus('%7e/abc+def') -> '~/abc def' | 
 |     """ | 
 |     string = string.replace('+', ' ') | 
 |     return unquote(string, encoding, errors) | 
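
# Illustrative unquote_plus behavior (plus signs become spaces before the
# %-escapes are decoded):
#   >>> unquote_plus('%7e/abc+def')
#   '~/abc def'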
 |  | 
 | _ALWAYS_SAFE = frozenset(b'ABCDEFGHIJKLMNOPQRSTUVWXYZ' | 
 |                          b'abcdefghijklmnopqrstuvwxyz' | 
 |                          b'0123456789' | 
 |                          b'_.-') | 
_safe_quoters = {}
 |  | 
 | class Quoter(collections.defaultdict): | 
 |     """A mapping from bytes (in range(0,256)) to strings. | 
 |  | 
    String values are percent-encoded byte values, unless the key is < 128
    and in the "safe" set (either the specified safe set, or the default set).
 |     """ | 
 |     # Keeps a cache internally, using defaultdict, for efficiency (lookups | 
 |     # of cached keys don't call Python code at all). | 
 |     def __init__(self, safe): | 
 |         """safe: bytes object.""" | 
 |         self.safe = _ALWAYS_SAFE.union(c for c in safe if c < 128) | 
 |  | 
 |     def __repr__(self): | 
 |         # Without this, will just display as a defaultdict | 
 |         return "<Quoter %r>" % dict(self) | 
 |  | 
 |     def __missing__(self, b): | 
 |         # Handle a cache miss. Store quoted string in cache and return. | 
        res = chr(b) if b in self.safe else ('%%%02X' % b)
 |         self[b] = res | 
 |         return res | 
 |  | 
 | def quote(string, safe='/', encoding=None, errors=None): | 
 |     """quote('abc def') -> 'abc%20def' | 
 |  | 
 |     Each part of a URL, e.g. the path info, the query, etc., has a | 
 |     different set of reserved characters that must be quoted. | 
 |  | 
 |     RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists | 
 |     the following reserved characters. | 
 |  | 
 |     reserved    = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | | 
 |                   "$" | "," | 
 |  | 
 |     Each of these characters is reserved in some component of a URL, | 
 |     but not necessarily in all of them. | 
 |  | 
 |     By default, the quote function is intended for quoting the path | 
 |     section of a URL.  Thus, it will not encode '/'.  This character | 
 |     is reserved, but in typical usage the quote function is being | 
 |     called on a path where the existing slash characters are used as | 
 |     reserved characters. | 
 |  | 
 |     string and safe may be either str or bytes objects. encoding must | 
 |     not be specified if string is a str. | 
 |  | 
 |     The optional encoding and errors parameters specify how to deal with | 
 |     non-ASCII characters, as accepted by the str.encode method. | 
 |     By default, encoding='utf-8' (characters are encoded with UTF-8), and | 
 |     errors='strict' (unsupported characters raise a UnicodeEncodeError). | 
 |     """ | 
 |     if isinstance(string, str): | 
 |         if encoding is None: | 
 |             encoding = 'utf-8' | 
 |         if errors is None: | 
 |             errors = 'strict' | 
 |         string = string.encode(encoding, errors) | 
 |     else: | 
 |         if encoding is not None: | 
 |             raise TypeError("quote() doesn't support 'encoding' for bytes") | 
 |         if errors is not None: | 
 |             raise TypeError("quote() doesn't support 'errors' for bytes") | 
 |     return quote_from_bytes(string, safe) | 
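
# Illustrative quote behavior; '/' is left alone by default but can be
# escaped by passing safe='':
#   >>> quote('abc def/ghi')
#   'abc%20def/ghi'
#   >>> quote('abc def/ghi', safe='')
#   'abc%20def%2Fghi'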
 |  | 
 | def quote_plus(string, safe='', encoding=None, errors=None): | 
 |     """Like quote(), but also replace ' ' with '+', as required for quoting | 
 |     HTML form values. Plus signs in the original string are escaped unless | 
    they are included in safe. Unlike quote(), safe defaults to '' rather
    than '/'.
    """
 |     # Check if ' ' in string, where string may either be a str or bytes.  If | 
 |     # there are no spaces, the regular quote will produce the right answer. | 
 |     if ((isinstance(string, str) and ' ' not in string) or | 
 |         (isinstance(string, bytes) and b' ' not in string)): | 
 |         return quote(string, safe, encoding, errors) | 
 |     if isinstance(safe, str): | 
 |         space = ' ' | 
 |     else: | 
 |         space = b' ' | 
    string = quote(string, safe + space, encoding, errors)
 |     return string.replace(' ', '+') | 
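
# Illustrative quote_plus behavior; spaces become '+' and '+' itself is
# escaped unless listed in safe:
#   >>> quote_plus('a b+c')
#   'a+b%2Bc'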
 |  | 
 | def quote_from_bytes(bs, safe='/'): | 
 |     """Like quote(), but accepts a bytes object rather than a str, and does | 
 |     not perform string-to-bytes encoding.  It always returns an ASCII string. | 
 |     quote_from_bytes(b'abc def\xab') -> 'abc%20def%AB' | 
 |     """ | 
 |     if isinstance(safe, str): | 
 |         # Normalize 'safe' by converting to bytes and removing non-ASCII chars | 
 |         safe = safe.encode('ascii', 'ignore') | 
 |     cachekey = bytes(safe)  # In case it was a bytearray | 
    if not isinstance(bs, (bytes, bytearray)):
        raise TypeError("quote_from_bytes() expected bytes")
 |     try: | 
 |         quoter = _safe_quoters[cachekey] | 
 |     except KeyError: | 
 |         quoter = Quoter(safe) | 
 |         _safe_quoters[cachekey] = quoter | 
 |     return ''.join([quoter[char] for char in bs]) | 
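
# Illustrative quote_from_bytes behavior (input must already be bytes; no
# string-to-bytes encoding step is applied):
#   >>> quote_from_bytes(b'a/b\xe2\x82\xac')
#   'a/b%E2%82%AC'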
 |  | 
 | def urlencode(query, doseq=0): | 
 |     """Encode a sequence of two-element tuples or dictionary into a URL query string. | 
 |  | 
 |     If any values in the query arg are sequences and doseq is true, each | 
 |     sequence element is converted to a separate parameter. | 
 |  | 
 |     If the query arg is a sequence of two-element tuples, the order of the | 
 |     parameters in the output will match the order of parameters in the | 
 |     input. | 
 |     """ | 
 |  | 
 |     if hasattr(query, "items"): | 
 |         query = query.items() | 
 |     else: | 
 |         # It's a bother at times that strings and string-like objects are | 
 |         # sequences. | 
 |         try: | 
 |             # non-sequence items should not work with len() | 
 |             # non-empty strings will fail this | 
 |             if len(query) and not isinstance(query[0], tuple): | 
 |                 raise TypeError | 
 |             # Zero-length sequences of all types will get here and succeed, | 
 |             # but that's a minor nit.  Since the original implementation | 
 |             # allowed empty dicts that type of behavior probably should be | 
 |             # preserved for consistency | 
 |         except TypeError: | 
 |             ty, va, tb = sys.exc_info() | 
 |             raise TypeError("not a valid non-string sequence " | 
 |                             "or mapping object").with_traceback(tb) | 
 |  | 
 |     l = [] | 
 |     if not doseq: | 
 |         for k, v in query: | 
 |             k = quote_plus(str(k)) | 
 |             v = quote_plus(str(v)) | 
 |             l.append(k + '=' + v) | 
 |     else: | 
 |         for k, v in query: | 
 |             k = quote_plus(str(k)) | 
 |             if isinstance(v, str): | 
 |                 v = quote_plus(v) | 
 |                 l.append(k + '=' + v) | 
 |             else: | 
 |                 try: | 
 |                     # Is this a sufficient test for sequence-ness? | 
 |                     x = len(v) | 
 |                 except TypeError: | 
 |                     # not a sequence | 
 |                     v = quote_plus(str(v)) | 
 |                     l.append(k + '=' + v) | 
 |                 else: | 
 |                     # loop over the sequence | 
 |                     for elt in v: | 
 |                         l.append(k + '=' + quote_plus(str(elt))) | 
 |     return '&'.join(l) | 
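
# Illustrative urlencode behavior (pair order follows the input sequence;
# the doseq example assumes list values should become repeated parameters):
#   >>> urlencode([('q', 'two words'), ('lang', 'en')])
#   'q=two+words&lang=en'
#   >>> urlencode({'k': [1, 2]}, doseq=1)
#   'k=1&k=2'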
 |  | 
 | # Utilities to parse URLs (most of these return None for missing parts): | 
 | # unwrap('<URL:type://host/path>') --> 'type://host/path' | 
 | # splittype('type:opaquestring') --> 'type', 'opaquestring' | 
 | # splithost('//host[:port]/path') --> 'host[:port]', '/path' | 
 | # splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]' | 
 | # splitpasswd('user:passwd') -> 'user', 'passwd' | 
 | # splitport('host:port') --> 'host', 'port' | 
 | # splitquery('/path?query') --> '/path', 'query' | 
 | # splittag('/path#tag') --> '/path', 'tag' | 
 | # splitattr('/path;attr1=value1;attr2=value2;...') -> | 
 | #   '/path', ['attr1=value1', 'attr2=value2', ...] | 
 | # splitvalue('attr=value') --> 'attr', 'value' | 
# unquote('abc%20def') -> 'abc def'
# quote('abc def') -> 'abc%20def'
 |  | 
 | def to_bytes(url): | 
 |     """to_bytes(u"URL") --> 'URL'.""" | 
 |     # Most URL schemes require ASCII. If that changes, the conversion | 
 |     # can be relaxed. | 
 |     # XXX get rid of to_bytes() | 
 |     if isinstance(url, str): | 
 |         try: | 
 |             url = url.encode("ASCII").decode() | 
 |         except UnicodeError: | 
 |             raise UnicodeError("URL " + repr(url) + | 
 |                                " contains non-ASCII characters") | 
 |     return url | 
 |  | 
 | def unwrap(url): | 
 |     """unwrap('<URL:type://host/path>') --> 'type://host/path'.""" | 
 |     url = str(url).strip() | 
 |     if url[:1] == '<' and url[-1:] == '>': | 
 |         url = url[1:-1].strip() | 
 |     if url[:4] == 'URL:': url = url[4:].strip() | 
 |     return url | 
 |  | 
 | _typeprog = None | 
 | def splittype(url): | 
 |     """splittype('type:opaquestring') --> 'type', 'opaquestring'.""" | 
 |     global _typeprog | 
 |     if _typeprog is None: | 
 |         import re | 
 |         _typeprog = re.compile('^([^/:]+):') | 
 |  | 
 |     match = _typeprog.match(url) | 
 |     if match: | 
 |         scheme = match.group(1) | 
 |         return scheme.lower(), url[len(scheme) + 1:] | 
 |     return None, url | 
 |  | 
 | _hostprog = None | 
 | def splithost(url): | 
 |     """splithost('//host[:port]/path') --> 'host[:port]', '/path'.""" | 
 |     global _hostprog | 
 |     if _hostprog is None: | 
 |         import re | 
 |         _hostprog = re.compile('^//([^/?]*)(.*)$') | 
 |  | 
 |     match = _hostprog.match(url) | 
 |     if match: return match.group(1, 2) | 
 |     return None, url | 
 |  | 
 | _userprog = None | 
 | def splituser(host): | 
 |     """splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'.""" | 
 |     global _userprog | 
 |     if _userprog is None: | 
 |         import re | 
 |         _userprog = re.compile('^(.*)@(.*)$') | 
 |  | 
 |     match = _userprog.match(host) | 
    if match: return tuple(map(unquote, match.group(1, 2)))
 |     return None, host | 
 |  | 
 | _passwdprog = None | 
 | def splitpasswd(user): | 
 |     """splitpasswd('user:passwd') -> 'user', 'passwd'.""" | 
 |     global _passwdprog | 
 |     if _passwdprog is None: | 
 |         import re | 
 |         _passwdprog = re.compile('^([^:]*):(.*)$',re.S) | 
 |  | 
 |     match = _passwdprog.match(user) | 
 |     if match: return match.group(1, 2) | 
 |     return user, None | 
 |  | 
 | _portprog = None | 
 | def splitport(host): | 
 |     """splitport('host:port') --> 'host', 'port'.""" | 
 |     global _portprog | 
 |     if _portprog is None: | 
 |         import re | 
 |         _portprog = re.compile('^(.*):([0-9]+)$') | 
 |  | 
 |     match = _portprog.match(host) | 
 |     if match: return match.group(1, 2) | 
 |     return host, None | 
 |  | 
 | _nportprog = None | 
 | def splitnport(host, defport=-1): | 
 |     """Split host and port, returning numeric port. | 
 |     Return given default port if no ':' found; defaults to -1. | 
    Return numerical port if a valid number is found after ':'.
 |     Return None if ':' but not a valid number.""" | 
 |     global _nportprog | 
 |     if _nportprog is None: | 
 |         import re | 
 |         _nportprog = re.compile('^(.*):(.*)$') | 
 |  | 
 |     match = _nportprog.match(host) | 
 |     if match: | 
 |         host, port = match.group(1, 2) | 
 |         try: | 
 |             if not port: raise ValueError("no digits") | 
 |             nport = int(port) | 
 |         except ValueError: | 
 |             nport = None | 
 |         return host, nport | 
 |     return host, defport | 
 |  | 
 | _queryprog = None | 
 | def splitquery(url): | 
 |     """splitquery('/path?query') --> '/path', 'query'.""" | 
 |     global _queryprog | 
 |     if _queryprog is None: | 
 |         import re | 
        _queryprog = re.compile(r'^(.*)\?([^?]*)$')
 |  | 
 |     match = _queryprog.match(url) | 
 |     if match: return match.group(1, 2) | 
 |     return url, None | 
 |  | 
 | _tagprog = None | 
 | def splittag(url): | 
 |     """splittag('/path#tag') --> '/path', 'tag'.""" | 
 |     global _tagprog | 
 |     if _tagprog is None: | 
 |         import re | 
 |         _tagprog = re.compile('^(.*)#([^#]*)$') | 
 |  | 
 |     match = _tagprog.match(url) | 
 |     if match: return match.group(1, 2) | 
 |     return url, None | 
 |  | 
 | def splitattr(url): | 
 |     """splitattr('/path;attr1=value1;attr2=value2;...') -> | 
 |         '/path', ['attr1=value1', 'attr2=value2', ...].""" | 
 |     words = url.split(';') | 
 |     return words[0], words[1:] | 
 |  | 
 | _valueprog = None | 
 | def splitvalue(attr): | 
 |     """splitvalue('attr=value') --> 'attr', 'value'.""" | 
 |     global _valueprog | 
 |     if _valueprog is None: | 
 |         import re | 
 |         _valueprog = re.compile('^([^=]*)=(.*)$') | 
 |  | 
 |     match = _valueprog.match(attr) | 
 |     if match: return match.group(1, 2) | 
 |     return attr, None | 
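
# Illustrative results for the low-level split helpers above (host and path
# values are arbitrary):
#   >>> splittype('mailto:user@example.com')
#   ('mailto', 'user@example.com')
#   >>> splithost('//www.example.com:80/index.html')
#   ('www.example.com:80', '/index.html')
#   >>> splitport('example.com:8080')
#   ('example.com', '8080')
#   >>> splitnport('example.com:x')
#   ('example.com', None)
#   >>> splitquery('/path?x=1')
#   ('/path', 'x=1')
#   >>> splitattr('/path;type=a;name=b')
#   ('/path', ['type=a', 'name=b'])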
 |  | 
 | test_input = """ | 
 |       http://a/b/c/d | 
 |  | 
 |       g:h        = <URL:g:h> | 
 |       http:g     = <URL:http://a/b/c/g> | 
 |       http:      = <URL:http://a/b/c/d> | 
 |       g          = <URL:http://a/b/c/g> | 
 |       ./g        = <URL:http://a/b/c/g> | 
 |       g/         = <URL:http://a/b/c/g/> | 
 |       /g         = <URL:http://a/g> | 
 |       //g        = <URL:http://g> | 
 |       ?y         = <URL:http://a/b/c/d?y> | 
 |       g?y        = <URL:http://a/b/c/g?y> | 
 |       g?y/./x    = <URL:http://a/b/c/g?y/./x> | 
 |       .          = <URL:http://a/b/c/> | 
 |       ./         = <URL:http://a/b/c/> | 
 |       ..         = <URL:http://a/b/> | 
 |       ../        = <URL:http://a/b/> | 
 |       ../g       = <URL:http://a/b/g> | 
 |       ../..      = <URL:http://a/> | 
 |       ../../g    = <URL:http://a/g> | 
 |       ../../../g = <URL:http://a/../g> | 
 |       ./../g     = <URL:http://a/b/g> | 
 |       ./g/.      = <URL:http://a/b/c/g/> | 
 |       /./g       = <URL:http://a/./g> | 
 |       g/./h      = <URL:http://a/b/c/g/h> | 
 |       g/../h     = <URL:http://a/b/c/h> | 
 |       http:g     = <URL:http://a/b/c/g> | 
 |       http:      = <URL:http://a/b/c/d> | 
 |       http:?y         = <URL:http://a/b/c/d?y> | 
 |       http:g?y        = <URL:http://a/b/c/g?y> | 
 |       http:g?y/./x    = <URL:http://a/b/c/g?y/./x> | 
 | """ | 
 |  | 
 | def test(): | 
 |     base = '' | 
 |     if sys.argv[1:]: | 
 |         fn = sys.argv[1] | 
 |         if fn == '-': | 
 |             fp = sys.stdin | 
 |         else: | 
 |             fp = open(fn) | 
 |     else: | 
 |         from io import StringIO | 
 |         fp = StringIO(test_input) | 
 |     for line in fp: | 
 |         words = line.split() | 
 |         if not words: | 
 |             continue | 
 |         url = words[0] | 
 |         parts = urlparse(url) | 
 |         print('%-10s : %s' % (url, parts)) | 
 |         abs = urljoin(base, url) | 
 |         if not base: | 
 |             base = abs | 
 |         wrapped = '<URL:%s>' % abs | 
 |         print('%-10s = %s' % (url, wrapped)) | 
 |         if len(words) == 3 and words[1] == '=': | 
 |             if wrapped != words[2]: | 
 |                 print('EXPECTED', words[2], '!!!!!!!!!!') | 
 |  | 
 | if __name__ == '__main__': | 
 |     test() |