Blame - Lib/urllib/parse.py - platform/external/python/cpython3

blob: fe02db5e000c143ec843ad4e61fed57dbf573885 [file] [log] [blame]

Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	1	"""Parse (absolute and relative) URLs.
				2
				3	See RFC 1808: "Relative Uniform Resource Locators", by R. Fielding,
				4	UC Irvine, June 1995.
				5	"""
				6
Facundo Batista	2ac5de2	2008-07-07 18:24:11 +0000	[diff] [blame]	7	import sys
				8
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	9	__all__ = ["urlparse", "urlunparse", "urljoin", "urldefrag",
				10	"urlsplit", "urlunsplit"]
				11
				12	# A classification of schemes ('' means apply by default)
				13	uses_relative = ['ftp', 'http', 'gopher', 'nntp', 'imap',
				14	'wais', 'file', 'https', 'shttp', 'mms',
				15	'prospero', 'rtsp', 'rtspu', '', 'sftp']
				16	uses_netloc = ['ftp', 'http', 'gopher', 'nntp', 'telnet',
				17	'imap', 'wais', 'file', 'mms', 'https', 'shttp',
				18	'snews', 'prospero', 'rtsp', 'rtspu', 'rsync', '',
				19	'svn', 'svn+ssh', 'sftp']
				20	non_hierarchical = ['gopher', 'hdl', 'mailto', 'news',
				21	'telnet', 'wais', 'imap', 'snews', 'sip', 'sips']
				22	uses_params = ['ftp', 'hdl', 'prospero', 'http', 'imap',
				23	'https', 'shttp', 'rtsp', 'rtspu', 'sip', 'sips',
				24	'mms', '', 'sftp']
				25	uses_query = ['http', 'wais', 'imap', 'https', 'shttp', 'mms',
				26	'gopher', 'rtsp', 'rtspu', 'sip', 'sips', '']
				27	uses_fragment = ['ftp', 'hdl', 'http', 'gopher', 'news',
				28	'nntp', 'wais', 'https', 'shttp', 'snews',
				29	'file', 'prospero', '']
				30
				31	# Characters valid in scheme names
				32	scheme_chars = ('abcdefghijklmnopqrstuvwxyz'
				33	'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
				34	'0123456789'
				35	'+-.')
				36
				37	MAX_CACHE_SIZE = 20
				38	_parse_cache = {}
				39
				40	def clear_cache():
				41	"""Clear the parse cache."""
				42	_parse_cache.clear()
				43
				44
				45	class ResultMixin(object):
				46	"""Shared methods for the parsed result objects."""
				47
				48	@property
				49	def username(self):
				50	netloc = self.netloc
				51	if "@" in netloc:
				52	userinfo = netloc.rsplit("@", 1)[0]
				53	if ":" in userinfo:
				54	userinfo = userinfo.split(":", 1)[0]
				55	return userinfo
				56	return None
				57
				58	@property
				59	def password(self):
				60	netloc = self.netloc
				61	if "@" in netloc:
				62	userinfo = netloc.rsplit("@", 1)[0]
				63	if ":" in userinfo:
				64	return userinfo.split(":", 1)[1]
				65	return None
				66
				67	@property
				68	def hostname(self):
				69	netloc = self.netloc
				70	if "@" in netloc:
				71	netloc = netloc.rsplit("@", 1)[1]
				72	if ":" in netloc:
				73	netloc = netloc.split(":", 1)[0]
				74	return netloc.lower() or None
				75
				76	@property
				77	def port(self):
				78	netloc = self.netloc
				79	if "@" in netloc:
				80	netloc = netloc.rsplit("@", 1)[1]
				81	if ":" in netloc:
				82	port = netloc.split(":", 1)[1]
				83	return int(port, 10)
				84	return None
				85
				86	from collections import namedtuple
				87
				88	class SplitResult(namedtuple('SplitResult', 'scheme netloc path query fragment'), ResultMixin):
				89
				90	__slots__ = ()
				91
				92	def geturl(self):
				93	return urlunsplit(self)
				94
				95
				96	class ParseResult(namedtuple('ParseResult', 'scheme netloc path params query fragment'), ResultMixin):
				97
				98	__slots__ = ()
				99
				100	def geturl(self):
				101	return urlunparse(self)
				102
				103
				104	def urlparse(url, scheme='', allow_fragments=True):
				105	"""Parse a URL into 6 components:
				106	<scheme>://<netloc>/<path>;<params>?<query>#<fragment>
				107	Return a 6-tuple: (scheme, netloc, path, params, query, fragment).
				108	Note that we don't break the components up in smaller bits
				109	(e.g. netloc is a single string) and we don't expand % escapes."""
				110	tuple = urlsplit(url, scheme, allow_fragments)
				111	scheme, netloc, url, query, fragment = tuple
				112	if scheme in uses_params and ';' in url:
				113	url, params = _splitparams(url)
				114	else:
				115	params = ''
				116	return ParseResult(scheme, netloc, url, params, query, fragment)
				117
				118	def _splitparams(url):
				119	if '/' in url:
				120	i = url.find(';', url.rfind('/'))
				121	if i < 0:
				122	return url, ''
				123	else:
				124	i = url.find(';')
				125	return url[:i], url[i+1:]
				126
				127	def _splitnetloc(url, start=0):
				128	delim = len(url) # position of end of domain part of url, default is end
				129	for c in '/?#': # look for delimiters; the order is NOT important
				130	wdelim = url.find(c, start) # find first of this delim
				131	if wdelim >= 0: # if found
				132	delim = min(delim, wdelim) # use earliest delim position
				133	return url[start:delim], url[delim:] # return (domain, rest)
				134
				135	def urlsplit(url, scheme='', allow_fragments=True):
				136	"""Parse a URL into 5 components:
				137	<scheme>://<netloc>/<path>?<query>#<fragment>
				138	Return a 5-tuple: (scheme, netloc, path, query, fragment).
				139	Note that we don't break the components up in smaller bits
				140	(e.g. netloc is a single string) and we don't expand % escapes."""
				141	allow_fragments = bool(allow_fragments)
				142	key = url, scheme, allow_fragments, type(url), type(scheme)
				143	cached = _parse_cache.get(key, None)
				144	if cached:
				145	return cached
				146	if len(_parse_cache) >= MAX_CACHE_SIZE: # avoid runaway growth
				147	clear_cache()
				148	netloc = query = fragment = ''
				149	i = url.find(':')
				150	if i > 0:
				151	if url[:i] == 'http': # optimize the common case
				152	scheme = url[:i].lower()
				153	url = url[i+1:]
				154	if url[:2] == '//':
				155	netloc, url = _splitnetloc(url, 2)
				156	if allow_fragments and '#' in url:
				157	url, fragment = url.split('#', 1)
				158	if '?' in url:
				159	url, query = url.split('?', 1)
				160	v = SplitResult(scheme, netloc, url, query, fragment)
				161	_parse_cache[key] = v
				162	return v
				163	for c in url[:i]:
				164	if c not in scheme_chars:
				165	break
				166	else:
				167	scheme, url = url[:i].lower(), url[i+1:]
				168	if scheme in uses_netloc and url[:2] == '//':
				169	netloc, url = _splitnetloc(url, 2)
				170	if allow_fragments and scheme in uses_fragment and '#' in url:
				171	url, fragment = url.split('#', 1)
				172	if scheme in uses_query and '?' in url:
				173	url, query = url.split('?', 1)
				174	v = SplitResult(scheme, netloc, url, query, fragment)
				175	_parse_cache[key] = v
				176	return v
				177
				178	def urlunparse(components):
				179	"""Put a parsed URL back together again. This may result in a
				180	slightly different, but equivalent URL, if the URL that was parsed
				181	originally had redundant delimiters, e.g. a ? with an empty query
				182	(the draft states that these are equivalent)."""
				183	scheme, netloc, url, params, query, fragment = components
				184	if params:
				185	url = "%s;%s" % (url, params)
				186	return urlunsplit((scheme, netloc, url, query, fragment))
				187
				188	def urlunsplit(components):
				189	scheme, netloc, url, query, fragment = components
				190	if netloc or (scheme and scheme in uses_netloc and url[:2] != '//'):
				191	if url and url[:1] != '/': url = '/' + url
				192	url = '//' + (netloc or '') + url
				193	if scheme:
				194	url = scheme + ':' + url
				195	if query:
				196	url = url + '?' + query
				197	if fragment:
				198	url = url + '#' + fragment
				199	return url
				200
def urljoin(base, url, allow_fragments=True):
    """Join a base URL and a possibly relative URL to form an absolute
    interpretation of the latter.

    An empty base returns url unchanged and an empty url returns base
    unchanged.  allow_fragments is passed through to urlparse() for both
    arguments."""
    if not base:
        return url
    if not url:
        return base
    bscheme, bnetloc, bpath, bparams, bquery, bfragment = \
            urlparse(base, '', allow_fragments)
    # The base scheme is the default for a url that names no scheme.
    scheme, netloc, path, params, query, fragment = \
            urlparse(url, bscheme, allow_fragments)
    # A different scheme, or one without relative forms, means url is
    # returned exactly as given.
    if scheme != bscheme or scheme not in uses_relative:
        return url
    if scheme in uses_netloc:
        if netloc:
            # url names its own network location: nothing is taken from
            # base except the (equal) scheme.
            return urlunparse((scheme, netloc, path,
                               params, query, fragment))
        netloc = bnetloc
    if path[:1] == '/':
        # Absolute path: the base path is not used.
        return urlunparse((scheme, netloc, path,
                           params, query, fragment))
    if not (path or params or query):
        # Nothing but (possibly) a fragment: keep the base path, params
        # and query, and use url's fragment.
        return urlunparse((scheme, netloc, bpath,
                           bparams, bquery, fragment))
    # Drop the last segment of the base path and append url's segments.
    segments = bpath.split('/')[:-1] + path.split('/')
    # XXX The stuff below is bogus in various ways...
    # A trailing '.' becomes '' so the joined path ends in '/'.
    if segments[-1] == '.':
        segments[-1] = ''
    while '.' in segments:
        segments.remove('.')
    # Repeatedly delete the first "<name>/.." pair found, rescanning
    # from the start after each deletion.  The scan covers indexes
    # 1 .. len(segments) - 2 only, so a '..' in the last position is
    # left for the checks after the loop.
    while 1:
        i = 1
        n = len(segments) - 1
        while i < n:
            if (segments[i] == '..'
                and segments[i-1] not in ('', '..')):
                del segments[i-1:i+1]
                break
            i = i+1
        else:
            # A full scan found no pair to delete.
            break
    if segments == ['', '..']:
        segments[-1] = ''
    elif len(segments) >= 2 and segments[-1] == '..':
        # Trailing '..': replace it and the segment before it with ''.
        segments[-2:] = ['']
    return urlunparse((scheme, netloc, '/'.join(segments),
                       params, query, fragment))
				248
				249	def urldefrag(url):
				250	"""Removes any existing fragment from URL.
				251
				252	Returns a tuple of the defragmented URL and the fragment. If
				253	the URL contained no fragments, the second element is the
				254	empty string.
				255	"""
				256	if '#' in url:
				257	s, n, p, a, q, frag = urlparse(url)
				258	defrag = urlunparse((s, n, p, a, q, ''))
				259	return defrag, frag
				260	else:
				261	return url, ''
				262
				263
# Maps every two-digit hex escape to the character with that code.  All
# combinations of upper- and lower-case digits are present, so mixed-case
# escapes such as 'aB' decode just like 'ab' and 'AB'.
_hexdig = '0123456789ABCDEFabcdef'
_hextochr = dict((a + b, chr(int(a + b, 16)))
                 for a in _hexdig for b in _hexdig)

def unquote(s):
    """unquote('abc%20def') -> 'abc def'.

    Each '%XX' escape (XX being two hex digits in any case) is replaced
    by the single character whose code is 0xXX; escapes are decoded one
    at a time.  A '%' that is not followed by two hex digits is left in
    the result unchanged.
    """
    head, *escaped = s.split('%')
    pieces = [head]
    for item in escaped:
        char = _hextochr.get(item[:2])
        if char is None:
            # Not a valid escape: put back the '%' that split() removed.
            pieces.append('%' + item)
        else:
            pieces.append(char + item[2:])
    return "".join(pieces)
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	279
				280	def unquote_plus(s):
Guido van Rossum	df9f1ec	2008-08-06 19:31:34 +0000	[diff] [blame^]	281	"""unquote('%7e/abc+def') -> '~/abc def'"""
				282	s = s.replace('+', ' ')
				283	return unquote(s)
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	284
# Characters that are never escaped, whatever 'safe' set is requested.
always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
               'abcdefghijklmnopqrstuvwxyz'
               '0123456789' '_.-')
# Quoter instances already built by quote(), keyed by (safe, always_safe).
_safe_quoters = {}

class Quoter:
    """Callable mapping a single character to its quoted form.

    Characters in 'safe' or in always_safe are returned as they are.
    Any other character with a code below 256 becomes one '%XX' escape;
    characters with larger codes become the '%XX' escapes of their
    UTF-8 bytes.
    """

    def __init__(self, safe):
        # Results for characters with codes below 256 are memoised here.
        self.cache = {}
        self.safe = safe + always_safe

    def __call__(self, c):
        if c in self.cache:
            return self.cache[c]
        code = ord(c)
        if code >= 256:
            # Not cached: the result is rebuilt on every call.
            return "".join(['%%%02X' % byte for byte in c.encode("utf-8")])
        quoted = c if c in self.safe else '%%%02X' % code
        self.cache[c] = quoted
        return quoted
				305
				306	def quote(s, safe = '/'):
				307	"""quote('abc def') -> 'abc%20def'
				308
				309	Each part of a URL, e.g. the path info, the query, etc., has a
				310	different set of reserved characters that must be quoted.
				311
				312	RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists
				313	the following reserved characters.
				314
				315	reserved = ";" \| "/" \| "?" \| ":" \| "@" \| "&" \| "=" \| "+" \|
				316	"$" \| ","
				317
				318	Each of these characters is reserved in some component of a URL,
				319	but not necessarily in all of them.
				320
				321	By default, the quote function is intended for quoting the path
				322	section of a URL. Thus, it will not encode '/'. This character
				323	is reserved, but in typical usage the quote function is being
				324	called on a path where the existing slash characters are used as
				325	reserved characters.
				326	"""
				327	cachekey = (safe, always_safe)
				328	try:
				329	quoter = _safe_quoters[cachekey]
				330	except KeyError:
				331	quoter = Quoter(safe)
				332	_safe_quoters[cachekey] = quoter
				333	res = map(quoter, s)
				334	return ''.join(res)
				335
				336	def quote_plus(s, safe = ''):
				337	"""Quote the query fragment of a URL; replacing ' ' with '+'"""
				338	if ' ' in s:
				339	s = quote(s, safe + ' ')
				340	return s.replace(' ', '+')
				341	return quote(s, safe)
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	342
				343	def urlencode(query,doseq=0):
				344	"""Encode a sequence of two-element tuples or dictionary into a URL query string.
				345
				346	If any values in the query arg are sequences and doseq is true, each
				347	sequence element is converted to a separate parameter.
				348
				349	If the query arg is a sequence of two-element tuples, the order of the
				350	parameters in the output will match the order of parameters in the
				351	input.
				352	"""
				353
				354	if hasattr(query,"items"):
				355	# mapping objects
				356	query = query.items()
				357	else:
				358	# it's a bother at times that strings and string-like objects are
				359	# sequences...
				360	try:
				361	# non-sequence items should not work with len()
				362	# non-empty strings will fail this
				363	if len(query) and not isinstance(query[0], tuple):
				364	raise TypeError
				365	# zero-length sequences of all types will get here and succeed,
				366	# but that's a minor nit - since the original implementation
				367	# allowed empty dicts that type of behavior probably should be
				368	# preserved for consistency
				369	except TypeError:
				370	ty,va,tb = sys.exc_info()
				371	raise TypeError("not a valid non-string sequence or mapping object").with_traceback(tb)
				372
				373	l = []
				374	if not doseq:
				375	# preserve old behavior
				376	for k, v in query:
				377	k = quote_plus(str(k))
				378	v = quote_plus(str(v))
				379	l.append(k + '=' + v)
				380	else:
				381	for k, v in query:
				382	k = quote_plus(str(k))
				383	if isinstance(v, str):
				384	v = quote_plus(v)
				385	l.append(k + '=' + v)
				386	elif isinstance(v, str):
				387	# is there a reasonable way to convert to ASCII?
				388	# encode generates a string, but "replace" or "ignore"
				389	# lose information and "strict" can raise UnicodeError
Guido van Rossum	df9f1ec	2008-08-06 19:31:34 +0000	[diff] [blame^]	390	v = quote_plus(v.encode("ASCII","replace"))
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	391	l.append(k + '=' + v)
				392	else:
				393	try:
				394	# is this a sufficient test for sequence-ness?
				395	x = len(v)
				396	except TypeError:
				397	# not a sequence
				398	v = quote_plus(str(v))
				399	l.append(k + '=' + v)
				400	else:
				401	# loop over the sequence
				402	for elt in v:
				403	l.append(k + '=' + quote_plus(str(elt)))
				404	return '&'.join(l)
				405
				406	# Utilities to parse URLs (most of these return None for missing parts):
				407	# unwrap('<URL:type://host/path>') --> 'type://host/path'
				408	# splittype('type:opaquestring') --> 'type', 'opaquestring'
				409	# splithost('//host[:port]/path') --> 'host[:port]', '/path'
				410	# splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'
				411	# splitpasswd('user:passwd') -> 'user', 'passwd'
				412	# splitport('host:port') --> 'host', 'port'
				413	# splitquery('/path?query') --> '/path', 'query'
				414	# splittag('/path#tag') --> '/path', 'tag'
				415	# splitattr('/path;attr1=value1;attr2=value2;...') ->
				416	# '/path', ['attr1=value1', 'attr2=value2', ...]
				417	# splitvalue('attr=value') --> 'attr', 'value'
				418	# urllib.parse.unquote('abc%20def') -> 'abc def'
# quote('abc def') -> 'abc%20def'
				420
def to_bytes(url):
    """to_bytes(u"URL") --> 'URL'.

    A str argument is checked to be pure ASCII and returned as a str;
    UnicodeError is raised if it is not.  Any other object is returned
    unchanged.
    """
    # Most URL schemes require ASCII. If that changes, the conversion
    # can be relaxed.
    # XXX get rid of to_bytes()
    if not isinstance(url, str):
        return url
    try:
        return url.encode("ASCII").decode()
    except UnicodeError:
        raise UnicodeError("URL " + repr(url) +
                           " contains non-ASCII characters")
				433
				434	def unwrap(url):
				435	"""unwrap('<URL:type://host/path>') --> 'type://host/path'."""
				436	url = str(url).strip()
				437	if url[:1] == '<' and url[-1:] == '>':
				438	url = url[1:-1].strip()
				439	if url[:4] == 'URL:': url = url[4:].strip()
				440	return url
				441
				442	_typeprog = None
				443	def splittype(url):
				444	"""splittype('type:opaquestring') --> 'type', 'opaquestring'."""
				445	global _typeprog
				446	if _typeprog is None:
				447	import re
				448	_typeprog = re.compile('^([^/:]+):')
				449
				450	match = _typeprog.match(url)
				451	if match:
				452	scheme = match.group(1)
				453	return scheme.lower(), url[len(scheme) + 1:]
				454	return None, url
				455
				456	_hostprog = None
				457	def splithost(url):
				458	"""splithost('//host[:port]/path') --> 'host[:port]', '/path'."""
				459	global _hostprog
				460	if _hostprog is None:
				461	import re
				462	_hostprog = re.compile('^//([^/?])(.)$')
				463
				464	match = _hostprog.match(url)
				465	if match: return match.group(1, 2)
				466	return None, url
				467
				468	_userprog = None
				469	def splituser(host):
				470	"""splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'."""
				471	global _userprog
				472	if _userprog is None:
				473	import re
				474	_userprog = re.compile('^(.)@(.)$')
				475
				476	match = _userprog.match(host)
Guido van Rossum	df9f1ec	2008-08-06 19:31:34 +0000	[diff] [blame^]	477	if match: return map(unquote, match.group(1, 2))
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	478	return None, host
				479
				480	_passwdprog = None
				481	def splitpasswd(user):
				482	"""splitpasswd('user:passwd') -> 'user', 'passwd'."""
				483	global _passwdprog
				484	if _passwdprog is None:
				485	import re
				486	_passwdprog = re.compile('^([^:]):(.)$')
				487
				488	match = _passwdprog.match(user)
				489	if match: return match.group(1, 2)
				490	return user, None
				491
				492	# splittag('/path#tag') --> '/path', 'tag'
				493	_portprog = None
				494	def splitport(host):
				495	"""splitport('host:port') --> 'host', 'port'."""
				496	global _portprog
				497	if _portprog is None:
				498	import re
				499	_portprog = re.compile('^(.*):([0-9]+)$')
				500
				501	match = _portprog.match(host)
				502	if match: return match.group(1, 2)
				503	return host, None
				504
				505	_nportprog = None
				506	def splitnport(host, defport=-1):
				507	"""Split host and port, returning numeric port.
				508	Return given default port if no ':' found; defaults to -1.
				509	Return numerical port if a valid number are found after ':'.
				510	Return None if ':' but not a valid number."""
				511	global _nportprog
				512	if _nportprog is None:
				513	import re
				514	_nportprog = re.compile('^(.):(.)$')
				515
				516	match = _nportprog.match(host)
				517	if match:
				518	host, port = match.group(1, 2)
				519	try:
				520	if not port: raise ValueError("no digits")
				521	nport = int(port)
				522	except ValueError:
				523	nport = None
				524	return host, nport
				525	return host, defport
				526
				527	_queryprog = None
				528	def splitquery(url):
				529	"""splitquery('/path?query') --> '/path', 'query'."""
				530	global _queryprog
				531	if _queryprog is None:
				532	import re
				533	_queryprog = re.compile('^(.)\?([^?])$')
				534
				535	match = _queryprog.match(url)
				536	if match: return match.group(1, 2)
				537	return url, None
				538
				539	_tagprog = None
				540	def splittag(url):
				541	"""splittag('/path#tag') --> '/path', 'tag'."""
				542	global _tagprog
				543	if _tagprog is None:
				544	import re
				545	_tagprog = re.compile('^(.)#([^#])$')
				546
				547	match = _tagprog.match(url)
				548	if match: return match.group(1, 2)
				549	return url, None
				550
				551	def splitattr(url):
				552	"""splitattr('/path;attr1=value1;attr2=value2;...') ->
				553	'/path', ['attr1=value1', 'attr2=value2', ...]."""
				554	words = url.split(';')
				555	return words[0], words[1:]
				556
				557	_valueprog = None
				558	def splitvalue(attr):
				559	"""splitvalue('attr=value') --> 'attr', 'value'."""
				560	global _valueprog
				561	if _valueprog is None:
				562	import re
				563	_valueprog = re.compile('^([^=])=(.)$')
				564
				565	match = _valueprog.match(attr)
				566	if match: return match.group(1, 2)
				567	return attr, None
				568
				569	test_input = """
				570	http://a/b/c/d
				571
				572	g:h = <URL:g:h>
				573	http:g = <URL:http://a/b/c/g>
				574	http: = <URL:http://a/b/c/d>
				575	g = <URL:http://a/b/c/g>
				576	./g = <URL:http://a/b/c/g>
				577	g/ = <URL:http://a/b/c/g/>
				578	/g = <URL:http://a/g>
				579	//g = <URL:http://g>
				580	?y = <URL:http://a/b/c/d?y>
				581	g?y = <URL:http://a/b/c/g?y>
				582	g?y/./x = <URL:http://a/b/c/g?y/./x>
				583	. = <URL:http://a/b/c/>
				584	./ = <URL:http://a/b/c/>
				585	.. = <URL:http://a/b/>
				586	../ = <URL:http://a/b/>
				587	../g = <URL:http://a/b/g>
				588	../.. = <URL:http://a/>
				589	../../g = <URL:http://a/g>
				590	../../../g = <URL:http://a/../g>
				591	./../g = <URL:http://a/b/g>
				592	./g/. = <URL:http://a/b/c/g/>
				593	/./g = <URL:http://a/./g>
				594	g/./h = <URL:http://a/b/c/g/h>
				595	g/../h = <URL:http://a/b/c/h>
				596	http:g = <URL:http://a/b/c/g>
				597	http: = <URL:http://a/b/c/d>
				598	http:?y = <URL:http://a/b/c/d?y>
				599	http:g?y = <URL:http://a/b/c/g?y>
				600	http:g?y/./x = <URL:http://a/b/c/g?y/./x>
				601	"""
				602
				603	def test():
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	604	base = ''
				605	if sys.argv[1:]:
				606	fn = sys.argv[1]
				607	if fn == '-':
				608	fp = sys.stdin
				609	else:
				610	fp = open(fn)
				611	else:
				612	from io import StringIO
				613	fp = StringIO(test_input)
				614	for line in fp:
				615	words = line.split()
				616	if not words:
				617	continue
				618	url = words[0]
				619	parts = urlparse(url)
				620	print('%-10s : %s' % (url, parts))
				621	abs = urljoin(base, url)
				622	if not base:
				623	base = abs
				624	wrapped = '<URL:%s>' % abs
				625	print('%-10s = %s' % (url, wrapped))
				626	if len(words) == 3 and words[1] == '=':
				627	if wrapped != words[2]:
				628	print('EXPECTED', words[2], '!!!!!!!!!!')
				629
				630	if __name__ == '__main__':
				631	test()