Blame - Lib/urllib/parse.py - platform/external/python/cpython3

blob: 3e00695bf9d089c1f097aae528eb07603e969838 [file] [log] [blame]

Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	1	"""Parse (absolute and relative) URLs.
				2
				3	See RFC 1808: "Relative Uniform Resource Locators", by R. Fielding,
				4	UC Irvine, June 1995.
				5	"""
				6
Facundo Batista	2ac5de2	2008-07-07 18:24:11 +0000	[diff] [blame]	7	import sys
				8
# Names exported by ``from <this module> import *``.
__all__ = ["urlparse", "urlunparse", "urljoin", "urldefrag",
           "urlsplit", "urlunsplit"]
				11
				12	# A classification of schemes ('' means apply by default)
				13	uses_relative = ['ftp', 'http', 'gopher', 'nntp', 'imap',
				14	'wais', 'file', 'https', 'shttp', 'mms',
				15	'prospero', 'rtsp', 'rtspu', '', 'sftp']
				16	uses_netloc = ['ftp', 'http', 'gopher', 'nntp', 'telnet',
				17	'imap', 'wais', 'file', 'mms', 'https', 'shttp',
				18	'snews', 'prospero', 'rtsp', 'rtspu', 'rsync', '',
				19	'svn', 'svn+ssh', 'sftp']
				20	non_hierarchical = ['gopher', 'hdl', 'mailto', 'news',
				21	'telnet', 'wais', 'imap', 'snews', 'sip', 'sips']
				22	uses_params = ['ftp', 'hdl', 'prospero', 'http', 'imap',
				23	'https', 'shttp', 'rtsp', 'rtspu', 'sip', 'sips',
				24	'mms', '', 'sftp']
				25	uses_query = ['http', 'wais', 'imap', 'https', 'shttp', 'mms',
				26	'gopher', 'rtsp', 'rtspu', 'sip', 'sips', '']
				27	uses_fragment = ['ftp', 'hdl', 'http', 'gopher', 'news',
				28	'nntp', 'wais', 'https', 'shttp', 'snews',
				29	'file', 'prospero', '']
				30
				31	# Characters valid in scheme names
				32	scheme_chars = ('abcdefghijklmnopqrstuvwxyz'
				33	'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
				34	'0123456789'
				35	'+-.')
				36
				37	MAX_CACHE_SIZE = 20
				38	_parse_cache = {}
				39
				40	def clear_cache():
				41	"""Clear the parse cache."""
				42	_parse_cache.clear()
				43
				44
				45	class ResultMixin(object):
				46	"""Shared methods for the parsed result objects."""
				47
				48	@property
				49	def username(self):
				50	netloc = self.netloc
				51	if "@" in netloc:
				52	userinfo = netloc.rsplit("@", 1)[0]
				53	if ":" in userinfo:
				54	userinfo = userinfo.split(":", 1)[0]
				55	return userinfo
				56	return None
				57
				58	@property
				59	def password(self):
				60	netloc = self.netloc
				61	if "@" in netloc:
				62	userinfo = netloc.rsplit("@", 1)[0]
				63	if ":" in userinfo:
				64	return userinfo.split(":", 1)[1]
				65	return None
				66
				67	@property
				68	def hostname(self):
				69	netloc = self.netloc
				70	if "@" in netloc:
				71	netloc = netloc.rsplit("@", 1)[1]
				72	if ":" in netloc:
				73	netloc = netloc.split(":", 1)[0]
				74	return netloc.lower() or None
				75
				76	@property
				77	def port(self):
				78	netloc = self.netloc
				79	if "@" in netloc:
				80	netloc = netloc.rsplit("@", 1)[1]
				81	if ":" in netloc:
				82	port = netloc.split(":", 1)[1]
				83	return int(port, 10)
				84	return None
				85
				86	from collections import namedtuple
				87
				88	class SplitResult(namedtuple('SplitResult', 'scheme netloc path query fragment'), ResultMixin):
				89
				90	__slots__ = ()
				91
				92	def geturl(self):
				93	return urlunsplit(self)
				94
				95
				96	class ParseResult(namedtuple('ParseResult', 'scheme netloc path params query fragment'), ResultMixin):
				97
				98	__slots__ = ()
				99
				100	def geturl(self):
				101	return urlunparse(self)
				102
				103
				104	def urlparse(url, scheme='', allow_fragments=True):
				105	"""Parse a URL into 6 components:
				106	<scheme>://<netloc>/<path>;<params>?<query>#<fragment>
				107	Return a 6-tuple: (scheme, netloc, path, params, query, fragment).
				108	Note that we don't break the components up in smaller bits
				109	(e.g. netloc is a single string) and we don't expand % escapes."""
				110	tuple = urlsplit(url, scheme, allow_fragments)
				111	scheme, netloc, url, query, fragment = tuple
				112	if scheme in uses_params and ';' in url:
				113	url, params = _splitparams(url)
				114	else:
				115	params = ''
				116	return ParseResult(scheme, netloc, url, params, query, fragment)
				117
				118	def _splitparams(url):
				119	if '/' in url:
				120	i = url.find(';', url.rfind('/'))
				121	if i < 0:
				122	return url, ''
				123	else:
				124	i = url.find(';')
				125	return url[:i], url[i+1:]
				126
				127	def _splitnetloc(url, start=0):
				128	delim = len(url) # position of end of domain part of url, default is end
				129	for c in '/?#': # look for delimiters; the order is NOT important
				130	wdelim = url.find(c, start) # find first of this delim
				131	if wdelim >= 0: # if found
				132	delim = min(delim, wdelim) # use earliest delim position
				133	return url[start:delim], url[delim:] # return (domain, rest)
				134
				135	def urlsplit(url, scheme='', allow_fragments=True):
				136	"""Parse a URL into 5 components:
				137	<scheme>://<netloc>/<path>?<query>#<fragment>
				138	Return a 5-tuple: (scheme, netloc, path, query, fragment).
				139	Note that we don't break the components up in smaller bits
				140	(e.g. netloc is a single string) and we don't expand % escapes."""
				141	allow_fragments = bool(allow_fragments)
				142	key = url, scheme, allow_fragments, type(url), type(scheme)
				143	cached = _parse_cache.get(key, None)
				144	if cached:
				145	return cached
				146	if len(_parse_cache) >= MAX_CACHE_SIZE: # avoid runaway growth
				147	clear_cache()
				148	netloc = query = fragment = ''
				149	i = url.find(':')
				150	if i > 0:
				151	if url[:i] == 'http': # optimize the common case
				152	scheme = url[:i].lower()
				153	url = url[i+1:]
				154	if url[:2] == '//':
				155	netloc, url = _splitnetloc(url, 2)
				156	if allow_fragments and '#' in url:
				157	url, fragment = url.split('#', 1)
				158	if '?' in url:
				159	url, query = url.split('?', 1)
				160	v = SplitResult(scheme, netloc, url, query, fragment)
				161	_parse_cache[key] = v
				162	return v
				163	for c in url[:i]:
				164	if c not in scheme_chars:
				165	break
				166	else:
				167	scheme, url = url[:i].lower(), url[i+1:]
				168	if scheme in uses_netloc and url[:2] == '//':
				169	netloc, url = _splitnetloc(url, 2)
				170	if allow_fragments and scheme in uses_fragment and '#' in url:
				171	url, fragment = url.split('#', 1)
				172	if scheme in uses_query and '?' in url:
				173	url, query = url.split('?', 1)
				174	v = SplitResult(scheme, netloc, url, query, fragment)
				175	_parse_cache[key] = v
				176	return v
				177
				178	def urlunparse(components):
				179	"""Put a parsed URL back together again. This may result in a
				180	slightly different, but equivalent URL, if the URL that was parsed
				181	originally had redundant delimiters, e.g. a ? with an empty query
				182	(the draft states that these are equivalent)."""
				183	scheme, netloc, url, params, query, fragment = components
				184	if params:
				185	url = "%s;%s" % (url, params)
				186	return urlunsplit((scheme, netloc, url, query, fragment))
				187
				188	def urlunsplit(components):
				189	scheme, netloc, url, query, fragment = components
				190	if netloc or (scheme and scheme in uses_netloc and url[:2] != '//'):
				191	if url and url[:1] != '/': url = '/' + url
				192	url = '//' + (netloc or '') + url
				193	if scheme:
				194	url = scheme + ':' + url
				195	if query:
				196	url = url + '?' + query
				197	if fragment:
				198	url = url + '#' + fragment
				199	return url
				200
				201	def urljoin(base, url, allow_fragments=True):
				202	"""Join a base URL and a possibly relative URL to form an absolute
				203	interpretation of the latter."""
				204	if not base:
				205	return url
				206	if not url:
				207	return base
				208	bscheme, bnetloc, bpath, bparams, bquery, bfragment = \
				209	urlparse(base, '', allow_fragments)
				210	scheme, netloc, path, params, query, fragment = \
				211	urlparse(url, bscheme, allow_fragments)
				212	if scheme != bscheme or scheme not in uses_relative:
				213	return url
				214	if scheme in uses_netloc:
				215	if netloc:
				216	return urlunparse((scheme, netloc, path,
				217	params, query, fragment))
				218	netloc = bnetloc
				219	if path[:1] == '/':
				220	return urlunparse((scheme, netloc, path,
				221	params, query, fragment))
Facundo Batista	23e3856	2008-08-14 16:55:14 +0000	[diff] [blame]	222	if not path:
				223	path = bpath
				224	if not params:
				225	params = bparams
				226	else:
				227	path = path[:-1]
				228	return urlunparse((scheme, netloc, path,
				229	params, query, fragment))
				230	if not query:
				231	query = bquery
				232	return urlunparse((scheme, netloc, path,
				233	params, query, fragment))
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	234	segments = bpath.split('/')[:-1] + path.split('/')
				235	# XXX The stuff below is bogus in various ways...
				236	if segments[-1] == '.':
				237	segments[-1] = ''
				238	while '.' in segments:
				239	segments.remove('.')
				240	while 1:
				241	i = 1
				242	n = len(segments) - 1
				243	while i < n:
				244	if (segments[i] == '..'
				245	and segments[i-1] not in ('', '..')):
				246	del segments[i-1:i+1]
				247	break
				248	i = i+1
				249	else:
				250	break
				251	if segments == ['', '..']:
				252	segments[-1] = ''
				253	elif len(segments) >= 2 and segments[-1] == '..':
				254	segments[-2:] = ['']
				255	return urlunparse((scheme, netloc, '/'.join(segments),
				256	params, query, fragment))
				257
				258	def urldefrag(url):
				259	"""Removes any existing fragment from URL.
				260
				261	Returns a tuple of the defragmented URL and the fragment. If
				262	the URL contained no fragments, the second element is the
				263	empty string.
				264	"""
				265	if '#' in url:
				266	s, n, p, a, q, frag = urlparse(url)
				267	defrag = urlunparse((s, n, p, a, q, ''))
				268	return defrag, frag
				269	else:
				270	return url, ''
				271
				272
# Map every two-digit hex string, in any mix of upper and lower case,
# to the character with that code point.  Building it from all digit
# pairs covers escapes such as '%aB' that a '%02x' / '%02X' table misses.
_hextochr = {a + b: chr(int(a + b, 16))
             for a in '0123456789ABCDEFabcdef'
             for b in '0123456789ABCDEFabcdef'}


def unquote(s):
    """unquote('abc%20def') -> 'abc def'.

    A '%' that is not followed by two hex digits is left unchanged.
    """
    res = s.split('%')
    # res[0] precedes the first '%'; every later piece followed one.
    for i in range(1, len(res)):
        item = res[i]
        try:
            res[i] = _hextochr[item[:2]] + item[2:]
        except KeyError:
            # Not a valid escape: restore the '%' consumed by split().
            res[i] = '%' + item
    return "".join(res)
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	288
				289	def unquote_plus(s):
Guido van Rossum	df9f1ec	2008-08-06 19:31:34 +0000	[diff] [blame]	290	"""unquote('%7e/abc+def') -> '~/abc def'"""
				291	s = s.replace('+', ' ')
				292	return unquote(s)
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	293
# Characters that quote() never escapes, whatever `safe` is.
always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
               'abcdefghijklmnopqrstuvwxyz'
               '0123456789' '_.-')
# Quoter instances created by quote(), keyed by (safe, always_safe).
_safe_quoters = {}


class Quoter:
    """Callable that maps one character to its quoted form.

    Results for characters below code point 256 are memoised in
    ``self.cache``.
    """

    def __init__(self, safe):
        self.cache = {}
        self.safe = safe + always_safe

    def __call__(self, c):
        try:
            return self.cache[c]
        except KeyError:
            if ord(c) < 256:
                # NOTE(review): code points 128-255 are escaped as one
                # '%XX' of the code point, not as UTF-8 like the branch
                # below -- confirm this asymmetry is intended.
                res = c if c in self.safe else '%%%02X' % ord(c)
                self.cache[c] = res
                return res
            else:
                # Escape each byte of the UTF-8 encoding (not cached).
                return "".join(['%%%02X' % i for i in c.encode("utf-8")])
				314
				315	def quote(s, safe = '/'):
				316	"""quote('abc def') -> 'abc%20def'
				317
				318	Each part of a URL, e.g. the path info, the query, etc., has a
				319	different set of reserved characters that must be quoted.
				320
				321	RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists
				322	the following reserved characters.
				323
				324	reserved = ";" \| "/" \| "?" \| ":" \| "@" \| "&" \| "=" \| "+" \|
				325	"$" \| ","
				326
				327	Each of these characters is reserved in some component of a URL,
				328	but not necessarily in all of them.
				329
				330	By default, the quote function is intended for quoting the path
				331	section of a URL. Thus, it will not encode '/'. This character
				332	is reserved, but in typical usage the quote function is being
				333	called on a path where the existing slash characters are used as
				334	reserved characters.
				335	"""
				336	cachekey = (safe, always_safe)
				337	try:
				338	quoter = _safe_quoters[cachekey]
				339	except KeyError:
				340	quoter = Quoter(safe)
				341	_safe_quoters[cachekey] = quoter
				342	res = map(quoter, s)
				343	return ''.join(res)
				344
				345	def quote_plus(s, safe = ''):
				346	"""Quote the query fragment of a URL; replacing ' ' with '+'"""
				347	if ' ' in s:
				348	s = quote(s, safe + ' ')
				349	return s.replace(' ', '+')
				350	return quote(s, safe)
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	351
				352	def urlencode(query,doseq=0):
				353	"""Encode a sequence of two-element tuples or dictionary into a URL query string.
				354
				355	If any values in the query arg are sequences and doseq is true, each
				356	sequence element is converted to a separate parameter.
				357
				358	If the query arg is a sequence of two-element tuples, the order of the
				359	parameters in the output will match the order of parameters in the
				360	input.
				361	"""
				362
				363	if hasattr(query,"items"):
				364	# mapping objects
				365	query = query.items()
				366	else:
				367	# it's a bother at times that strings and string-like objects are
				368	# sequences...
				369	try:
				370	# non-sequence items should not work with len()
				371	# non-empty strings will fail this
				372	if len(query) and not isinstance(query[0], tuple):
				373	raise TypeError
				374	# zero-length sequences of all types will get here and succeed,
				375	# but that's a minor nit - since the original implementation
				376	# allowed empty dicts that type of behavior probably should be
				377	# preserved for consistency
				378	except TypeError:
				379	ty,va,tb = sys.exc_info()
				380	raise TypeError("not a valid non-string sequence or mapping object").with_traceback(tb)
				381
				382	l = []
				383	if not doseq:
				384	# preserve old behavior
				385	for k, v in query:
				386	k = quote_plus(str(k))
				387	v = quote_plus(str(v))
				388	l.append(k + '=' + v)
				389	else:
				390	for k, v in query:
				391	k = quote_plus(str(k))
				392	if isinstance(v, str):
				393	v = quote_plus(v)
				394	l.append(k + '=' + v)
				395	elif isinstance(v, str):
				396	# is there a reasonable way to convert to ASCII?
				397	# encode generates a string, but "replace" or "ignore"
				398	# lose information and "strict" can raise UnicodeError
Guido van Rossum	df9f1ec	2008-08-06 19:31:34 +0000	[diff] [blame]	399	v = quote_plus(v.encode("ASCII","replace"))
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	400	l.append(k + '=' + v)
				401	else:
				402	try:
				403	# is this a sufficient test for sequence-ness?
				404	x = len(v)
				405	except TypeError:
				406	# not a sequence
				407	v = quote_plus(str(v))
				408	l.append(k + '=' + v)
				409	else:
				410	# loop over the sequence
				411	for elt in v:
				412	l.append(k + '=' + quote_plus(str(elt)))
				413	return '&'.join(l)
				414
				415	# Utilities to parse URLs (most of these return None for missing parts):
				416	# unwrap('<URL:type://host/path>') --> 'type://host/path'
				417	# splittype('type:opaquestring') --> 'type', 'opaquestring'
				418	# splithost('//host[:port]/path') --> 'host[:port]', '/path'
				419	# splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'
				420	# splitpasswd('user:passwd') -> 'user', 'passwd'
				421	# splitport('host:port') --> 'host', 'port'
				422	# splitquery('/path?query') --> '/path', 'query'
				423	# splittag('/path#tag') --> '/path', 'tag'
				424	# splitattr('/path;attr1=value1;attr2=value2;...') ->
				425	# '/path', ['attr1=value1', 'attr2=value2', ...]
				426	# splitvalue('attr=value') --> 'attr', 'value'
				427	# urllib.parse.unquote('abc%20def') -> 'abc def'
				428	# quote('abc def') -> 'abc%20def')
				429
def to_bytes(url):
    """to_bytes(u"URL") --> 'URL'.

    Return *url* unchanged; a str argument is first checked to be pure
    ASCII.  Raises UnicodeError when a str contains non-ASCII characters.
    """
    # Most URL schemes require ASCII. If that changes, the conversion
    # can be relaxed.
    # XXX get rid of to_bytes()
    if isinstance(url, str):
        try:
            # Round-trip through ASCII purely as a validity check.
            url = url.encode("ASCII").decode()
        except UnicodeError:
            raise UnicodeError("URL " + repr(url) +
                               " contains non-ASCII characters")
    return url
				442
				443	def unwrap(url):
				444	"""unwrap('<URL:type://host/path>') --> 'type://host/path'."""
				445	url = str(url).strip()
				446	if url[:1] == '<' and url[-1:] == '>':
				447	url = url[1:-1].strip()
				448	if url[:4] == 'URL:': url = url[4:].strip()
				449	return url
				450
				451	_typeprog = None
				452	def splittype(url):
				453	"""splittype('type:opaquestring') --> 'type', 'opaquestring'."""
				454	global _typeprog
				455	if _typeprog is None:
				456	import re
				457	_typeprog = re.compile('^([^/:]+):')
				458
				459	match = _typeprog.match(url)
				460	if match:
				461	scheme = match.group(1)
				462	return scheme.lower(), url[len(scheme) + 1:]
				463	return None, url
				464
				465	_hostprog = None
				466	def splithost(url):
				467	"""splithost('//host[:port]/path') --> 'host[:port]', '/path'."""
				468	global _hostprog
				469	if _hostprog is None:
				470	import re
				471	_hostprog = re.compile('^//([^/?])(.)$')
				472
				473	match = _hostprog.match(url)
				474	if match: return match.group(1, 2)
				475	return None, url
				476
				477	_userprog = None
				478	def splituser(host):
				479	"""splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'."""
				480	global _userprog
				481	if _userprog is None:
				482	import re
				483	_userprog = re.compile('^(.)@(.)$')
				484
				485	match = _userprog.match(host)
Guido van Rossum	df9f1ec	2008-08-06 19:31:34 +0000	[diff] [blame]	486	if match: return map(unquote, match.group(1, 2))
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	487	return None, host
				488
				489	_passwdprog = None
				490	def splitpasswd(user):
				491	"""splitpasswd('user:passwd') -> 'user', 'passwd'."""
				492	global _passwdprog
				493	if _passwdprog is None:
				494	import re
				495	_passwdprog = re.compile('^([^:]):(.)$')
				496
				497	match = _passwdprog.match(user)
				498	if match: return match.group(1, 2)
				499	return user, None
				500
				501	# splittag('/path#tag') --> '/path', 'tag'
				502	_portprog = None
				503	def splitport(host):
				504	"""splitport('host:port') --> 'host', 'port'."""
				505	global _portprog
				506	if _portprog is None:
				507	import re
				508	_portprog = re.compile('^(.*):([0-9]+)$')
				509
				510	match = _portprog.match(host)
				511	if match: return match.group(1, 2)
				512	return host, None
				513
				514	_nportprog = None
				515	def splitnport(host, defport=-1):
				516	"""Split host and port, returning numeric port.
				517	Return given default port if no ':' found; defaults to -1.
				518	Return numerical port if a valid number are found after ':'.
				519	Return None if ':' but not a valid number."""
				520	global _nportprog
				521	if _nportprog is None:
				522	import re
				523	_nportprog = re.compile('^(.):(.)$')
				524
				525	match = _nportprog.match(host)
				526	if match:
				527	host, port = match.group(1, 2)
				528	try:
				529	if not port: raise ValueError("no digits")
				530	nport = int(port)
				531	except ValueError:
				532	nport = None
				533	return host, nport
				534	return host, defport
				535
				536	_queryprog = None
				537	def splitquery(url):
				538	"""splitquery('/path?query') --> '/path', 'query'."""
				539	global _queryprog
				540	if _queryprog is None:
				541	import re
				542	_queryprog = re.compile('^(.)\?([^?])$')
				543
				544	match = _queryprog.match(url)
				545	if match: return match.group(1, 2)
				546	return url, None
				547
				548	_tagprog = None
				549	def splittag(url):
				550	"""splittag('/path#tag') --> '/path', 'tag'."""
				551	global _tagprog
				552	if _tagprog is None:
				553	import re
				554	_tagprog = re.compile('^(.)#([^#])$')
				555
				556	match = _tagprog.match(url)
				557	if match: return match.group(1, 2)
				558	return url, None
				559
				560	def splitattr(url):
				561	"""splitattr('/path;attr1=value1;attr2=value2;...') ->
				562	'/path', ['attr1=value1', 'attr2=value2', ...]."""
				563	words = url.split(';')
				564	return words[0], words[1:]
				565
				566	_valueprog = None
				567	def splitvalue(attr):
				568	"""splitvalue('attr=value') --> 'attr', 'value'."""
				569	global _valueprog
				570	if _valueprog is None:
				571	import re
				572	_valueprog = re.compile('^([^=])=(.)$')
				573
				574	match = _valueprog.match(attr)
				575	if match: return match.group(1, 2)
				576	return attr, None
				577
				578	test_input = """
				579	http://a/b/c/d
				580
				581	g:h = <URL:g:h>
				582	http:g = <URL:http://a/b/c/g>
				583	http: = <URL:http://a/b/c/d>
				584	g = <URL:http://a/b/c/g>
				585	./g = <URL:http://a/b/c/g>
				586	g/ = <URL:http://a/b/c/g/>
				587	/g = <URL:http://a/g>
				588	//g = <URL:http://g>
				589	?y = <URL:http://a/b/c/d?y>
				590	g?y = <URL:http://a/b/c/g?y>
				591	g?y/./x = <URL:http://a/b/c/g?y/./x>
				592	. = <URL:http://a/b/c/>
				593	./ = <URL:http://a/b/c/>
				594	.. = <URL:http://a/b/>
				595	../ = <URL:http://a/b/>
				596	../g = <URL:http://a/b/g>
				597	../.. = <URL:http://a/>
				598	../../g = <URL:http://a/g>
				599	../../../g = <URL:http://a/../g>
				600	./../g = <URL:http://a/b/g>
				601	./g/. = <URL:http://a/b/c/g/>
				602	/./g = <URL:http://a/./g>
				603	g/./h = <URL:http://a/b/c/g/h>
				604	g/../h = <URL:http://a/b/c/h>
				605	http:g = <URL:http://a/b/c/g>
				606	http: = <URL:http://a/b/c/d>
				607	http:?y = <URL:http://a/b/c/d?y>
				608	http:g?y = <URL:http://a/b/c/g?y>
				609	http:g?y/./x = <URL:http://a/b/c/g?y/./x>
				610	"""
				611
				612	def test():
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	613	base = ''
				614	if sys.argv[1:]:
				615	fn = sys.argv[1]
				616	if fn == '-':
				617	fp = sys.stdin
				618	else:
				619	fp = open(fn)
				620	else:
				621	from io import StringIO
				622	fp = StringIO(test_input)
				623	for line in fp:
				624	words = line.split()
				625	if not words:
				626	continue
				627	url = words[0]
				628	parts = urlparse(url)
				629	print('%-10s : %s' % (url, parts))
				630	abs = urljoin(base, url)
				631	if not base:
				632	base = abs
				633	wrapped = '<URL:%s>' % abs
				634	print('%-10s = %s' % (url, wrapped))
				635	if len(words) == 3 and words[1] == '=':
				636	if wrapped != words[2]:
				637	print('EXPECTED', words[2], '!!!!!!!!!!')
				638
				639	if __name__ == '__main__':
				640	test()