Blame - Lib/urllib/parse.py - platform/external/python/cpython2

blob: f924a3a4a1cffdc0d9b790426181165f3e97c36c [file] [log] [blame]

Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	1	"""Parse (absolute and relative) URLs.
				2
				3	See RFC 1808: "Relative Uniform Resource Locators", by R. Fielding,
				4	UC Irvine, June 1995.
				5	"""
				6
Facundo Batista	2ac5de2	2008-07-07 18:24:11 +0000	[diff] [blame]	7	import sys
				8
# Names exported by ``from <module> import *``.
__all__ = [
    "urlparse",
    "urlunparse",
    "urljoin",
    "urldefrag",
    "urlsplit",
    "urlunsplit",
]
				11
				12	# A classification of schemes ('' means apply by default)
				13	uses_relative = ['ftp', 'http', 'gopher', 'nntp', 'imap',
				14	'wais', 'file', 'https', 'shttp', 'mms',
				15	'prospero', 'rtsp', 'rtspu', '', 'sftp']
				16	uses_netloc = ['ftp', 'http', 'gopher', 'nntp', 'telnet',
				17	'imap', 'wais', 'file', 'mms', 'https', 'shttp',
				18	'snews', 'prospero', 'rtsp', 'rtspu', 'rsync', '',
				19	'svn', 'svn+ssh', 'sftp']
				20	non_hierarchical = ['gopher', 'hdl', 'mailto', 'news',
				21	'telnet', 'wais', 'imap', 'snews', 'sip', 'sips']
				22	uses_params = ['ftp', 'hdl', 'prospero', 'http', 'imap',
				23	'https', 'shttp', 'rtsp', 'rtspu', 'sip', 'sips',
				24	'mms', '', 'sftp']
				25	uses_query = ['http', 'wais', 'imap', 'https', 'shttp', 'mms',
				26	'gopher', 'rtsp', 'rtspu', 'sip', 'sips', '']
				27	uses_fragment = ['ftp', 'hdl', 'http', 'gopher', 'news',
				28	'nntp', 'wais', 'https', 'shttp', 'snews',
				29	'file', 'prospero', '']
				30
				31	# Characters valid in scheme names
				32	scheme_chars = ('abcdefghijklmnopqrstuvwxyz'
				33	'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
				34	'0123456789'
				35	'+-.')
				36
				37	MAX_CACHE_SIZE = 20
				38	_parse_cache = {}
				39
				40	def clear_cache():
				41	"""Clear the parse cache."""
				42	_parse_cache.clear()
				43
				44
				45	class ResultMixin(object):
				46	"""Shared methods for the parsed result objects."""
				47
				48	@property
				49	def username(self):
				50	netloc = self.netloc
				51	if "@" in netloc:
				52	userinfo = netloc.rsplit("@", 1)[0]
				53	if ":" in userinfo:
				54	userinfo = userinfo.split(":", 1)[0]
				55	return userinfo
				56	return None
				57
				58	@property
				59	def password(self):
				60	netloc = self.netloc
				61	if "@" in netloc:
				62	userinfo = netloc.rsplit("@", 1)[0]
				63	if ":" in userinfo:
				64	return userinfo.split(":", 1)[1]
				65	return None
				66
				67	@property
				68	def hostname(self):
				69	netloc = self.netloc
				70	if "@" in netloc:
				71	netloc = netloc.rsplit("@", 1)[1]
				72	if ":" in netloc:
				73	netloc = netloc.split(":", 1)[0]
				74	return netloc.lower() or None
				75
				76	@property
				77	def port(self):
				78	netloc = self.netloc
				79	if "@" in netloc:
				80	netloc = netloc.rsplit("@", 1)[1]
				81	if ":" in netloc:
				82	port = netloc.split(":", 1)[1]
				83	return int(port, 10)
				84	return None
				85
				86	from collections import namedtuple
				87
				88	class SplitResult(namedtuple('SplitResult', 'scheme netloc path query fragment'), ResultMixin):
				89
				90	__slots__ = ()
				91
				92	def geturl(self):
				93	return urlunsplit(self)
				94
				95
				96	class ParseResult(namedtuple('ParseResult', 'scheme netloc path params query fragment'), ResultMixin):
				97
				98	__slots__ = ()
				99
				100	def geturl(self):
				101	return urlunparse(self)
				102
				103
				104	def urlparse(url, scheme='', allow_fragments=True):
				105	"""Parse a URL into 6 components:
				106	<scheme>://<netloc>/<path>;<params>?<query>#<fragment>
				107	Return a 6-tuple: (scheme, netloc, path, params, query, fragment).
				108	Note that we don't break the components up in smaller bits
				109	(e.g. netloc is a single string) and we don't expand % escapes."""
				110	tuple = urlsplit(url, scheme, allow_fragments)
				111	scheme, netloc, url, query, fragment = tuple
				112	if scheme in uses_params and ';' in url:
				113	url, params = _splitparams(url)
				114	else:
				115	params = ''
				116	return ParseResult(scheme, netloc, url, params, query, fragment)
				117
				118	def _splitparams(url):
				119	if '/' in url:
				120	i = url.find(';', url.rfind('/'))
				121	if i < 0:
				122	return url, ''
				123	else:
				124	i = url.find(';')
				125	return url[:i], url[i+1:]
				126
				127	def _splitnetloc(url, start=0):
				128	delim = len(url) # position of end of domain part of url, default is end
				129	for c in '/?#': # look for delimiters; the order is NOT important
				130	wdelim = url.find(c, start) # find first of this delim
				131	if wdelim >= 0: # if found
				132	delim = min(delim, wdelim) # use earliest delim position
				133	return url[start:delim], url[delim:] # return (domain, rest)
				134
				135	def urlsplit(url, scheme='', allow_fragments=True):
				136	"""Parse a URL into 5 components:
				137	<scheme>://<netloc>/<path>?<query>#<fragment>
				138	Return a 5-tuple: (scheme, netloc, path, query, fragment).
				139	Note that we don't break the components up in smaller bits
				140	(e.g. netloc is a single string) and we don't expand % escapes."""
				141	allow_fragments = bool(allow_fragments)
				142	key = url, scheme, allow_fragments, type(url), type(scheme)
				143	cached = _parse_cache.get(key, None)
				144	if cached:
				145	return cached
				146	if len(_parse_cache) >= MAX_CACHE_SIZE: # avoid runaway growth
				147	clear_cache()
				148	netloc = query = fragment = ''
				149	i = url.find(':')
				150	if i > 0:
				151	if url[:i] == 'http': # optimize the common case
				152	scheme = url[:i].lower()
				153	url = url[i+1:]
				154	if url[:2] == '//':
				155	netloc, url = _splitnetloc(url, 2)
				156	if allow_fragments and '#' in url:
				157	url, fragment = url.split('#', 1)
				158	if '?' in url:
				159	url, query = url.split('?', 1)
				160	v = SplitResult(scheme, netloc, url, query, fragment)
				161	_parse_cache[key] = v
				162	return v
				163	for c in url[:i]:
				164	if c not in scheme_chars:
				165	break
				166	else:
				167	scheme, url = url[:i].lower(), url[i+1:]
				168	if scheme in uses_netloc and url[:2] == '//':
				169	netloc, url = _splitnetloc(url, 2)
				170	if allow_fragments and scheme in uses_fragment and '#' in url:
				171	url, fragment = url.split('#', 1)
				172	if scheme in uses_query and '?' in url:
				173	url, query = url.split('?', 1)
				174	v = SplitResult(scheme, netloc, url, query, fragment)
				175	_parse_cache[key] = v
				176	return v
				177
				178	def urlunparse(components):
				179	"""Put a parsed URL back together again. This may result in a
				180	slightly different, but equivalent URL, if the URL that was parsed
				181	originally had redundant delimiters, e.g. a ? with an empty query
				182	(the draft states that these are equivalent)."""
				183	scheme, netloc, url, params, query, fragment = components
				184	if params:
				185	url = "%s;%s" % (url, params)
				186	return urlunsplit((scheme, netloc, url, query, fragment))
				187
				188	def urlunsplit(components):
				189	scheme, netloc, url, query, fragment = components
				190	if netloc or (scheme and scheme in uses_netloc and url[:2] != '//'):
				191	if url and url[:1] != '/': url = '/' + url
				192	url = '//' + (netloc or '') + url
				193	if scheme:
				194	url = scheme + ':' + url
				195	if query:
				196	url = url + '?' + query
				197	if fragment:
				198	url = url + '#' + fragment
				199	return url
				200
				201	def urljoin(base, url, allow_fragments=True):
				202	"""Join a base URL and a possibly relative URL to form an absolute
				203	interpretation of the latter."""
				204	if not base:
				205	return url
				206	if not url:
				207	return base
				208	bscheme, bnetloc, bpath, bparams, bquery, bfragment = \
				209	urlparse(base, '', allow_fragments)
				210	scheme, netloc, path, params, query, fragment = \
				211	urlparse(url, bscheme, allow_fragments)
				212	if scheme != bscheme or scheme not in uses_relative:
				213	return url
				214	if scheme in uses_netloc:
				215	if netloc:
				216	return urlunparse((scheme, netloc, path,
				217	params, query, fragment))
				218	netloc = bnetloc
				219	if path[:1] == '/':
				220	return urlunparse((scheme, netloc, path,
				221	params, query, fragment))
				222	if not (path or params or query):
				223	return urlunparse((scheme, netloc, bpath,
				224	bparams, bquery, fragment))
				225	segments = bpath.split('/')[:-1] + path.split('/')
				226	# XXX The stuff below is bogus in various ways...
				227	if segments[-1] == '.':
				228	segments[-1] = ''
				229	while '.' in segments:
				230	segments.remove('.')
				231	while 1:
				232	i = 1
				233	n = len(segments) - 1
				234	while i < n:
				235	if (segments[i] == '..'
				236	and segments[i-1] not in ('', '..')):
				237	del segments[i-1:i+1]
				238	break
				239	i = i+1
				240	else:
				241	break
				242	if segments == ['', '..']:
				243	segments[-1] = ''
				244	elif len(segments) >= 2 and segments[-1] == '..':
				245	segments[-2:] = ['']
				246	return urlunparse((scheme, netloc, '/'.join(segments),
				247	params, query, fragment))
				248
				249	def urldefrag(url):
				250	"""Removes any existing fragment from URL.
				251
				252	Returns a tuple of the defragmented URL and the fragment. If
				253	the URL contained no fragments, the second element is the
				254	empty string.
				255	"""
				256	if '#' in url:
				257	s, n, p, a, q, frag = urlparse(url)
				258	defrag = urlunparse((s, n, p, a, q, ''))
				259	return defrag, frag
				260	else:
				261	return url, ''
				262
				263
def unquote_as_string(s, plus=False, charset=None):
    """Undo %XX escaping in *s* and decode the resulting bytes to str.

    *charset* names the encoding used for decoding (strict error
    handling); None selects UTF-8.  *plus* is forwarded to
    unquote_as_bytes().
    """
    encoding = "UTF-8" if charset is None else charset
    raw = unquote_as_bytes(s, plus=plus)
    return str(raw, encoding, 'strict')
				268
				269	def unquote_as_bytes (s, plus=False):
				270	"""unquote('abc%20def') -> 'abc def'."""
				271	if plus:
				272	s = s.replace('+', ' ')
				273	res = s.split('%')
				274	res[0] = res[0].encode('ASCII', 'strict')
				275	for i in range(1, len(res)):
				276	res[i] = (bytes.fromhex(res[i][:2]) +
				277	res[i][2:].encode('ASCII', 'strict'))
				278	return b''.join(res)
				279
				280	_always_safe = (b'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
				281	b'abcdefghijklmnopqrstuvwxyz'
				282	b'0123456789'
				283	b'_.-')
				284
				285	_percent_code = ord('%')
				286
				287	_hextable = b'0123456789ABCDEF'
				288
				289	def quote_as_bytes(s, safe = '/', plus=False):
				290	"""quote(b'abc@def') -> 'abc%40def'"""
				291
				292	if isinstance(s, str):
				293	s = s.encode("UTF-8", "strict")
				294	if not (isinstance(s, bytes) or isinstance(s, bytearray)):
				295	raise ValueError("Argument to quote must be either bytes "
				296	"or bytearray; string arguments will be "
				297	"converted to UTF-8 bytes")
				298
				299	safeset = _always_safe + safe.encode('ASCII', 'strict')
				300	if plus:
				301	safeset += b' '
				302
				303	result = bytearray()
				304	for i in s:
				305	if i not in safeset:
				306	result.append(_percent_code)
				307	result.append(_hextable[(i >> 4) & 0xF])
				308	result.append(_hextable[i & 0xF])
				309	else:
				310	result.append(i)
				311	if plus:
				312	result = result.replace(b' ', b'+')
				313	return result
				314
				315	def quote_as_string(s, safe = '/', plus=False):
				316	return str(quote_as_bytes(s, safe=safe, plus=plus), 'ASCII', 'strict')
				317
				318	# finally, define defaults for 'quote' and 'unquote'
				319
				320	def quote(s, safe='/'):
				321	return quote_as_string(s, safe=safe)
				322
				323	def quote_plus(s, safe=''):
				324	return quote_as_string(s, safe=safe, plus=True)
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	325
				326	def unquote(s):
Guido van Rossum	10faf6a	2008-08-06 19:29:14 +0000	[diff] [blame^]	327	return unquote_as_string(s)
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	328
				329	def unquote_plus(s):
Guido van Rossum	10faf6a	2008-08-06 19:29:14 +0000	[diff] [blame^]	330	return unquote_as_string(s, plus=True)
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	331
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	332
				333	def urlencode(query,doseq=0):
				334	"""Encode a sequence of two-element tuples or dictionary into a URL query string.
				335
				336	If any values in the query arg are sequences and doseq is true, each
				337	sequence element is converted to a separate parameter.
				338
				339	If the query arg is a sequence of two-element tuples, the order of the
				340	parameters in the output will match the order of parameters in the
				341	input.
				342	"""
				343
				344	if hasattr(query,"items"):
				345	# mapping objects
				346	query = query.items()
				347	else:
				348	# it's a bother at times that strings and string-like objects are
				349	# sequences...
				350	try:
				351	# non-sequence items should not work with len()
				352	# non-empty strings will fail this
				353	if len(query) and not isinstance(query[0], tuple):
				354	raise TypeError
				355	# zero-length sequences of all types will get here and succeed,
				356	# but that's a minor nit - since the original implementation
				357	# allowed empty dicts that type of behavior probably should be
				358	# preserved for consistency
				359	except TypeError:
				360	ty,va,tb = sys.exc_info()
				361	raise TypeError("not a valid non-string sequence or mapping object").with_traceback(tb)
				362
				363	l = []
				364	if not doseq:
				365	# preserve old behavior
				366	for k, v in query:
				367	k = quote_plus(str(k))
				368	v = quote_plus(str(v))
				369	l.append(k + '=' + v)
				370	else:
				371	for k, v in query:
				372	k = quote_plus(str(k))
				373	if isinstance(v, str):
				374	v = quote_plus(v)
				375	l.append(k + '=' + v)
				376	elif isinstance(v, str):
				377	# is there a reasonable way to convert to ASCII?
				378	# encode generates a string, but "replace" or "ignore"
				379	# lose information and "strict" can raise UnicodeError
Guido van Rossum	10faf6a	2008-08-06 19:29:14 +0000	[diff] [blame^]	380	v = quote_plus(v)
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	381	l.append(k + '=' + v)
				382	else:
				383	try:
				384	# is this a sufficient test for sequence-ness?
				385	x = len(v)
				386	except TypeError:
				387	# not a sequence
				388	v = quote_plus(str(v))
				389	l.append(k + '=' + v)
				390	else:
				391	# loop over the sequence
				392	for elt in v:
				393	l.append(k + '=' + quote_plus(str(elt)))
				394	return '&'.join(l)
				395
				396	# Utilities to parse URLs (most of these return None for missing parts):
				397	# unwrap('<URL:type://host/path>') --> 'type://host/path'
				398	# splittype('type:opaquestring') --> 'type', 'opaquestring'
				399	# splithost('//host[:port]/path') --> 'host[:port]', '/path'
				400	# splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'
				401	# splitpasswd('user:passwd') -> 'user', 'passwd'
				402	# splitport('host:port') --> 'host', 'port'
				403	# splitquery('/path?query') --> '/path', 'query'
				404	# splittag('/path#tag') --> '/path', 'tag'
				405	# splitattr('/path;attr1=value1;attr2=value2;...') ->
				406	# '/path', ['attr1=value1', 'attr2=value2', ...]
				407	# splitvalue('attr=value') --> 'attr', 'value'
				408	# urllib.parse.unquote('abc%20def') -> 'abc def'
				409	# quote('abc def') -> 'abc%20def')
				410
def to_bytes(url):
    """Check that a str URL is pure ASCII and return it.

    A str is round-tripped through an ASCII encode/decode; UnicodeError is
    raised when it contains non-ASCII characters.  Any other object is
    returned untouched.
    """
    # Most URL schemes require ASCII. If that changes, the conversion
    # can be relaxed.
    # XXX get rid of to_bytes()
    if not isinstance(url, str):
        return url
    try:
        return url.encode("ASCII").decode()
    except UnicodeError:
        raise UnicodeError("URL " + repr(url) +
                           " contains non-ASCII characters")
				423
				424	def unwrap(url):
				425	"""unwrap('<URL:type://host/path>') --> 'type://host/path'."""
				426	url = str(url).strip()
				427	if url[:1] == '<' and url[-1:] == '>':
				428	url = url[1:-1].strip()
				429	if url[:4] == 'URL:': url = url[4:].strip()
				430	return url
				431
				432	_typeprog = None
				433	def splittype(url):
				434	"""splittype('type:opaquestring') --> 'type', 'opaquestring'."""
				435	global _typeprog
				436	if _typeprog is None:
				437	import re
				438	_typeprog = re.compile('^([^/:]+):')
				439
				440	match = _typeprog.match(url)
				441	if match:
				442	scheme = match.group(1)
				443	return scheme.lower(), url[len(scheme) + 1:]
				444	return None, url
				445
				446	_hostprog = None
				447	def splithost(url):
				448	"""splithost('//host[:port]/path') --> 'host[:port]', '/path'."""
				449	global _hostprog
				450	if _hostprog is None:
				451	import re
				452	_hostprog = re.compile('^//([^/?])(.)$')
				453
				454	match = _hostprog.match(url)
				455	if match: return match.group(1, 2)
				456	return None, url
				457
				458	_userprog = None
				459	def splituser(host):
				460	"""splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'."""
				461	global _userprog
				462	if _userprog is None:
				463	import re
				464	_userprog = re.compile('^(.)@(.)$')
				465
				466	match = _userprog.match(host)
Guido van Rossum	10faf6a	2008-08-06 19:29:14 +0000	[diff] [blame^]	467	if match:
				468	return map(unquote, match.group(1, 2))
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	469	return None, host
				470
				471	_passwdprog = None
				472	def splitpasswd(user):
				473	"""splitpasswd('user:passwd') -> 'user', 'passwd'."""
				474	global _passwdprog
				475	if _passwdprog is None:
				476	import re
				477	_passwdprog = re.compile('^([^:]):(.)$')
				478
				479	match = _passwdprog.match(user)
				480	if match: return match.group(1, 2)
				481	return user, None
				482
				483	# splittag('/path#tag') --> '/path', 'tag'
				484	_portprog = None
				485	def splitport(host):
				486	"""splitport('host:port') --> 'host', 'port'."""
				487	global _portprog
				488	if _portprog is None:
				489	import re
				490	_portprog = re.compile('^(.*):([0-9]+)$')
				491
				492	match = _portprog.match(host)
				493	if match: return match.group(1, 2)
				494	return host, None
				495
				496	_nportprog = None
				497	def splitnport(host, defport=-1):
				498	"""Split host and port, returning numeric port.
				499	Return given default port if no ':' found; defaults to -1.
				500	Return numerical port if a valid number are found after ':'.
				501	Return None if ':' but not a valid number."""
				502	global _nportprog
				503	if _nportprog is None:
				504	import re
				505	_nportprog = re.compile('^(.):(.)$')
				506
				507	match = _nportprog.match(host)
				508	if match:
				509	host, port = match.group(1, 2)
				510	try:
				511	if not port: raise ValueError("no digits")
				512	nport = int(port)
				513	except ValueError:
				514	nport = None
				515	return host, nport
				516	return host, defport
				517
				518	_queryprog = None
				519	def splitquery(url):
				520	"""splitquery('/path?query') --> '/path', 'query'."""
				521	global _queryprog
				522	if _queryprog is None:
				523	import re
				524	_queryprog = re.compile('^(.)\?([^?])$')
				525
				526	match = _queryprog.match(url)
				527	if match: return match.group(1, 2)
				528	return url, None
				529
				530	_tagprog = None
				531	def splittag(url):
				532	"""splittag('/path#tag') --> '/path', 'tag'."""
				533	global _tagprog
				534	if _tagprog is None:
				535	import re
				536	_tagprog = re.compile('^(.)#([^#])$')
				537
				538	match = _tagprog.match(url)
				539	if match: return match.group(1, 2)
				540	return url, None
				541
				542	def splitattr(url):
				543	"""splitattr('/path;attr1=value1;attr2=value2;...') ->
				544	'/path', ['attr1=value1', 'attr2=value2', ...]."""
				545	words = url.split(';')
				546	return words[0], words[1:]
				547
				548	_valueprog = None
				549	def splitvalue(attr):
				550	"""splitvalue('attr=value') --> 'attr', 'value'."""
				551	global _valueprog
				552	if _valueprog is None:
				553	import re
				554	_valueprog = re.compile('^([^=])=(.)$')
				555
				556	match = _valueprog.match(attr)
				557	if match: return match.group(1, 2)
				558	return attr, None
				559
# Default input for test().  The first non-blank line is the base URL.
# Every following line has the form "<relative> = <URL:expected>"; test()
# joins <relative> onto the base and compares the wrapped result with the
# expected text.
test_input = """
http://a/b/c/d

g:h = <URL:g:h>
http:g = <URL:http://a/b/c/g>
http: = <URL:http://a/b/c/d>
g = <URL:http://a/b/c/g>
./g = <URL:http://a/b/c/g>
g/ = <URL:http://a/b/c/g/>
/g = <URL:http://a/g>
//g = <URL:http://g>
?y = <URL:http://a/b/c/d?y>
g?y = <URL:http://a/b/c/g?y>
g?y/./x = <URL:http://a/b/c/g?y/./x>
. = <URL:http://a/b/c/>
./ = <URL:http://a/b/c/>
.. = <URL:http://a/b/>
../ = <URL:http://a/b/>
../g = <URL:http://a/b/g>
../.. = <URL:http://a/>
../../g = <URL:http://a/g>
../../../g = <URL:http://a/../g>
./../g = <URL:http://a/b/g>
./g/. = <URL:http://a/b/c/g/>
/./g = <URL:http://a/./g>
g/./h = <URL:http://a/b/c/g/h>
g/../h = <URL:http://a/b/c/h>
http:g = <URL:http://a/b/c/g>
http: = <URL:http://a/b/c/d>
http:?y = <URL:http://a/b/c/d?y>
http:g?y = <URL:http://a/b/c/g?y>
http:g?y/./x = <URL:http://a/b/c/g?y/./x>
"""
				593
				594	def test():
Jeremy Hylton	1afc169	2008-06-18 20:49:58 +0000	[diff] [blame]	595	base = ''
				596	if sys.argv[1:]:
				597	fn = sys.argv[1]
				598	if fn == '-':
				599	fp = sys.stdin
				600	else:
				601	fp = open(fn)
				602	else:
				603	from io import StringIO
				604	fp = StringIO(test_input)
				605	for line in fp:
				606	words = line.split()
				607	if not words:
				608	continue
				609	url = words[0]
				610	parts = urlparse(url)
				611	print('%-10s : %s' % (url, parts))
				612	abs = urljoin(base, url)
				613	if not base:
				614	base = abs
				615	wrapped = '<URL:%s>' % abs
				616	print('%-10s = %s' % (url, wrapped))
				617	if len(words) == 3 and words[1] == '=':
				618	if wrapped != words[2]:
				619	print('EXPECTED', words[2], '!!!!!!!!!!')
				620
				621	if __name__ == '__main__':
				622	test()