Blame - jdk/src/share/classes/java/net/URI.java - platform/libcore

blob: 1f8e4c033b32574b05c376a946e9e9f0dea16e87 [file] [log] [blame]

J. Duke	319a3b9	2007-12-01 00:00:00 +0000	[diff] [blame^]	1	/*
				2	* Copyright 2000-2006 Sun Microsystems, Inc. All Rights Reserved.
				3	* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
				4	*
				5	* This code is free software; you can redistribute it and/or modify it
				6	* under the terms of the GNU General Public License version 2 only, as
				7	* published by the Free Software Foundation. Sun designates this
				8	* particular file as subject to the "Classpath" exception as provided
				9	* by Sun in the LICENSE file that accompanied this code.
				10	*
				11	* This code is distributed in the hope that it will be useful, but WITHOUT
				12	* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
				13	* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
				14	* version 2 for more details (a copy is included in the LICENSE file that
				15	* accompanied this code).
				16	*
				17	* You should have received a copy of the GNU General Public License version
				18	* 2 along with this work; if not, write to the Free Software Foundation,
				19	* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
				20	*
				21	* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
				22	* CA 95054 USA or visit www.sun.com if you need additional information or
				23	* have any questions.
				24	*/
				25
				26	package java.net;
				27
				28	import java.io.IOException;
				29	import java.io.InvalidObjectException;
				30	import java.io.ObjectInputStream;
				31	import java.io.ObjectOutputStream;
				32	import java.io.Serializable;
				33	import java.nio.ByteBuffer;
				34	import java.nio.CharBuffer;
				35	import java.nio.charset.CharsetDecoder;
				36	import java.nio.charset.CharsetEncoder;
				37	import java.nio.charset.CoderResult;
				38	import java.nio.charset.CodingErrorAction;
				39	import java.nio.charset.CharacterCodingException;
				40	import java.text.Normalizer;
				41	import sun.nio.cs.ThreadLocalCoders;
				42
				43	import java.lang.Character; // for javadoc
				44	import java.lang.NullPointerException; // for javadoc
				45
				46
				47	/**
				48	* Represents a Uniform Resource Identifier (URI) reference.
				49	*
				50	* <p> Aside from some minor deviations noted below, an instance of this
				51	* class represents a URI reference as defined by
				52	* <a href="http://www.ietf.org/rfc/rfc2396.txt"><i>RFC 2396: Uniform
				53	* Resource Identifiers (URI): Generic Syntax</i></a>, amended by <a
				54	* href="http://www.ietf.org/rfc/rfc2732.txt"><i>RFC 2732: Format for
				55	* Literal IPv6 Addresses in URLs</i></a>. The Literal IPv6 address format
				56	* also supports scope_ids. The syntax and usage of scope_ids is described
				57	* <a href="Inet6Address.html#scoped">here</a>.
				58	* This class provides constructors for creating URI instances from
				59	* their components or by parsing their string forms, methods for accessing the
				60	* various components of an instance, and methods for normalizing, resolving,
				61	* and relativizing URI instances. Instances of this class are immutable.
				62	*
				63	*
				64	* <h4> URI syntax and components </h4>
				65	*
				66	* At the highest level a URI reference (hereinafter simply "URI") in string
				67	* form has the syntax
				68	*
				69	* <blockquote>
				70	* [<i>scheme</i><tt><b>:</b></tt><i></i>]<i>scheme-specific-part</i>[<tt><b>#</b></tt><i>fragment</i>]
				71	* </blockquote>
				72	*
				73	* where square brackets [...] delineate optional components and the characters
				74	* <tt><b>:</b></tt> and <tt><b>#</b></tt> stand for themselves.
				75	*
				76	* <p> An <i>absolute</i> URI specifies a scheme; a URI that is not absolute is
				77	* said to be <i>relative</i>. URIs are also classified according to whether
				78	* they are <i>opaque</i> or <i>hierarchical</i>.
				79	*
				80	* <p> An <i>opaque</i> URI is an absolute URI whose scheme-specific part does
				81	* not begin with a slash character (<tt>'/'</tt>). Opaque URIs are not
				82	* subject to further parsing. Some examples of opaque URIs are:
				83	*
				84	* <blockquote><table cellpadding=0 cellspacing=0 summary="layout">
				85	* <tr><td><tt>mailto:java-net@java.sun.com</tt></td></tr>
				86	* <tr><td><tt>news:comp.lang.java</tt></td></tr>
				87	* <tr><td><tt>urn:isbn:096139210x</tt></td></tr>
				88	* </table></blockquote>
				89	*
				90	* <p> A <i>hierarchical</i> URI is either an absolute URI whose
				91	* scheme-specific part begins with a slash character, or a relative URI, that
				92	* is, a URI that does not specify a scheme. Some examples of hierarchical
				93	* URIs are:
				94	*
				95	* <blockquote>
				96	* <tt>http://java.sun.com/j2se/1.3/</tt><br>
				97	* <tt>docs/guide/collections/designfaq.html#28</tt><br>
				98	* <tt>../../../demo/jfc/SwingSet2/src/SwingSet2.java</tt><br>
				99	* <tt>file:///~/calendar</tt>
				100	* </blockquote>
				101	*
				102	* <p> A hierarchical URI is subject to further parsing according to the syntax
				103	*
				104	* <blockquote>
				105	* [<i>scheme</i><tt><b>:</b></tt>][<tt><b>//</b></tt><i>authority</i>][<i>path</i>][<tt><b>?</b></tt><i>query</i>][<tt><b>#</b></tt><i>fragment</i>]
				106	* </blockquote>
				107	*
				108	* where the characters <tt><b>:</b></tt>, <tt><b>/</b></tt>,
				109	* <tt><b>?</b></tt>, and <tt><b>#</b></tt> stand for themselves. The
				110	* scheme-specific part of a hierarchical URI consists of the characters
				111	* between the scheme and fragment components.
				112	*
				113	* <p> The authority component of a hierarchical URI is, if specified, either
				114	* <i>server-based</i> or <i>registry-based</i>. A server-based authority
				115	* parses according to the familiar syntax
				116	*
				117	* <blockquote>
				118	* [<i>user-info</i><tt><b>@</b></tt>]<i>host</i>[<tt><b>:</b></tt><i>port</i>]
				119	* </blockquote>
				120	*
				121	* where the characters <tt><b>@</b></tt> and <tt><b>:</b></tt> stand for
				122	* themselves. Nearly all URI schemes currently in use are server-based. An
				123	* authority component that does not parse in this way is considered to be
				124	* registry-based.
				125	*
				126	* <p> The path component of a hierarchical URI is itself said to be absolute
				127	* if it begins with a slash character (<tt>'/'</tt>); otherwise it is
				128	* relative. The path of a hierarchical URI that is either absolute or
				129	* specifies an authority is always absolute.
				130	*
				131	* <p> All told, then, a URI instance has the following nine components:
				132	*
				133	* <blockquote><table summary="Describes the components of a URI:scheme,scheme-specific-part,authority,user-info,host,port,path,query,fragment">
				134	* <tr><th><i>Component</i></th><th><i>Type</i></th></tr>
				135	* <tr><td>scheme</td><td><tt>String</tt></td></tr>
				136	* <tr><td>scheme-specific-part    </td><td><tt>String</tt></td></tr>
				137	* <tr><td>authority</td><td><tt>String</tt></td></tr>
				138	* <tr><td>user-info</td><td><tt>String</tt></td></tr>
				139	* <tr><td>host</td><td><tt>String</tt></td></tr>
				140	* <tr><td>port</td><td><tt>int</tt></td></tr>
				141	* <tr><td>path</td><td><tt>String</tt></td></tr>
				142	* <tr><td>query</td><td><tt>String</tt></td></tr>
				143	* <tr><td>fragment</td><td><tt>String</tt></td></tr>
				144	* </table></blockquote>
				145	*
				146	* In a given instance any particular component is either <i>undefined</i> or
				147	* <i>defined</i> with a distinct value. Undefined string components are
				148	* represented by <tt>null</tt>, while undefined integer components are
				149	* represented by <tt>-1</tt>. A string component may be defined to have the
				150	* empty string as its value; this is not equivalent to that component being
				151	* undefined.
				152	*
				153	* <p> Whether a particular component is or is not defined in an instance
				154	* depends upon the type of the URI being represented. An absolute URI has a
				155	* scheme component. An opaque URI has a scheme, a scheme-specific part, and
				156	* possibly a fragment, but has no other components. A hierarchical URI always
				157	* has a path (though it may be empty) and a scheme-specific-part (which at
				158	* least contains the path), and may have any of the other components. If the
				159	* authority component is present and is server-based then the host component
				160	* will be defined and the user-information and port components may be defined.
				161	*
				162	*
				163	* <h4> Operations on URI instances </h4>
				164	*
				165	* The key operations supported by this class are those of
				166	* <i>normalization</i>, <i>resolution</i>, and <i>relativization</i>.
				167	*
				168	* <p> <i>Normalization</i> is the process of removing unnecessary <tt>"."</tt>
				169	* and <tt>".."</tt> segments from the path component of a hierarchical URI.
				170	* Each <tt>"."</tt> segment is simply removed. A <tt>".."</tt> segment is
				171	* removed only if it is preceded by a non-<tt>".."</tt> segment.
				172	* Normalization has no effect upon opaque URIs.
				173	*
				174	* <p> <i>Resolution</i> is the process of resolving one URI against another,
				175	* <i>base</i> URI. The resulting URI is constructed from components of both
				176	* URIs in the manner specified by RFC 2396, taking components from the
				177	* base URI for those not specified in the original. For hierarchical URIs,
				178	* the path of the original is resolved against the path of the base and then
				179	* normalized. The result, for example, of resolving
				180	*
				181	* <blockquote>
				182	* <tt>docs/guide/collections/designfaq.html#28          </tt>(1)
				183	* </blockquote>
				184	*
				185	* against the base URI <tt>http://java.sun.com/j2se/1.3/</tt> is the result
				186	* URI
				187	*
				188	* <blockquote>
				189	* <tt>http://java.sun.com/j2se/1.3/docs/guide/collections/designfaq.html#28</tt>
				190	* </blockquote>
				191	*
				192	* Resolving the relative URI
				193	*
				194	* <blockquote>
				195	* <tt>../../../demo/jfc/SwingSet2/src/SwingSet2.java    </tt>(2)
				196	* </blockquote>
				197	*
				198	* against this result yields, in turn,
				199	*
				200	* <blockquote>
				201	* <tt>http://java.sun.com/j2se/1.3/demo/jfc/SwingSet2/src/SwingSet2.java</tt>
				202	* </blockquote>
				203	*
				204	* Resolution of both absolute and relative URIs, and of both absolute and
				205	* relative paths in the case of hierarchical URIs, is supported. Resolving
				206	* the URI <tt>file:///~/calendar</tt> against any other URI simply yields the
				207	* original URI, since it is absolute. Resolving the relative URI (2) above
				208	* against the relative base URI (1) yields the normalized, but still relative,
				209	* URI
				210	*
				211	* <blockquote>
				212	* <tt>demo/jfc/SwingSet2/src/SwingSet2.java</tt>
				213	* </blockquote>
				214	*
				215	* <p> <i>Relativization</i>, finally, is the inverse of resolution: For any
				216	* two normalized URIs <i>u</i> and <i>v</i>,
				217	*
				218	* <blockquote>
				219	* <i>u</i><tt>.relativize(</tt><i>u</i><tt>.resolve(</tt><i>v</i><tt>)).equals(</tt><i>v</i><tt>)</tt>  and<br>
				220	* <i>u</i><tt>.resolve(</tt><i>u</i><tt>.relativize(</tt><i>v</i><tt>)).equals(</tt><i>v</i><tt>)</tt>  .<br>
				221	* </blockquote>
				222	*
				223	* This operation is often useful when constructing a document containing URIs
				224	* that must be made relative to the base URI of the document wherever
				225	* possible. For example, relativizing the URI
				226	*
				227	* <blockquote>
				228	* <tt>http://java.sun.com/j2se/1.3/docs/guide/index.html</tt>
				229	* </blockquote>
				230	*
				231	* against the base URI
				232	*
				233	* <blockquote>
				234	* <tt>http://java.sun.com/j2se/1.3</tt>
				235	* </blockquote>
				236	*
				237	* yields the relative URI <tt>docs/guide/index.html</tt>.
				238	*
				239	*
				240	* <h4> Character categories </h4>
				241	*
				242	* RFC 2396 specifies precisely which characters are permitted in the
				243	* various components of a URI reference. The following categories, most of
				244	* which are taken from that specification, are used below to describe these
				245	* constraints:
				246	*
				247	* <blockquote><table cellspacing=2 summary="Describes categories alpha,digit,alphanum,unreserved,punct,reserved,escaped,and other">
				248	* <tr><th valign=top><i>alpha</i></th>
				249	* <td>The US-ASCII alphabetic characters,
				250	* <tt>'A'</tt> through <tt>'Z'</tt>
				251	* and <tt>'a'</tt> through <tt>'z'</tt></td></tr>
				252	* <tr><th valign=top><i>digit</i></th>
				253	* <td>The US-ASCII decimal digit characters,
				254	* <tt>'0'</tt> through <tt>'9'</tt></td></tr>
				255	* <tr><th valign=top><i>alphanum</i></th>
				256	* <td>All <i>alpha</i> and <i>digit</i> characters</td></tr>
				257	* <tr><th valign=top><i>unreserved</i>    </th>
				258	* <td>All <i>alphanum</i> characters together with those in the string
				259	* <tt>"_-!.~'()*"</tt></td></tr>
				260	* <tr><th valign=top><i>punct</i></th>
				261	* <td>The characters in the string <tt>",;:$&+="</tt></td></tr>
				262	* <tr><th valign=top><i>reserved</i></th>
				263	* <td>All <i>punct</i> characters together with those in the string
				264	* <tt>"?/[]@"</tt></td></tr>
				265	* <tr><th valign=top><i>escaped</i></th>
				266	* <td>Escaped octets, that is, triplets consisting of the percent
				267	* character (<tt>'%'</tt>) followed by two hexadecimal digits
				268	* (<tt>'0'</tt>-<tt>'9'</tt>, <tt>'A'</tt>-<tt>'F'</tt>, and
				269	* <tt>'a'</tt>-<tt>'f'</tt>)</td></tr>
				270	* <tr><th valign=top><i>other</i></th>
				271	* <td>The Unicode characters that are not in the US-ASCII character set,
				272	* are not control characters (according to the {@link
				273	* java.lang.Character#isISOControl(char) Character.isISOControl}
				274	* method), and are not space characters (according to the {@link
				275	* java.lang.Character#isSpaceChar(char) Character.isSpaceChar}
				276	* method)  <i>(<b>Deviation from RFC 2396</b>, which is
				277	* limited to US-ASCII)</i></td></tr>
				278	* </table></blockquote>
				279	*
				280	* <p><a name="legal-chars"></a> The set of all legal URI characters consists of
				281	* the <i>unreserved</i>, <i>reserved</i>, <i>escaped</i>, and <i>other</i>
				282	* characters.
				283	*
				284	*
				285	* <h4> Escaped octets, quotation, encoding, and decoding </h4>
				286	*
				287	* RFC 2396 allows escaped octets to appear in the user-info, path, query, and
				288	* fragment components. Escaping serves two purposes in URIs:
				289	*
				290	* <ul>
				291	*
				292	* <li><p> To <i>encode</i> non-US-ASCII characters when a URI is required to
				293	* conform strictly to RFC 2396 by not containing any <i>other</i>
				294	* characters. </p></li>
				295	*
				296	* <li><p> To <i>quote</i> characters that are otherwise illegal in a
				297	* component. The user-info, path, query, and fragment components differ
				298	* slightly in terms of which characters are considered legal and illegal.
				299	* </p></li>
				300	*
				301	* </ul>
				302	*
				303	* These purposes are served in this class by three related operations:
				304	*
				305	* <ul>
				306	*
				307	* <li><p><a name="encode"></a> A character is <i>encoded</i> by replacing it
				308	* with the sequence of escaped octets that represent that character in the
				309	* UTF-8 character set. The Euro currency symbol (<tt>'\u20AC'</tt>),
				310	* for example, is encoded as <tt>"%E2%82%AC"</tt>. <i>(<b>Deviation from
				311	* RFC 2396</b>, which does not specify any particular character
				312	* set.)</i> </p></li>
				313	*
				314	* <li><p><a name="quote"></a> An illegal character is <i>quoted</i> simply by
				315	* encoding it. The space character, for example, is quoted by replacing it
				316	* with <tt>"%20"</tt>. UTF-8 contains US-ASCII, hence for US-ASCII
				317	* characters this transformation has exactly the effect required by
				318	* RFC 2396. </p></li>
				319	*
				320	* <li><p><a name="decode"></a>
				321	* A sequence of escaped octets is <i>decoded</i> by
				322	* replacing it with the sequence of characters that it represents in the
				323	* UTF-8 character set. UTF-8 contains US-ASCII, hence decoding has the
				324	* effect of de-quoting any quoted US-ASCII characters as well as that of
				325	* decoding any encoded non-US-ASCII characters. If a <a
				326	* href="../nio/charset/CharsetDecoder.html#ce">decoding error</a> occurs
				327	* when decoding the escaped octets then the erroneous octets are replaced by
				328	* <tt>'\uFFFD'</tt>, the Unicode replacement character. </p></li>
				329	*
				330	* </ul>
				331	*
				332	* These operations are exposed in the constructors and methods of this class
				333	* as follows:
				334	*
				335	* <ul>
				336	*
				337	* <li><p> The {@link #URI(java.lang.String) <code>single-argument
				338	* constructor</code>} requires any illegal characters in its argument to be
				339	* quoted and preserves any escaped octets and <i>other</i> characters that
				340	* are present. </p></li>
				341	*
				342	* <li><p> The {@link
				343	* #URI(java.lang.String,java.lang.String,java.lang.String,int,java.lang.String,java.lang.String,java.lang.String)
				344	* <code>multi-argument constructors</code>} quote illegal characters as
				345	* required by the components in which they appear. The percent character
				346	* (<tt>'%'</tt>) is always quoted by these constructors. Any <i>other</i>
				347	* characters are preserved. </p></li>
				348	*
				349	* <li><p> The {@link #getRawUserInfo() getRawUserInfo}, {@link #getRawPath()
				350	* getRawPath}, {@link #getRawQuery() getRawQuery}, {@link #getRawFragment()
				351	* getRawFragment}, {@link #getRawAuthority() getRawAuthority}, and {@link
				352	* #getRawSchemeSpecificPart() getRawSchemeSpecificPart} methods return the
				353	* values of their corresponding components in raw form, without interpreting
				354	* any escaped octets. The strings returned by these methods may contain
				355	* both escaped octets and <i>other</i> characters, and will not contain any
				356	* illegal characters. </p></li>
				357	*
				358	* <li><p> The {@link #getUserInfo() getUserInfo}, {@link #getPath()
				359	* getPath}, {@link #getQuery() getQuery}, {@link #getFragment()
				360	* getFragment}, {@link #getAuthority() getAuthority}, and {@link
				361	* #getSchemeSpecificPart() getSchemeSpecificPart} methods decode any escaped
				362	* octets in their corresponding components. The strings returned by these
				363	* methods may contain both <i>other</i> characters and illegal characters,
				364	* and will not contain any escaped octets. </p></li>
				365	*
				366	* <li><p> The {@link #toString() toString} method returns a URI string with
				367	* all necessary quotation but which may contain <i>other</i> characters.
				368	* </p></li>
				369	*
				370	* <li><p> The {@link #toASCIIString() toASCIIString} method returns a fully
				371	* quoted and encoded URI string that does not contain any <i>other</i>
				372	* characters. </p></li>
				373	*
				374	* </ul>
				375	*
				376	*
				377	* <h4> Identities </h4>
				378	*
				379	* For any URI <i>u</i>, it is always the case that
				380	*
				381	* <blockquote>
				382	* <tt>new URI(</tt><i>u</i><tt>.toString()).equals(</tt><i>u</i><tt>)</tt> .
				383	* </blockquote>
				384	*
				385	* For any URI <i>u</i> that does not contain redundant syntax such as two
				386	* slashes before an empty authority (as in <tt>file:///tmp/</tt> ) or a
				387	* colon following a host name but no port (as in
				388	* <tt>http://java.sun.com:</tt> ), and that does not encode characters
				389	* except those that must be quoted, the following identities also hold:
				390	*
				391	* <blockquote>
				392	* <tt>new URI(</tt><i>u</i><tt>.getScheme(),<br>
				393	*         </tt><i>u</i><tt>.getSchemeSpecificPart(),<br>
				394	*         </tt><i>u</i><tt>.getFragment())<br>
				395	* .equals(</tt><i>u</i><tt>)</tt>
				396	* </blockquote>
				397	*
				398	* in all cases,
				399	*
				400	* <blockquote>
				401	* <tt>new URI(</tt><i>u</i><tt>.getScheme(),<br>
				402	*         </tt><i>u</i><tt>.getUserInfo(), </tt><i>u</i><tt>.getAuthority(),<br>
				403	*         </tt><i>u</i><tt>.getPath(), </tt><i>u</i><tt>.getQuery(),<br>
				404	*         </tt><i>u</i><tt>.getFragment())<br>
				405	* .equals(</tt><i>u</i><tt>)</tt>
				406	* </blockquote>
				407	*
				408	* if <i>u</i> is hierarchical, and
				409	*
				410	* <blockquote>
				411	* <tt>new URI(</tt><i>u</i><tt>.getScheme(),<br>
				412	*         </tt><i>u</i><tt>.getUserInfo(), </tt><i>u</i><tt>.getHost(), </tt><i>u</i><tt>.getPort(),<br>
				413	*         </tt><i>u</i><tt>.getPath(), </tt><i>u</i><tt>.getQuery(),<br>
				414	*         </tt><i>u</i><tt>.getFragment())<br>
				415	* .equals(</tt><i>u</i><tt>)</tt>
				416	* </blockquote>
				417	*
				418	* if <i>u</i> is hierarchical and has either no authority or a server-based
				419	* authority.
				420	*
				421	*
				422	* <h4> URIs, URLs, and URNs </h4>
				423	*
				424	* A URI is a uniform resource <i>identifier</i> while a URL is a uniform
				425	* resource <i>locator</i>. Hence every URL is a URI, abstractly speaking, but
				426	* not every URI is a URL. This is because there is another subcategory of
				427	* URIs, uniform resource <i>names</i> (URNs), which name resources but do not
				428	* specify how to locate them. The <tt>mailto</tt>, <tt>news</tt>, and
				429	* <tt>isbn</tt> URIs shown above are examples of URNs.
				430	*
				431	* <p> The conceptual distinction between URIs and URLs is reflected in the
				432	* differences between this class and the {@link URL} class.
				433	*
				434	* <p> An instance of this class represents a URI reference in the syntactic
				435	* sense defined by RFC 2396. A URI may be either absolute or relative.
				436	* A URI string is parsed according to the generic syntax without regard to the
				437	* scheme, if any, that it specifies. No lookup of the host, if any, is
				438	* performed, and no scheme-dependent stream handler is constructed. Equality,
				439	* hashing, and comparison are defined strictly in terms of the character
				440	* content of the instance. In other words, a URI instance is little more than
				441	* a structured string that supports the syntactic, scheme-independent
				442	* operations of comparison, normalization, resolution, and relativization.
				443	*
				444	* <p> An instance of the {@link URL} class, by contrast, represents the
				445	* syntactic components of a URL together with some of the information required
				446	* to access the resource that it describes. A URL must be absolute, that is,
				447	* it must always specify a scheme. A URL string is parsed according to its
				448	* scheme. A stream handler is always established for a URL, and in fact it is
				449	* impossible to create a URL instance for a scheme for which no handler is
				450	* available. Equality and hashing depend upon both the scheme and the
				451	* Internet address of the host, if any; comparison is not defined. In other
				452	* words, a URL is a structured string that supports the syntactic operation of
				453	* resolution as well as the network I/O operations of looking up the host and
				454	* opening a connection to the specified resource.
				455	*
				456	*
				457	* @author Mark Reinhold
				458	* @since 1.4
				459	*
				460	* @see <a href="http://ietf.org/rfc/rfc2279.txt"><i>RFC 2279: UTF-8, a
				461	* transformation format of ISO 10646</i></a>, <br><a
				462	* href="http://www.ietf.org/rfc/rfc2373.txt"><i>RFC 2373: IPv6 Addressing
				463	* Architecture</i></a>, <br><a
				464	* href="http://www.ietf.org/rfc/rfc2396.txt"><i>RFC 2396: Uniform
				465	* Resource Identifiers (URI): Generic Syntax</i></a>, <br><a
				466	* href="http://www.ietf.org/rfc/rfc2732.txt"><i>RFC 2732: Format for
				467	* Literal IPv6 Addresses in URLs</i></a>, <br><a
				468	* href="URISyntaxException.html">URISyntaxException</a>
				469	*/
				470
				471	public final class URI
				472	implements Comparable<URI>, Serializable
				473	{
				474
				475	// Note: Comments containing the word "ASSERT" indicate places where a
				476	// throw of an InternalError should be replaced by an appropriate assertion
				477	// statement once asserts are enabled in the build.
				478
				479	static final long serialVersionUID = -6052424284110960213L;
				480
				481
				482	// -- Properties and components of this instance --
				483
				484	// Components of all URIs: [<scheme>:]<scheme-specific-part>[#<fragment>]
				485	private transient String scheme; // null ==> relative URI
				486	private transient String fragment; // null ==> undefined
				487
				488	// Hierarchical URI components: [//<authority>]<path>[?<query>]
				489	private transient String authority; // Registry or server
				490
				491	// Server-based authority: [<userInfo>@]<host>[:<port>]
				492	private transient String userInfo; // null ==> undefined
				493	private transient String host; // null ==> registry-based
				494	private transient int port = -1; // -1 ==> undefined
				495
				496	// Remaining components of hierarchical URIs
				497	private transient String path; // null ==> opaque
				498	private transient String query; // null ==> undefined
				499
				500	// The remaining fields may be computed on demand
				501
				502	private volatile transient String schemeSpecificPart;
				503	private volatile transient int hash; // Zero ==> undefined
				504
				505	private volatile transient String decodedUserInfo = null; // NOTE(review): the decoded* fields start out null; presumably filled lazily by the decoding getters -- confirm
				506	private volatile transient String decodedAuthority = null;
				507	private volatile transient String decodedPath = null;
				508	private volatile transient String decodedQuery = null;
				509	private volatile transient String decodedFragment = null;
				510	private volatile transient String decodedSchemeSpecificPart = null;
				511
				512	/**
				513	* The string form of this URI.
				514	*
				515	* @serial
				516	*/
				517	private volatile String string; // The only serializable field
				518
				519
				520
				521	// -- Constructors and factories --
				522
				523	private URI() { } // Used internally; empty body, so every field keeps its declared initial value
				524
				525	/**
				526	* Constructs a URI by parsing the given string.
				527	*
				528	* <p> This constructor parses the given string exactly as specified by the
				529	* grammar in <a
				530	* href="http://www.ietf.org/rfc/rfc2396.txt">RFC 2396</a>,
				531	* Appendix A, <b><i>except for the following deviations:</i></b> </p>
				532	*
				533	* <ul type=disc>
				534	*
				535	* <li><p> An empty authority component is permitted as long as it is
				536	* followed by a non-empty path, a query component, or a fragment
				537	* component. This allows the parsing of URIs such as
				538	* <tt>"file:///foo/bar"</tt>, which seems to be the intent of
				539	* RFC 2396 although the grammar does not permit it. If the
				540	* authority component is empty then the user-information, host, and port
				541	* components are undefined. </p></li>
				542	*
				543	* <li><p> Empty relative paths are permitted; this seems to be the
				544	* intent of RFC 2396 although the grammar does not permit it. The
				545	* primary consequence of this deviation is that a standalone fragment
				546	* such as <tt>"#foo"</tt> parses as a relative URI with an empty path
				547	* and the given fragment, and can be usefully <a
				548	* href="#resolve-frag">resolved</a> against a base URI. </p></li>
				549	*
				550	* <li><p> IPv4 addresses in host components are parsed rigorously, as
				551	* specified by <a
				552	* href="http://www.ietf.org/rfc/rfc2732.txt">RFC 2732</a>: Each
				553	* element of a dotted-quad address must contain no more than three
				554	* decimal digits. Each element is further constrained to have a value
				555	* no greater than 255. </p></li>
				556	*
				557	* <li> <p> Hostnames in host components that comprise only a single
				558	* domain label are permitted to start with an <i>alphanum</i>
				559	* character. This seems to be the intent of <a
				560	* href="http://www.ietf.org/rfc/rfc2396.txt">RFC 2396</a>
				561	* section 3.2.2 although the grammar does not permit it. The
				562	* consequence of this deviation is that the authority component of a
				563	* hierarchical URI such as <tt>s://123</tt>, will parse as a server-based
				564	* authority. </p></li>
				565	*
				566	* <li><p> IPv6 addresses are permitted for the host component. An IPv6
				567	* address must be enclosed in square brackets (<tt>'['</tt> and
				568	* <tt>']'</tt>) as specified by <a
				569	* href="http://www.ietf.org/rfc/rfc2732.txt">RFC 2732</a>. The
				570	* IPv6 address itself must parse according to <a
				571	* href="http://www.ietf.org/rfc/rfc2373.txt">RFC 2373</a>. IPv6
				572	* addresses are further constrained to describe no more than sixteen
				573	* bytes of address information, a constraint implicit in RFC 2373
				574	* but not expressible in the grammar. </p></li>
				575	*
				576	* <li><p> Characters in the <i>other</i> category are permitted wherever
				577	* RFC 2396 permits <i>escaped</i> octets, that is, in the
				578	* user-information, path, query, and fragment components, as well as in
				579	* the authority component if the authority is registry-based. This
				580	* allows URIs to contain Unicode characters beyond those in the US-ASCII
				581	* character set. </p></li>
				582	*
				583	* </ul>
				584	*
				585	* @param str The string to be parsed into a URI
				586	*
				587	* @throws NullPointerException
				588	* If <tt>str</tt> is <tt>null</tt>
				589	*
				590	* @throws URISyntaxException
				591	* If the given string violates RFC 2396, as augmented
				592	* by the above deviations
				593	*/
				594	public URI(String str) throws URISyntaxException {
				595	new Parser(str).parse(false); // NOTE(review): Parser is declared outside this view; the false flag presumably means a server-based authority is not required -- confirm
				596	}
				597
				598	/**
				599	* Constructs a hierarchical URI from the given components.
				600	*
				601	* <p> If a scheme is given then the path, if also given, must either be
				602	* empty or begin with a slash character (<tt>'/'</tt>). Otherwise a
				603	* component of the new URI may be left undefined by passing <tt>null</tt>
				604	* for the corresponding parameter or, in the case of the <tt>port</tt>
				605	* parameter, by passing <tt>-1</tt>.
				606	*
				607	* <p> This constructor first builds a URI string from the given components
				608	* according to the rules specified in <a
				609	* href="http://www.ietf.org/rfc/rfc2396.txt">RFC 2396</a>,
				610	* section 5.2, step 7: </p>
				611	*
				612	* <ol>
				613	*
				614	* <li><p> Initially, the result string is empty. </p></li>
				615	*
				616	* <li><p> If a scheme is given then it is appended to the result,
				617	* followed by a colon character (<tt>':'</tt>). </p></li>
				618	*
				619	* <li><p> If user information, a host, or a port are given then the
				620	* string <tt>"//"</tt> is appended. </p></li>
				621	*
				622	* <li><p> If user information is given then it is appended, followed by
				623	* a commercial-at character (<tt>'@'</tt>). Any character not in the
				624	* <i>unreserved</i>, <i>punct</i>, <i>escaped</i>, or <i>other</i>
				625	* categories is <a href="#quote">quoted</a>. </p></li>
				626	*
				627	* <li><p> If a host is given then it is appended. If the host is a
				628	* literal IPv6 address but is not enclosed in square brackets
				629	* (<tt>'['</tt> and <tt>']'</tt>) then the square brackets are added.
				630	* </p></li>
				631	*
				632	* <li><p> If a port number is given then a colon character
				633	* (<tt>':'</tt>) is appended, followed by the port number in decimal.
				634	* </p></li>
				635	*
				636	* <li><p> If a path is given then it is appended. Any character not in
				637	* the <i>unreserved</i>, <i>punct</i>, <i>escaped</i>, or <i>other</i>
				638	* categories, and not equal to the slash character (<tt>'/'</tt>) or the
				639	* commercial-at character (<tt>'@'</tt>), is quoted. </p></li>
				640	*
				641	* <li><p> If a query is given then a question-mark character
				642	* (<tt>'?'</tt>) is appended, followed by the query. Any character that
				643	* is not a <a href="#legal-chars">legal URI character</a> is quoted.
				644	* </p></li>
				645	*
				646	* <li><p> Finally, if a fragment is given then a hash character
				647	* (<tt>'#'</tt>) is appended, followed by the fragment. Any character
				648	* that is not a legal URI character is quoted. </p></li>
				649	*
				650	* </ol>
				651	*
				652	* <p> The resulting URI string is then parsed as if by invoking the {@link
				653	* #URI(String)} constructor and then invoking the {@link
				654	* #parseServerAuthority()} method upon the result; this may cause a {@link
				655	* URISyntaxException} to be thrown. </p>
				656	*
				657	* @param scheme Scheme name
				658	* @param userInfo User name and authorization information
				659	* @param host Host name
				660	* @param port Port number
				661	* @param path Path
				662	* @param query Query
				663	* @param fragment Fragment
				664	*
				665	* @throws URISyntaxException
				666	* If both a scheme and a path are given but the path is relative,
				667	* if the URI string constructed from the given components violates
				668	* RFC 2396, or if the authority component of the string is
				669	* present but cannot be parsed as a server-based authority
				670	*/
				671	public URI(String scheme,
				672	String userInfo, String host, int port,
				673	String path, String query, String fragment)
				674	throws URISyntaxException
				675	{
				676	String s = toString(scheme, null,
				677	null, userInfo, host, port,
				678	path, query, fragment);
				679	checkPath(s, scheme, path);
				680	new Parser(s).parse(true);
				681	}
				682
				683	/**
				684	* Constructs a hierarchical URI from the given components.
				685	*
				686	* <p> If a scheme is given then the path, if also given, must either be
				687	* empty or begin with a slash character (<tt>'/'</tt>). Otherwise a
				688	* component of the new URI may be left undefined by passing <tt>null</tt>
				689	* for the corresponding parameter.
				690	*
				691	* <p> This constructor first builds a URI string from the given components
				692	* according to the rules specified in <a
				693	* href="http://www.ietf.org/rfc/rfc2396.txt">RFC 2396</a>,
				694	* section 5.2, step 7: </p>
				695	*
				696	* <ol>
				697	*
				698	* <li><p> Initially, the result string is empty. </p></li>
				699	*
				700	* <li><p> If a scheme is given then it is appended to the result,
				701	* followed by a colon character (<tt>':'</tt>). </p></li>
				702	*
				703	* <li><p> If an authority is given then the string <tt>"//"</tt> is
				704	* appended, followed by the authority. If the authority contains a
				705	* literal IPv6 address then the address must be enclosed in square
				706	* brackets (<tt>'['</tt> and <tt>']'</tt>). Any character not in the
				707	* <i>unreserved</i>, <i>punct</i>, <i>escaped</i>, or <i>other</i>
				708	* categories, and not equal to the commercial-at character
				709	* (<tt>'@'</tt>), is <a href="#quote">quoted</a>. </p></li>
				710	*
				711	* <li><p> If a path is given then it is appended. Any character not in
				712	* the <i>unreserved</i>, <i>punct</i>, <i>escaped</i>, or <i>other</i>
				713	* categories, and not equal to the slash character (<tt>'/'</tt>) or the
				714	* commercial-at character (<tt>'@'</tt>), is quoted. </p></li>
				715	*
				716	* <li><p> If a query is given then a question-mark character
				717	* (<tt>'?'</tt>) is appended, followed by the query. Any character that
				718	* is not a <a href="#legal-chars">legal URI character</a> is quoted.
				719	* </p></li>
				720	*
				721	* <li><p> Finally, if a fragment is given then a hash character
				722	* (<tt>'#'</tt>) is appended, followed by the fragment. Any character
				723	* that is not a legal URI character is quoted. </p></li>
				724	*
				725	* </ol>
				726	*
				727	* <p> The resulting URI string is then parsed as if by invoking the {@link
				728	* #URI(String)} constructor and then invoking the {@link
				729	* #parseServerAuthority()} method upon the result; this may cause a {@link
				730	* URISyntaxException} to be thrown. </p>
				731	*
				732	* @param scheme Scheme name
				733	* @param authority Authority
				734	* @param path Path
				735	* @param query Query
				736	* @param fragment Fragment
				737	*
				738	* @throws URISyntaxException
				739	* If both a scheme and a path are given but the path is relative,
				740	* if the URI string constructed from the given components violates
				741	* RFC 2396, or if the authority component of the string is
				742	* present but cannot be parsed as a server-based authority
				743	*/
				744	public URI(String scheme,
				745	String authority,
				746	String path, String query, String fragment)
				747	throws URISyntaxException
				748	{
				749	String s = toString(scheme, null,
				750	authority, null, null, -1,
				751	path, query, fragment);
				752	checkPath(s, scheme, path);
				753	new Parser(s).parse(false);
				754	}
				755
				756	/**
				757	* Constructs a hierarchical URI from the given components.
				758	*
				759	* <p> A component may be left undefined by passing <tt>null</tt>.
				760	*
				761	* <p> This convenience constructor works as if by invoking the
				762	* seven-argument constructor as follows:
				763	*
				764	* <blockquote><tt>
				765	* new {@link #URI(String, String, String, int, String, String, String)
				766	* URI}(scheme, null, host, -1, path, null, fragment);
				767	* </tt></blockquote>
				768	*
				769	* @param scheme Scheme name
				770	* @param host Host name
				771	* @param path Path
				772	* @param fragment Fragment
				773	*
				774	* @throws URISyntaxException
				775	* If the URI string constructed from the given components
				776	* violates RFC 2396
				777	*/
				778	public URI(String scheme, String host, String path, String fragment)
				779	    throws URISyntaxException
				780	{
					    // Delegate to the seven-argument constructor with no user
					    // information (null), no port (-1) and no query (null).
				781	    this(scheme,
					         null, host, -1,
					         path, null, fragment);
				782	}
				783
				784	/**
				785	* Constructs a URI from the given components.
				786	*
				787	* <p> A component may be left undefined by passing <tt>null</tt>.
				788	*
				789	* <p> This constructor first builds a URI in string form using the given
				790	* components as follows: </p>
				791	*
				792	* <ol>
				793	*
				794	* <li><p> Initially, the result string is empty. </p></li>
				795	*
				796	* <li><p> If a scheme is given then it is appended to the result,
				797	* followed by a colon character (<tt>':'</tt>). </p></li>
				798	*
				799	* <li><p> If a scheme-specific part is given then it is appended. Any
				800	* character that is not a <a href="#legal-chars">legal URI character</a>
				801	* is <a href="#quote">quoted</a>. </p></li>
				802	*
				803	* <li><p> Finally, if a fragment is given then a hash character
				804	* (<tt>'#'</tt>) is appended to the string, followed by the fragment.
				805	* Any character that is not a legal URI character is quoted. </p></li>
				806	*
				807	* </ol>
				808	*
				809	* <p> The resulting URI string is then parsed in order to create the new
				810	* URI instance as if by invoking the {@link #URI(String)} constructor;
				811	* this may cause a {@link URISyntaxException} to be thrown. </p>
				812	*
				813	* @param scheme Scheme name
				814	* @param ssp Scheme-specific part
				815	* @param fragment Fragment
				816	*
				817	* @throws URISyntaxException
				818	* If the URI string constructed from the given components
				819	* violates RFC 2396
				820	*/
				821	public URI(String scheme, String ssp, String fragment)
				822	throws URISyntaxException
				823	{
				824	new Parser(toString(scheme, ssp,
				825	null, null, null, -1,
				826	null, null, fragment))
				827	.parse(false);
				828	}
				829
				830	/**
				831	* Creates a URI by parsing the given string.
				832	*
				833	* <p> This convenience factory method works as if by invoking the {@link
				834	* #URI(String)} constructor; any {@link URISyntaxException} thrown by the
				835	* constructor is caught and wrapped in a new {@link
				836	* IllegalArgumentException} object, which is then thrown.
				837	*
				838	* <p> This method is provided for use in situations where it is known that
				839	* the given string is a legal URI, for example for URI constants declared
				840	* within a program, and so it would be considered a programming error
				841	* for the string not to parse as such. The constructors, which throw
				842	* {@link URISyntaxException} directly, should be used in situations where a
				843	* URI is being constructed from user input or from some other source that
				844	* may be prone to errors. </p>
				845	*
				846	* @param str The string to be parsed into a URI
				847	* @return The new URI
				848	*
				849	* @throws NullPointerException
				850	* If <tt>str</tt> is <tt>null</tt>
				851	*
				852	* @throws IllegalArgumentException
				853	* If the given string violates RFC 2396
				854	*/
				855	public static URI create(String str) {
				856	    try {
				857	        return new URI(str);
				858	    } catch (URISyntaxException x) {
					        // Wrap the checked exception in an unchecked one. The syntax
					        // error's message is carried over together with the cause so
					        // that the IllegalArgumentException is self-describing instead
					        // of having a null message.
				859	        throw new IllegalArgumentException(x.getMessage(), x);
				862	    }
				863	}
				864
				865
				866	// -- Operations --
				867
				868	/**
				869	* Attempts to parse this URI's authority component, if defined, into
				870	* user-information, host, and port components.
				871	*
				872	* <p> If this URI's authority component has already been recognized as
				873	* being server-based then it will already have been parsed into
				874	* user-information, host, and port components. In this case, or if this
				875	* URI has no authority component, this method simply returns this URI.
				876	*
				877	* <p> Otherwise this method attempts once more to parse the authority
				878	* component into user-information, host, and port components, and throws
				879	* an exception describing why the authority component could not be parsed
				880	* in that way.
				881	*
				882	* <p> This method is provided because the generic URI syntax specified in
				883	* <a href="http://www.ietf.org/rfc/rfc2396.txt">RFC 2396</a>
				884	* cannot always distinguish a malformed server-based authority from a
				885	* legitimate registry-based authority. It must therefore treat some
				886	* instances of the former as instances of the latter. The authority
				887	* component in the URI string <tt>"//foo:bar"</tt>, for example, is not a
				888	* legal server-based authority but it is legal as a registry-based
				889	* authority.
				890	*
				891	* <p> In many common situations, for example when working with URIs that are
				892	* known to be either URNs or URLs, the hierarchical URIs being used will
				893	* always be server-based. They therefore must either be parsed as such or
				894	* treated as an error. In these cases a statement such as
				895	*
				896	* <blockquote>
				897	* <tt>URI </tt><i>u</i><tt> = new URI(str).parseServerAuthority();</tt>
				898	* </blockquote>
				899	*
				900	* <p> can be used to ensure that <i>u</i> always refers to a URI that, if
				901	* it has an authority component, has a server-based authority with proper
				902	* user-information, host, and port components. Invoking this method also
				903	* ensures that if the authority could not be parsed in that way then an
				904	* appropriate diagnostic message can be issued based upon the exception
				905	* that is thrown. </p>
				906	*
				907	* @return A URI whose authority field has been parsed
				908	* as a server-based authority
				909	*
				910	* @throws URISyntaxException
				911	* If the authority component of this URI is defined
				912	* but cannot be parsed as a server-based authority
				913	* according to RFC 2396
				914	*/
				915	public URI parseServerAuthority()
				916	    throws URISyntaxException
				917	{
				918	    // We could be clever and cache the error message and index from the
				919	    // exception thrown during the original parse, but that would require
				920	    // either more fields or a more-obscure representation.
					    // Nothing to do when a host is already known or when there is no
					    // authority component at all.
				921	    if ((host != null) || (authority == null))
				922	        return this;
					    // Otherwise re-parse the string form with the flag set to true; a
					    // URISyntaxException raised by the parser propagates to the caller.
					    // NOTE(review): defineString() presumably makes sure the 'string'
					    // field is populated before it is read below.
				923	    defineString();
				924	    new Parser(string).parse(true);
				925	    return this;
				926	}
				927
				928	/**
				929	* Normalizes this URI's path.
				930	*
				931	* <p> If this URI is opaque, or if its path is already in normal form,
				932	* then this URI is returned. Otherwise a new URI is constructed that is
				933	* identical to this URI except that its path is computed by normalizing
				934	* this URI's path in a manner consistent with <a
				935	* href="http://www.ietf.org/rfc/rfc2396.txt">RFC 2396</a>,
				936	* section 5.2, step 6, sub-steps c through f; that is:
				937	* </p>
				938	*
				939	* <ol>
				940	*
				941	* <li><p> All <tt>"."</tt> segments are removed. </p></li>
				942	*
				943	* <li><p> If a <tt>".."</tt> segment is preceded by a non-<tt>".."</tt>
				944	* segment then both of these segments are removed. This step is
				945	* repeated until it is no longer applicable. </p></li>
				946	*
				947	* <li><p> If the path is relative, and if its first segment contains a
				948	* colon character (<tt>':'</tt>), then a <tt>"."</tt> segment is
				949	* prepended. This prevents a relative URI with a path such as
				950	* <tt>"a:b/c/d"</tt> from later being re-parsed as an opaque URI with a
				951	* scheme of <tt>"a"</tt> and a scheme-specific part of <tt>"b/c/d"</tt>.
				952	* <b><i>(Deviation from RFC 2396)</i></b> </p></li>
				953	*
				954	* </ol>
				955	*
				956	* <p> A normalized path will begin with one or more <tt>".."</tt> segments
				957	* if there were insufficient non-<tt>".."</tt> segments preceding them to
				958	* allow their removal. A normalized path will begin with a <tt>"."</tt>
				959	* segment if one was inserted by step 3 above. Otherwise, a normalized
				960	* path will not contain any <tt>"."</tt> or <tt>".."</tt> segments. </p>
				961	*
				962	* @return A URI equivalent to this URI,
				963	* but whose path is in normal form
				964	*/
				965	public URI normalize() {
					    // All the work happens in the one-argument normalize overload.
				966	    final URI normalized = normalize(this);
					    return normalized;
				967	}
				968
				969	/**
				970	* Resolves the given URI against this URI.
				971	*
				972	* <p> If the given URI is already absolute, or if this URI is opaque, then
				973	* the given URI is returned.
				974	*
				975	* <p><a name="resolve-frag"></a> If the given URI's fragment component is
				976	* defined, its path component is empty, and its scheme, authority, and
				977	* query components are undefined, then a URI with the given fragment but
				978	* with all other components equal to those of this URI is returned. This
				979	* allows a URI representing a standalone fragment reference, such as
				980	* <tt>"#foo"</tt>, to be usefully resolved against a base URI.
				981	*
				982	* <p> Otherwise this method constructs a new hierarchical URI in a manner
				983	* consistent with <a
				984	* href="http://www.ietf.org/rfc/rfc2396.txt">RFC 2396</a>,
				985	* section 5.2; that is: </p>
				986	*
				987	* <ol>
				988	*
				989	* <li><p> A new URI is constructed with this URI's scheme and the given
				990	* URI's query and fragment components. </p></li>
				991	*
				992	* <li><p> If the given URI has an authority component then the new URI's
				993	* authority and path are taken from the given URI. </p></li>
				994	*
				995	* <li><p> Otherwise the new URI's authority component is copied from
				996	* this URI, and its path is computed as follows: </p></li>
				997	*
				998	* <ol type=a>
				999	*
				1000	* <li><p> If the given URI's path is absolute then the new URI's path
				1001	* is taken from the given URI. </p></li>
				1002	*
				1003	* <li><p> Otherwise the given URI's path is relative, and so the new
				1004	* URI's path is computed by resolving the path of the given URI
				1005	* against the path of this URI. This is done by concatenating all but
				1006	* the last segment of this URI's path, if any, with the given URI's
				1007	* path and then normalizing the result as if by invoking the {@link
				1008	* #normalize() normalize} method. </p></li>
				1009	*
				1010	* </ol>
				1011	*
				1012	* </ol>
				1013	*
				1014	* <p> The result of this method is absolute if, and only if, either this
				1015	* URI is absolute or the given URI is absolute. </p>
				1016	*
				1017	* @param uri The URI to be resolved against this URI
				1018	* @return The resulting URI
				1019	*
				1020	* @throws NullPointerException
				1021	* If <tt>uri</tt> is <tt>null</tt>
				1022	*/
				1023	public URI resolve(URI uri) {
					    // This URI is the base; the two-argument overload does the work.
				1024	    final URI base = this;
					    return resolve(base, uri);
				1025	}
				1026
				1027	/**
				1028	* Constructs a new URI by parsing the given string and then resolving it
				1029	* against this URI.
				1030	*
				1031	* <p> This convenience method works as if invoking it were equivalent to
				1032	* evaluating the expression <tt>{@link #resolve(java.net.URI)
				1033	* resolve}(URI.{@link #create(String) create}(str))</tt>. </p>
				1034	*
				1035	* @param str The string to be parsed into a URI
				1036	* @return The resulting URI
				1037	*
				1038	* @throws NullPointerException
				1039	* If <tt>str</tt> is <tt>null</tt>
				1040	*
				1041	* @throws IllegalArgumentException
				1042	* If the given string violates RFC 2396
				1043	*/
				1044	public URI resolve(String str) {
					    // create() reports a malformed string as IllegalArgumentException.
				1045	    final URI parsed = URI.create(str);
					    return resolve(parsed);
				1046	}
				1047
				1048	/**
				1049	* Relativizes the given URI against this URI.
				1050	*
				1051	* <p> The relativization of the given URI against this URI is computed as
				1052	* follows: </p>
				1053	*
				1054	* <ol>
				1055	*
				1056	* <li><p> If either this URI or the given URI are opaque, or if the
				1057	* scheme and authority components of the two URIs are not identical, or
				1058	* if the path of this URI is not a prefix of the path of the given URI,
				1059	* then the given URI is returned. </p></li>
				1060	*
				1061	* <li><p> Otherwise a new relative hierarchical URI is constructed with
				1062	* query and fragment components taken from the given URI and with a path
				1063	* component computed by removing this URI's path from the beginning of
				1064	* the given URI's path. </p></li>
				1065	*
				1066	* </ol>
				1067	*
				1068	* @param uri The URI to be relativized against this URI
				1069	* @return The resulting URI
				1070	*
				1071	* @throws NullPointerException
				1072	* If <tt>uri</tt> is <tt>null</tt>
				1073	*/
				1074	public URI relativize(URI uri) {
					    // This URI is the base; the two-argument overload does the work.
				1075	    final URI base = this;
					    return relativize(base, uri);
				1076	}
				1077
				1078	/**
				1079	* Constructs a URL from this URI.
				1080	*
				1081	* <p> This convenience method works as if invoking it were equivalent to
				1082	* evaluating the expression <tt>new URL(this.toString())</tt> after
				1083	* first checking that this URI is absolute. </p>
				1084	*
				1085	* @return A URL constructed from this URI
				1086	*
				1087	* @throws IllegalArgumentException
				1088	* If this URI is not absolute
				1089	*
				1090	* @throws MalformedURLException
				1091	* If a protocol handler for the URL could not be found,
				1092	* or if some other error occurred while constructing the URL
				1093	*/
				1094	public URL toURL()
				1095	    throws MalformedURLException {
					    // Only a URI that has a scheme can be turned into a URL; the URL is
					    // built from this URI's string form.
				1096	    if (isAbsolute())
				1097	        return new URL(toString());
				1098	    throw new IllegalArgumentException("URI is not absolute");
				1099	}
				1100
				1101	// -- Component access methods --
				1102
				1103	/**
				1104	* Returns the scheme component of this URI.
				1105	*
				1106	* <p> The scheme component of a URI, if defined, only contains characters
				1107	* in the <i>alphanum</i> category and in the string <tt>"-.+"</tt>. A
				1108	* scheme always starts with an <i>alpha</i> character. <p>
				1109	*
				1110	* The scheme component of a URI cannot contain escaped octets, hence this
				1111	* method does not perform any decoding.
				1112	*
				1113	* @return The scheme component of this URI,
				1114	* or <tt>null</tt> if the scheme is undefined
				1115	*/
				1116	public String getScheme() {
					    // Returned exactly as stored; no decoding is applied.
				1117	    return this.scheme;
				1118	}
				1119
				1120	/**
				1121	* Tells whether or not this URI is absolute.
				1122	*
				1123	* <p> A URI is absolute if, and only if, it has a scheme component. </p>
				1124	*
				1125	* @return <tt>true</tt> if, and only if, this URI is absolute
				1126	*/
				1127	public boolean isAbsolute() {
					    // "Absolute" means exactly "has a scheme".
				1128	    return !(this.scheme == null);
				1129	}
				1130
				1131	/**
				1132	* Tells whether or not this URI is opaque.
				1133	*
				1134	* <p> A URI is opaque if, and only if, it is absolute and its
				1135	* scheme-specific part does not begin with a slash character ('/').
				1136	* An opaque URI has a scheme, a scheme-specific part, and possibly
				1137	* a fragment; all other components are undefined. </p>
				1138	*
				1139	* @return <tt>true</tt> if, and only if, this URI is opaque
				1140	*/
				1141	public boolean isOpaque() {
					    // An undefined (null) path is what marks a URI as opaque here.
				1142	    return (this.path == null);
				1143	}
				1144
				1145	/**
				1146	* Returns the raw scheme-specific part of this URI. The scheme-specific
				1147	* part is never undefined, though it may be empty.
				1148	*
				1149	* <p> The scheme-specific part of a URI only contains legal URI
				1150	* characters. </p>
				1151	*
				1152	* @return The raw scheme-specific part of this URI
				1153	* (never <tt>null</tt>)
				1154	*/
				1155	public String getRawSchemeSpecificPart() {
					    // NOTE(review): defineSchemeSpecificPart() presumably fills in the
					    // schemeSpecificPart field when it has not been computed yet, so
					    // the field is read only after that call.
				1156	    defineSchemeSpecificPart();
				1157	    final String rawSsp = schemeSpecificPart;
					    return rawSsp;
				1158	}
				1159
				1160	/**
				1161	* Returns the decoded scheme-specific part of this URI.
				1162	*
				1163	* <p> The string returned by this method is equal to that returned by the
				1164	* {@link #getRawSchemeSpecificPart() getRawSchemeSpecificPart} method
				1165	* except that all sequences of escaped octets are <a
				1166	* href="#decode">decoded</a>. </p>
				1167	*
				1168	* @return The decoded scheme-specific part of this URI
				1169	* (never <tt>null</tt>)
				1170	*/
				1171	public String getSchemeSpecificPart() {
					    // Reuse the cached decoded form when one exists.
				1172	    if (decodedSchemeSpecificPart != null)
				1173	        return decodedSchemeSpecificPart;
					    // Otherwise decode the raw form and remember the result.
				1174	    decodedSchemeSpecificPart = decode(getRawSchemeSpecificPart());
					    return decodedSchemeSpecificPart;
				1175	}
				1176
				1177	/**
				1178	* Returns the raw authority component of this URI.
				1179	*
				1180	* <p> The authority component of a URI, if defined, only contains the
				1181	* commercial-at character (<tt>'@'</tt>) and characters in the
				1182	* <i>unreserved</i>, <i>punct</i>, <i>escaped</i>, and <i>other</i>
				1183	* categories. If the authority is server-based then it is further
				1184	* constrained to have valid user-information, host, and port
				1185	* components. </p>
				1186	*
				1187	* @return The raw authority component of this URI,
				1188	* or <tt>null</tt> if the authority is undefined
				1189	*/
				1190	public String getRawAuthority() {
					    // Raw (undecoded) form; null when no authority is defined.
				1191	    return this.authority;
				1192	}
				1193
				1194	/**
				1195	* Returns the decoded authority component of this URI.
				1196	*
				1197	* <p> The string returned by this method is equal to that returned by the
				1198	* {@link #getRawAuthority() getRawAuthority} method except that all
				1199	* sequences of escaped octets are <a href="#decode">decoded</a>. </p>
				1200	*
				1201	* @return The decoded authority component of this URI,
				1202	* or <tt>null</tt> if the authority is undefined
				1203	*/
				1204	public String getAuthority() {
					    // Decode lazily, and only when an authority is actually defined,
					    // in the same way as getUserInfo(), getPath(), getQuery() and
					    // getFragment(). An undefined authority yields null without
					    // calling decode() at all.
				1205	    if ((decodedAuthority == null) && (authority != null))
				1206	        decodedAuthority = decode(authority);
				1207	    return decodedAuthority;
				1208	}
				1209
				1210	/**
				1211	* Returns the raw user-information component of this URI.
				1212	*
				1213	* <p> The user-information component of a URI, if defined, only contains
				1214	* characters in the <i>unreserved</i>, <i>punct</i>, <i>escaped</i>, and
				1215	* <i>other</i> categories. </p>
				1216	*
				1217	* @return The raw user-information component of this URI,
				1218	* or <tt>null</tt> if the user information is undefined
				1219	*/
				1220	public String getRawUserInfo() {
					    // Raw (undecoded) form; null when no user information is defined.
				1221	    return this.userInfo;
				1222	}
				1223
				1224	/**
				1225	* Returns the decoded user-information component of this URI.
				1226	*
				1227	* <p> The string returned by this method is equal to that returned by the
				1228	* {@link #getRawUserInfo() getRawUserInfo} method except that all
				1229	* sequences of escaped octets are <a href="#decode">decoded</a>. </p>
				1230	*
				1231	* @return The decoded user-information component of this URI,
				1232	* or <tt>null</tt> if the user information is undefined
				1233	*/
				1234	public String getUserInfo() {
					    // Nothing to decode, or already decoded: return the cached value
					    // (still null when the user information is undefined).
				1235	    if ((userInfo == null) || (decodedUserInfo != null))
				1236	        return decodedUserInfo;
					    // First request for a defined component: decode and remember it.
				1237	    decodedUserInfo = decode(userInfo);
					    return decodedUserInfo;
				1238	}
				1239
				1240	/**
				1241	* Returns the host component of this URI.
				1242	*
				1243	* <p> The host component of a URI, if defined, will have one of the
				1244	* following forms: </p>
				1245	*
				1246	* <ul type=disc>
				1247	*
				1248	* <li><p> A domain name consisting of one or more <i>labels</i>
				1249	* separated by period characters (<tt>'.'</tt>), optionally followed by
				1250	* a period character. Each label consists of <i>alphanum</i> characters
				1251	* as well as hyphen characters (<tt>'-'</tt>), though hyphens never
				1252	* occur as the first or last characters in a label. The rightmost
				1253	* label of a domain name consisting of two or more labels, begins
				1254	* with an <i>alpha</i> character. </p></li>
				1255	*
				1256	* <li><p> A dotted-quad IPv4 address of the form
				1257	* <i>digit</i><tt>+.</tt><i>digit</i><tt>+.</tt><i>digit</i><tt>+.</tt><i>digit</i><tt>+</tt>,
				1258	* where no <i>digit</i> sequence is longer than three characters and no
				1259	* sequence has a value larger than 255. </p></li>
				1260	*
				1261	* <li><p> An IPv6 address enclosed in square brackets (<tt>'['</tt> and
				1262	* <tt>']'</tt>) and consisting of hexadecimal digits, colon characters
				1263	* (<tt>':'</tt>), and possibly an embedded IPv4 address. The full
				1264	* syntax of IPv6 addresses is specified in <a
				1265	* href="http://www.ietf.org/rfc/rfc2373.txt"><i>RFC 2373: IPv6
				1266	* Addressing Architecture</i></a>. </p></li>
				1267	*
				1268	* </ul>
				1269	*
				1270	* The host component of a URI cannot contain escaped octets, hence this
				1271	* method does not perform any decoding.
				1272	*
				1273	* @return The host component of this URI,
				1274	* or <tt>null</tt> if the host is undefined
				1275	*/
				1276	public String getHost() {
					    // Returned exactly as stored; no decoding is applied.
				1277	    return this.host;
				1278	}
				1279
				1280	/**
				1281	* Returns the port number of this URI.
				1282	*
				1283	* <p> The port component of a URI, if defined, is a non-negative
				1284	* integer. </p>
				1285	*
				1286	* @return The port component of this URI,
				1287	* or <tt>-1</tt> if the port is undefined
				1288	*/
				1289	public int getPort() {
					    // Per the Javadoc, -1 stands for an undefined port.
				1290	    return this.port;
				1291	}
				1292
				1293	/**
				1294	* Returns the raw path component of this URI.
				1295	*
				1296	* <p> The path component of a URI, if defined, only contains the slash
				1297	* character (<tt>'/'</tt>), the commercial-at character (<tt>'@'</tt>),
				1298	* and characters in the <i>unreserved</i>, <i>punct</i>, <i>escaped</i>,
				1299	* and <i>other</i> categories. </p>
				1300	*
				1301	* @return The path component of this URI,
				1302	* or <tt>null</tt> if the path is undefined
				1303	*/
				1304	public String getRawPath() {
					    // Raw (undecoded) form; null when no path is defined.
				1305	    return this.path;
				1306	}
				1307
				1308	/**
				1309	* Returns the decoded path component of this URI.
				1310	*
				1311	* <p> The string returned by this method is equal to that returned by the
				1312	* {@link #getRawPath() getRawPath} method except that all sequences of
				1313	* escaped octets are <a href="#decode">decoded</a>. </p>
				1314	*
				1315	* @return The decoded path component of this URI,
				1316	* or <tt>null</tt> if the path is undefined
				1317	*/
				1318	public String getPath() {
					    // Nothing to decode, or already decoded: return the cached value
					    // (still null when the path is undefined).
				1319	    if ((path == null) || (decodedPath != null))
				1320	        return decodedPath;
					    // First request for a defined component: decode and remember it.
				1321	    decodedPath = decode(path);
					    return decodedPath;
				1322	}
				1323
				1324	/**
				1325	* Returns the raw query component of this URI.
				1326	*
				1327	* <p> The query component of a URI, if defined, only contains legal URI
				1328	* characters. </p>
				1329	*
				1330	* @return The raw query component of this URI,
				1331	* or <tt>null</tt> if the query is undefined
				1332	*/
				1333	public String getRawQuery() {
					    // Raw (undecoded) form; null when no query is defined.
				1334	    return this.query;
				1335	}
				1336
				1337	/**
				1338	* Returns the decoded query component of this URI.
				1339	*
				1340	* <p> The string returned by this method is equal to that returned by the
				1341	* {@link #getRawQuery() getRawQuery} method except that all sequences of
				1342	* escaped octets are <a href="#decode">decoded</a>. </p>
				1343	*
				1344	* @return The decoded query component of this URI,
				1345	* or <tt>null</tt> if the query is undefined
				1346	*/
				1347	public String getQuery() {
					    // Nothing to decode, or already decoded: return the cached value
					    // (still null when the query is undefined).
				1348	    if ((query == null) || (decodedQuery != null))
				1349	        return decodedQuery;
					    // First request for a defined component: decode and remember it.
				1350	    decodedQuery = decode(query);
					    return decodedQuery;
				1351	}
				1352
				1353	/**
				1354	* Returns the raw fragment component of this URI.
				1355	*
				1356	* <p> The fragment component of a URI, if defined, only contains legal URI
				1357	* characters. </p>
				1358	*
				1359	* @return The raw fragment component of this URI,
				1360	* or <tt>null</tt> if the fragment is undefined
				1361	*/
				1362	public String getRawFragment() {
					    // Raw (undecoded) form; null when no fragment is defined.
				1363	    return this.fragment;
				1364	}
				1365
				1366	/**
				1367	* Returns the decoded fragment component of this URI.
				1368	*
				1369	* <p> The string returned by this method is equal to that returned by the
				1370	* {@link #getRawFragment() getRawFragment} method except that all
				1371	* sequences of escaped octets are <a href="#decode">decoded</a>. </p>
				1372	*
				1373	* @return The decoded fragment component of this URI,
				1374	* or <tt>null</tt> if the fragment is undefined
				1375	*/
				1376	public String getFragment() {
					    // Nothing to decode, or already decoded: return the cached value
					    // (still null when the fragment is undefined).
				1377	    if ((fragment == null) || (decodedFragment != null))
				1378	        return decodedFragment;
					    // First request for a defined component: decode and remember it.
				1379	    decodedFragment = decode(fragment);
					    return decodedFragment;
				1380	}
				1381
				1382
				1383	// -- Equality, comparison, hash code, toString, and serialization --
				1384
				1385	/**
				1386	* Tests this URI for equality with another object.
				1387	*
				1388	* <p> If the given object is not a URI then this method immediately
				1389	* returns <tt>false</tt>.
				1390	*
				1391	* <p> For two URIs to be considered equal requires that either both are
				1392	* opaque or both are hierarchical. Their schemes must either both be
				1393	* undefined or else be equal without regard to case. Their fragments
				1394	* must either both be undefined or else be equal.
				1395	*
				1396	* <p> For two opaque URIs to be considered equal, their scheme-specific
				1397	* parts must be equal.
				1398	*
				1399	* <p> For two hierarchical URIs to be considered equal, their paths must
				1400	* be equal and their queries must either both be undefined or else be
				1401	* equal. Their authorities must either both be undefined, or both be
				1402	* registry-based, or both be server-based. If their authorities are
				1403	* defined and are registry-based, then they must be equal. If their
				1404	* authorities are defined and are server-based, then their hosts must be
				1405	* equal without regard to case, their port numbers must be equal, and
				1406	* their user-information components must be equal.
				1407	*
				1408	* <p> When testing the user-information, path, query, fragment, authority,
				1409	* or scheme-specific parts of two URIs for equality, the raw forms rather
				1410	* than the encoded forms of these components are compared and the
				1411	* hexadecimal digits of escaped octets are compared without regard to
				1412	* case.
				1413	*
				1414	* <p> This method satisfies the general contract of the {@link
				1415	* java.lang.Object#equals(Object) Object.equals} method. </p>
				1416	*
				1417	* @param ob The object to which this object is to be compared
				1418	*
				1419	* @return <tt>true</tt> if, and only if, the given object is a URI that
				1420	* is identical to this URI
				1421	*/
				1422	public boolean equals(Object ob) {
				1423	if (ob == this)
				1424	return true;
				1425	if (!(ob instanceof URI))
				1426	return false;
				1427	URI that = (URI)ob;
				1428	if (this.isOpaque() != that.isOpaque()) return false;
				1429	if (!equalIgnoringCase(this.scheme, that.scheme)) return false;
				1430	if (!equal(this.fragment, that.fragment)) return false;
				1431
				1432	// Opaque
				1433	if (this.isOpaque())
				1434	return equal(this.schemeSpecificPart, that.schemeSpecificPart);
				1435
				1436	// Hierarchical
				1437	if (!equal(this.path, that.path)) return false;
				1438	if (!equal(this.query, that.query)) return false;
				1439
				1440	// Authorities
				1441	if (this.authority == that.authority) return true;
				1442	if (this.host != null) {
				1443	// Server-based
				1444	if (!equal(this.userInfo, that.userInfo)) return false;
				1445	if (!equalIgnoringCase(this.host, that.host)) return false;
				1446	if (this.port != that.port) return false;
				1447	} else if (this.authority != null) {
				1448	// Registry-based
				1449	if (!equal(this.authority, that.authority)) return false;
				1450	} else if (this.authority != that.authority) {
				1451	return false;
				1452	}
				1453
				1454	return true;
				1455	}
				1456
				1457	/**
				1458	* Returns a hash-code value for this URI. The hash code is based upon all
				1459	* of the URI's components, and satisfies the general contract of the
				1460	* {@link java.lang.Object#hashCode() Object.hashCode} method.
				1461	*
				1462	* @return A hash-code value for this URI
				1463	*/
				1464	public int hashCode() {
				1465	if (hash != 0)
				1466	return hash;
				1467	int h = hashIgnoringCase(0, scheme);
				1468	h = hash(h, fragment);
				1469	if (isOpaque()) {
				1470	h = hash(h, schemeSpecificPart);
				1471	} else {
				1472	h = hash(h, path);
				1473	h = hash(h, query);
				1474	if (host != null) {
				1475	h = hash(h, userInfo);
				1476	h = hashIgnoringCase(h, host);
				1477	h += 1949 * port;
				1478	} else {
				1479	h = hash(h, authority);
				1480	}
				1481	}
				1482	hash = h;
				1483	return h;
				1484	}
				1485
				1486	/**
				1487	* Compares this URI to another object, which must be a URI.
				1488	*
				1489	* <p> When comparing corresponding components of two URIs, if one
				1490	* component is undefined but the other is defined then the first is
				1491	* considered to be less than the second. Unless otherwise noted, string
				1492	* components are ordered according to their natural, case-sensitive
				1493	* ordering as defined by the {@link java.lang.String#compareTo(Object)
				1494	* String.compareTo} method. String components that are subject to
				1495	* encoding are compared by comparing their raw forms rather than their
				1496	* encoded forms.
				1497	*
				1498	* <p> The ordering of URIs is defined as follows: </p>
				1499	*
				1500	* <ul type=disc>
				1501	*
				1502	* <li><p> Two URIs with different schemes are ordered according to the
				1503	* ordering of their schemes, without regard to case. </p></li>
				1504	*
				1505	* <li><p> A hierarchical URI is considered to be less than an opaque URI
				1506	* with an identical scheme. </p></li>
				1507	*
				1508	* <li><p> Two opaque URIs with identical schemes are ordered according
				1509	* to the ordering of their scheme-specific parts. </p></li>
				1510	*
				1511	* <li><p> Two opaque URIs with identical schemes and scheme-specific
				1512	* parts are ordered according to the ordering of their
				1513	* fragments. </p></li>
				1514	*
				1515	* <li><p> Two hierarchical URIs with identical schemes are ordered
				1516	* according to the ordering of their authority components: </p></li>
				1517	*
				1518	* <ul type=disc>
				1519	*
				1520	* <li><p> If both authority components are server-based then the URIs
				1521	* are ordered according to their user-information components; if these
				1522	* components are identical then the URIs are ordered according to the
				1523	* ordering of their hosts, without regard to case; if the hosts are
				1524	* identical then the URIs are ordered according to the ordering of
				1525	* their ports. </p></li>
				1526	*
				1527	* <li><p> If one or both authority components are registry-based then
				1528	* the URIs are ordered according to the ordering of their authority
				1529	* components. </p></li>
				1530	*
				1531	* </ul>
				1532	*
				1533	* <li><p> Finally, two hierarchical URIs with identical schemes and
				1534	* authority components are ordered according to the ordering of their
				1535	* paths; if their paths are identical then they are ordered according to
				1536	* the ordering of their queries; if the queries are identical then they
				1537	* are ordered according to the order of their fragments. </p></li>
				1538	*
				1539	* </ul>
				1540	*
				1541	* <p> This method satisfies the general contract of the {@link
				1542	* java.lang.Comparable#compareTo(Object) Comparable.compareTo}
				1543	* method. </p>
				1544	*
				1545	* @param that
				1546	* The object to which this URI is to be compared
				1547	*
				1548	* @return A negative integer, zero, or a positive integer as this URI is
				1549	* less than, equal to, or greater than the given URI
				1550	*
				1551	* @throws ClassCastException
				1552	* If the given object is not a URI
				1553	*/
				1554	public int compareTo(URI that) {
				1555	int c;
				1556
				1557	if ((c = compareIgnoringCase(this.scheme, that.scheme)) != 0)
				1558	return c;
				1559
				1560	if (this.isOpaque()) {
				1561	if (that.isOpaque()) {
				1562	// Both opaque
				1563	if ((c = compare(this.schemeSpecificPart,
				1564	that.schemeSpecificPart)) != 0)
				1565	return c;
				1566	return compare(this.fragment, that.fragment);
				1567	}
				1568	return +1; // Opaque > hierarchical
				1569	} else if (that.isOpaque()) {
				1570	return -1; // Hierarchical < opaque
				1571	}
				1572
				1573	// Hierarchical
				1574	if ((this.host != null) && (that.host != null)) {
				1575	// Both server-based
				1576	if ((c = compare(this.userInfo, that.userInfo)) != 0)
				1577	return c;
				1578	if ((c = compareIgnoringCase(this.host, that.host)) != 0)
				1579	return c;
				1580	if ((c = this.port - that.port) != 0)
				1581	return c;
				1582	} else {
				1583	// If one or both authorities are registry-based then we simply
				1584	// compare them in the usual, case-sensitive way. If one is
				1585	// registry-based and one is server-based then the strings are
				1586	// guaranteed to be unequal, hence the comparison will never return
				1587	// zero and the compareTo and equals methods will remain
				1588	// consistent.
				1589	if ((c = compare(this.authority, that.authority)) != 0) return c;
				1590	}
				1591
				1592	if ((c = compare(this.path, that.path)) != 0) return c;
				1593	if ((c = compare(this.query, that.query)) != 0) return c;
				1594	return compare(this.fragment, that.fragment);
				1595	}
				1596
				1597	/**
				1598	* Returns the content of this URI as a string.
				1599	*
				1600	* <p> If this URI was created by invoking one of the constructors in this
				1601	* class then a string equivalent to the original input string, or to the
				1602	* string computed from the originally-given components, as appropriate, is
				1603	* returned. Otherwise this URI was created by normalization, resolution,
				1604	* or relativization, and so a string is constructed from this URI's
				1605	* components according to the rules specified in <a
				1606	* href="http://www.ietf.org/rfc/rfc2396.txt">RFC 2396</a>,
				1607	* section 5.2, step 7. </p>
				1608	*
				1609	* @return The string form of this URI
				1610	*/
				1611	public String toString() {
				1612	defineString();
				1613	return string;
				1614	}
				1615
				1616	/**
				1617	* Returns the content of this URI as a US-ASCII string.
				1618	*
				1619	* <p> If this URI does not contain any characters in the <i>other</i>
				1620	* category then an invocation of this method will return the same value as
				1621	* an invocation of the {@link #toString() toString} method. Otherwise
				1622	* this method works as if by invoking that method and then <a
				1623	* href="#encode">encoding</a> the result. </p>
				1624	*
				1625	* @return The string form of this URI, encoded as needed
				1626	* so that it only contains characters in the US-ASCII
				1627	* charset
				1628	*/
				1629	public String toASCIIString() {
				1630	defineString();
				1631	return encode(string);
				1632	}
				1633
				1634
				1635	// -- Serialization support --
				1636
				1637	/**
				1638	* Saves the content of this URI to the given serial stream.
				1639	*
				1640	* <p> The only serializable field of a URI instance is its <tt>string</tt>
				1641	* field. That field is given a value, if it does not have one already,
				1642	* and then the {@link java.io.ObjectOutputStream#defaultWriteObject()}
				1643	* method of the given object-output stream is invoked. </p>
				1644	*
				1645	* @param os The object-output stream to which this object
				1646	* is to be written
				1647	*/
				1648	private void writeObject(ObjectOutputStream os)
				1649	throws IOException
				1650	{
				1651	defineString();
				1652	os.defaultWriteObject(); // Writes the string field only
				1653	}
				1654
				1655	/**
				1656	* Reconstitutes a URI from the given serial stream.
				1657	*
				1658	* <p> The {@link java.io.ObjectInputStream#defaultReadObject()} method is
				1659	* invoked to read the value of the <tt>string</tt> field. The result is
				1660	* then parsed in the usual way.
				1661	*
				1662	* @param is The object-input stream from which this object
				1663	* is being read
				1664	*/
				1665	private void readObject(ObjectInputStream is)
				1666	throws ClassNotFoundException, IOException
				1667	{
				1668	port = -1; // Argh
				1669	is.defaultReadObject();
				1670	try {
				1671	new Parser(string).parse(false);
				1672	} catch (URISyntaxException x) {
				1673	IOException y = new InvalidObjectException("Invalid URI");
				1674	y.initCause(x);
				1675	throw y;
				1676	}
				1677	}
				1678
				1679
				1680	// -- End of public methods --
				1681
				1682
				1683	// -- Utility methods for string-field comparison and hashing --
				1684
				1685	// These methods return appropriate values for null string arguments,
				1686	// thereby simplifying the equals, hashCode, and compareTo methods.
				1687	//
				1688	// The case-ignoring methods should only be applied to strings whose
				1689	// characters are all known to be US-ASCII. Because of this restriction,
				1690	// these methods are faster than the similar methods in the String class.
				1691
				1692	// US-ASCII only
				1693	private static int toLower(char c) {
				1694	if ((c >= 'A') && (c <= 'Z'))
				1695	return c + ('a' - 'A');
				1696	return c;
				1697	}
				1698
				1699	private static boolean equal(String s, String t) {
				1700	if (s == t) return true;
				1701	if ((s != null) && (t != null)) {
				1702	if (s.length() != t.length())
				1703	return false;
				1704	if (s.indexOf('%') < 0)
				1705	return s.equals(t);
				1706	int n = s.length();
				1707	for (int i = 0; i < n;) {
				1708	char c = s.charAt(i);
				1709	char d = t.charAt(i);
				1710	if (c != '%') {
				1711	if (c != d)
				1712	return false;
				1713	i++;
				1714	continue;
				1715	}
				1716	i++;
				1717	if (toLower(s.charAt(i)) != toLower(t.charAt(i)))
				1718	return false;
				1719	i++;
				1720	if (toLower(s.charAt(i)) != toLower(t.charAt(i)))
				1721	return false;
				1722	i++;
				1723	}
				1724	return true;
				1725	}
				1726	return false;
				1727	}
				1728
				1729	// US-ASCII only
				1730	private static boolean equalIgnoringCase(String s, String t) {
				1731	if (s == t) return true;
				1732	if ((s != null) && (t != null)) {
				1733	int n = s.length();
				1734	if (t.length() != n)
				1735	return false;
				1736	for (int i = 0; i < n; i++) {
				1737	if (toLower(s.charAt(i)) != toLower(t.charAt(i)))
				1738	return false;
				1739	}
				1740	return true;
				1741	}
				1742	return false;
				1743	}
				1744
				1745	private static int hash(int hash, String s) {
				1746	if (s == null) return hash;
				1747	return hash * 127 + s.hashCode();
				1748	}
				1749
				1750	// US-ASCII only
				1751	private static int hashIgnoringCase(int hash, String s) {
				1752	if (s == null) return hash;
				1753	int h = hash;
				1754	int n = s.length();
				1755	for (int i = 0; i < n; i++)
				1756	h = 31 * h + toLower(s.charAt(i));
				1757	return h;
				1758	}
				1759
				1760	private static int compare(String s, String t) {
				1761	if (s == t) return 0;
				1762	if (s != null) {
				1763	if (t != null)
				1764	return s.compareTo(t);
				1765	else
				1766	return +1;
				1767	} else {
				1768	return -1;
				1769	}
				1770	}
				1771
				1772	// US-ASCII only
				1773	private static int compareIgnoringCase(String s, String t) {
				1774	if (s == t) return 0;
				1775	if (s != null) {
				1776	if (t != null) {
				1777	int sn = s.length();
				1778	int tn = t.length();
				1779	int n = sn < tn ? sn : tn;
				1780	for (int i = 0; i < n; i++) {
				1781	int c = toLower(s.charAt(i)) - toLower(t.charAt(i));
				1782	if (c != 0)
				1783	return c;
				1784	}
				1785	return sn - tn;
				1786	}
				1787	return +1;
				1788	} else {
				1789	return -1;
				1790	}
				1791	}
				1792
				1793
				1794	// -- String construction --
				1795
				1796	// If a scheme is given then the path, if given, must be absolute
				1797	//
				1798	private static void checkPath(String s, String scheme, String path)
				1799	throws URISyntaxException
				1800	{
				1801	if (scheme != null) {
				1802	if ((path != null)
				1803	&& ((path.length() > 0) && (path.charAt(0) != '/')))
				1804	throw new URISyntaxException(s,
				1805	"Relative path in absolute URI");
				1806	}
				1807	}
				1808
				1809	private void appendAuthority(StringBuffer sb,
				1810	String authority,
				1811	String userInfo,
				1812	String host,
				1813	int port)
				1814	{
				1815	if (host != null) {
				1816	sb.append("//");
				1817	if (userInfo != null) {
				1818	sb.append(quote(userInfo, L_USERINFO, H_USERINFO));
				1819	sb.append('@');
				1820	}
				1821	boolean needBrackets = ((host.indexOf(':') >= 0)
				1822	&& !host.startsWith("[")
				1823	&& !host.endsWith("]"));
				1824	if (needBrackets) sb.append('[');
				1825	sb.append(host);
				1826	if (needBrackets) sb.append(']');
				1827	if (port != -1) {
				1828	sb.append(':');
				1829	sb.append(port);
				1830	}
				1831	} else if (authority != null) {
				1832	sb.append("//");
				1833	if (authority.startsWith("[")) {
				1834	int end = authority.indexOf("]");
				1835	if (end != -1 && authority.indexOf(":")!=-1) {
				1836	String doquote, dontquote;
				1837	if (end == authority.length()) {
				1838	dontquote = authority;
				1839	doquote = "";
				1840	} else {
				1841	dontquote = authority.substring(0,end+1);
				1842	doquote = authority.substring(end+1);
				1843	}
				1844	sb.append (dontquote);
				1845	sb.append(quote(doquote,
				1846	L_REG_NAME \| L_SERVER,
				1847	H_REG_NAME \| H_SERVER));
				1848	}
				1849	} else {
				1850	sb.append(quote(authority,
				1851	L_REG_NAME \| L_SERVER,
				1852	H_REG_NAME \| H_SERVER));
				1853	}
				1854	}
				1855	}
				1856
				1857	private void appendSchemeSpecificPart(StringBuffer sb,
				1858	String opaquePart,
				1859	String authority,
				1860	String userInfo,
				1861	String host,
				1862	int port,
				1863	String path,
				1864	String query)
				1865	{
				1866	if (opaquePart != null) {
				1867	/* check if SSP begins with an IPv6 address
				1868	* because we must not quote a literal IPv6 address
				1869	*/
				1870	if (opaquePart.startsWith("//[")) {
				1871	int end = opaquePart.indexOf("]");
				1872	if (end != -1 && opaquePart.indexOf(":")!=-1) {
				1873	String doquote, dontquote;
				1874	if (end == opaquePart.length()) {
				1875	dontquote = opaquePart;
				1876	doquote = "";
				1877	} else {
				1878	dontquote = opaquePart.substring(0,end+1);
				1879	doquote = opaquePart.substring(end+1);
				1880	}
				1881	sb.append (dontquote);
				1882	sb.append(quote(doquote, L_URIC, H_URIC));
				1883	}
				1884	} else {
				1885	sb.append(quote(opaquePart, L_URIC, H_URIC));
				1886	}
				1887	} else {
				1888	appendAuthority(sb, authority, userInfo, host, port);
				1889	if (path != null)
				1890	sb.append(quote(path, L_PATH, H_PATH));
				1891	if (query != null) {
				1892	sb.append('?');
				1893	sb.append(quote(query, L_URIC, H_URIC));
				1894	}
				1895	}
				1896	}
				1897
				1898	private void appendFragment(StringBuffer sb, String fragment) {
				1899	if (fragment != null) {
				1900	sb.append('#');
				1901	sb.append(quote(fragment, L_URIC, H_URIC));
				1902	}
				1903	}
				1904
				1905	private String toString(String scheme,
				1906	String opaquePart,
				1907	String authority,
				1908	String userInfo,
				1909	String host,
				1910	int port,
				1911	String path,
				1912	String query,
				1913	String fragment)
				1914	{
				1915	StringBuffer sb = new StringBuffer();
				1916	if (scheme != null) {
				1917	sb.append(scheme);
				1918	sb.append(':');
				1919	}
				1920	appendSchemeSpecificPart(sb, opaquePart,
				1921	authority, userInfo, host, port,
				1922	path, query);
				1923	appendFragment(sb, fragment);
				1924	return sb.toString();
				1925	}
				1926
				1927	private void defineSchemeSpecificPart() {
				1928	if (schemeSpecificPart != null) return;
				1929	StringBuffer sb = new StringBuffer();
				1930	appendSchemeSpecificPart(sb, null, getAuthority(), getUserInfo(),
				1931	host, port, getPath(), getQuery());
				1932	if (sb.length() == 0) return;
				1933	schemeSpecificPart = sb.toString();
				1934	}
				1935
				1936	private void defineString() {
				1937	if (string != null) return;
				1938
				1939	StringBuffer sb = new StringBuffer();
				1940	if (scheme != null) {
				1941	sb.append(scheme);
				1942	sb.append(':');
				1943	}
				1944	if (isOpaque()) {
				1945	sb.append(schemeSpecificPart);
				1946	} else {
				1947	if (host != null) {
				1948	sb.append("//");
				1949	if (userInfo != null) {
				1950	sb.append(userInfo);
				1951	sb.append('@');
				1952	}
				1953	boolean needBrackets = ((host.indexOf(':') >= 0)
				1954	&& !host.startsWith("[")
				1955	&& !host.endsWith("]"));
				1956	if (needBrackets) sb.append('[');
				1957	sb.append(host);
				1958	if (needBrackets) sb.append(']');
				1959	if (port != -1) {
				1960	sb.append(':');
				1961	sb.append(port);
				1962	}
				1963	} else if (authority != null) {
				1964	sb.append("//");
				1965	sb.append(authority);
				1966	}
				1967	if (path != null)
				1968	sb.append(path);
				1969	if (query != null) {
				1970	sb.append('?');
				1971	sb.append(query);
				1972	}
				1973	}
				1974	if (fragment != null) {
				1975	sb.append('#');
				1976	sb.append(fragment);
				1977	}
				1978	string = sb.toString();
				1979	}
				1980
				1981
				1982	// -- Normalization, resolution, and relativization --
				1983
				1984	// RFC2396 5.2 (6)
				1985	private static String resolvePath(String base, String child,
				1986	boolean absolute)
				1987	{
				1988	int i = base.lastIndexOf('/');
				1989	int cn = child.length();
				1990	String path = "";
				1991
				1992	if (cn == 0) {
				1993	// 5.2 (6a)
				1994	if (i >= 0)
				1995	path = base.substring(0, i + 1);
				1996	} else {
				1997	StringBuffer sb = new StringBuffer(base.length() + cn);
				1998	// 5.2 (6a)
				1999	if (i >= 0)
				2000	sb.append(base.substring(0, i + 1));
				2001	// 5.2 (6b)
				2002	sb.append(child);
				2003	path = sb.toString();
				2004	}
				2005
				2006	// 5.2 (6c-f)
				2007	String np = normalize(path);
				2008
				2009	// 5.2 (6g): If the result is absolute but the path begins with "../",
				2010	// then we simply leave the path as-is
				2011
				2012	return np;
				2013	}
				2014
				2015	// RFC2396 5.2
				2016	private static URI resolve(URI base, URI child) {
				2017	// check if child if opaque first so that NPE is thrown
				2018	// if child is null.
				2019	if (child.isOpaque() \|\| base.isOpaque())
				2020	return child;
				2021
				2022	// 5.2 (2): Reference to current document (lone fragment)
				2023	if ((child.scheme == null) && (child.authority == null)
				2024	&& child.path.equals("") && (child.fragment != null)
				2025	&& (child.query == null)) {
				2026	if ((base.fragment != null)
				2027	&& child.fragment.equals(base.fragment)) {
				2028	return base;
				2029	}
				2030	URI ru = new URI();
				2031	ru.scheme = base.scheme;
				2032	ru.authority = base.authority;
				2033	ru.userInfo = base.userInfo;
				2034	ru.host = base.host;
				2035	ru.port = base.port;
				2036	ru.path = base.path;
				2037	ru.fragment = child.fragment;
				2038	ru.query = base.query;
				2039	return ru;
				2040	}
				2041
				2042	// 5.2 (3): Child is absolute
				2043	if (child.scheme != null)
				2044	return child;
				2045
				2046	URI ru = new URI(); // Resolved URI
				2047	ru.scheme = base.scheme;
				2048	ru.query = child.query;
				2049	ru.fragment = child.fragment;
				2050
				2051	// 5.2 (4): Authority
				2052	if (child.authority == null) {
				2053	ru.authority = base.authority;
				2054	ru.host = base.host;
				2055	ru.userInfo = base.userInfo;
				2056	ru.port = base.port;
				2057
				2058	String cp = (child.path == null) ? "" : child.path;
				2059	if ((cp.length() > 0) && (cp.charAt(0) == '/')) {
				2060	// 5.2 (5): Child path is absolute
				2061	ru.path = child.path;
				2062	} else {
				2063	// 5.2 (6): Resolve relative path
				2064	ru.path = resolvePath(base.path, cp, base.isAbsolute());
				2065	}
				2066	} else {
				2067	ru.authority = child.authority;
				2068	ru.host = child.host;
				2069	ru.userInfo = child.userInfo;
				2070	ru.host = child.host;
				2071	ru.port = child.port;
				2072	ru.path = child.path;
				2073	}
				2074
				2075	// 5.2 (7): Recombine (nothing to do here)
				2076	return ru;
				2077	}
				2078
				2079	// If the given URI's path is normal then return the URI;
				2080	// o.w., return a new URI containing the normalized path.
				2081	//
				2082	private static URI normalize(URI u) {
				2083	if (u.isOpaque() \|\| (u.path == null) \|\| (u.path.length() == 0))
				2084	return u;
				2085
				2086	String np = normalize(u.path);
				2087	if (np == u.path)
				2088	return u;
				2089
				2090	URI v = new URI();
				2091	v.scheme = u.scheme;
				2092	v.fragment = u.fragment;
				2093	v.authority = u.authority;
				2094	v.userInfo = u.userInfo;
				2095	v.host = u.host;
				2096	v.port = u.port;
				2097	v.path = np;
				2098	v.query = u.query;
				2099	return v;
				2100	}
				2101
				2102	// If both URIs are hierarchical, their scheme and authority components are
				2103	// identical, and the base path is a prefix of the child's path, then
				2104	// return a relative URI that, when resolved against the base, yields the
				2105	// child; otherwise, return the child.
				2106	//
				2107	private static URI relativize(URI base, URI child) {
				2108	// check if child if opaque first so that NPE is thrown
				2109	// if child is null.
				2110	if (child.isOpaque() \|\| base.isOpaque())
				2111	return child;
				2112	if (!equalIgnoringCase(base.scheme, child.scheme)
				2113	\|\| !equal(base.authority, child.authority))
				2114	return child;
				2115
				2116	String bp = normalize(base.path);
				2117	String cp = normalize(child.path);
				2118	if (!bp.equals(cp)) {
				2119	if (!bp.endsWith("/"))
				2120	bp = bp + "/";
				2121	if (!cp.startsWith(bp))
				2122	return child;
				2123	}
				2124
				2125	URI v = new URI();
				2126	v.path = cp.substring(bp.length());
				2127	v.query = child.query;
				2128	v.fragment = child.fragment;
				2129	return v;
				2130	}
				2131
				2132
				2133
				2134	// -- Path normalization --
				2135
				2136	// The following algorithm for path normalization avoids the creation of a
				2137	// string object for each segment, as well as the use of a string buffer to
				2138	// compute the final result, by using a single char array and editing it in
				2139	// place. The array is first split into segments, replacing each slash
				2140	// with '\0' and creating a segment-index array, each element of which is
				2141	// the index of the first char in the corresponding segment. We then walk
				2142	// through both arrays, removing ".", "..", and other segments as necessary
				2143	// by setting their entries in the index array to -1. Finally, the two
				2144	// arrays are used to rejoin the segments and compute the final result.
				2145	//
				2146	// This code is based upon src/solaris/native/java/io/canonicalize_md.c
				2147
				2148
				2149	// Check the given path to see if it might need normalization. A path
				2150	// might need normalization if it contains duplicate slashes, a "."
				2151	// segment, or a ".." segment. Return -1 if no further normalization is
				2152	// possible, otherwise return the number of segments found.
				2153	//
				2154	// This method takes a string argument rather than a char array so that
				2155	// this test can be performed without invoking path.toCharArray().
				2156	//
				2157	static private int needsNormalization(String path) {
				2158	boolean normal = true;
				2159	int ns = 0; // Number of segments
				2160	int end = path.length() - 1; // Index of last char in path
				2161	int p = 0; // Index of next char in path
				2162
				2163	// Skip initial slashes
				2164	while (p <= end) {
				2165	if (path.charAt(p) != '/') break;
				2166	p++;
				2167	}
				2168	if (p > 1) normal = false;
				2169
				2170	// Scan segments
				2171	while (p <= end) {
				2172
				2173	// Looking at "." or ".." ?
				2174	if ((path.charAt(p) == '.')
				2175	&& ((p == end)
				2176	\|\| ((path.charAt(p + 1) == '/')
				2177	\|\| ((path.charAt(p + 1) == '.')
				2178	&& ((p + 1 == end)
				2179	\|\| (path.charAt(p + 2) == '/')))))) {
				2180	normal = false;
				2181	}
				2182	ns++;
				2183
				2184	// Find beginning of next segment
				2185	while (p <= end) {
				2186	if (path.charAt(p++) != '/')
				2187	continue;
				2188
				2189	// Skip redundant slashes
				2190	while (p <= end) {
				2191	if (path.charAt(p) != '/') break;
				2192	normal = false;
				2193	p++;
				2194	}
				2195
				2196	break;
				2197	}
				2198	}
				2199
				2200	return normal ? -1 : ns;
				2201	}
				2202
				2203
				2204	// Split the given path into segments, replacing slashes with nulls and
				2205	// filling in the given segment-index array.
				2206	//
				2207	// Preconditions:
				2208	// segs.length == Number of segments in path
				2209	//
				2210	// Postconditions:
				2211	// All slashes in path replaced by '\0'
				2212	// segs[i] == Index of first char in segment i (0 <= i < segs.length)
				2213	//
				2214	static private void split(char[] path, int[] segs) {
				2215	int end = path.length - 1; // Index of last char in path
				2216	int p = 0; // Index of next char in path
				2217	int i = 0; // Index of current segment
				2218
				2219	// Skip initial slashes
				2220	while (p <= end) {
				2221	if (path[p] != '/') break;
				2222	path[p] = '\0';
				2223	p++;
				2224	}
				2225
				2226	while (p <= end) {
				2227
				2228	// Note start of segment
				2229	segs[i++] = p++;
				2230
				2231	// Find beginning of next segment
				2232	while (p <= end) {
				2233	if (path[p++] != '/')
				2234	continue;
				2235	path[p - 1] = '\0';
				2236
				2237	// Skip redundant slashes
				2238	while (p <= end) {
				2239	if (path[p] != '/') break;
				2240	path[p++] = '\0';
				2241	}
				2242	break;
				2243	}
				2244	}
				2245
				2246	if (i != segs.length)
				2247	throw new InternalError(); // ASSERT
				2248	}
				2249
				2250
				2251	// Join the segments in the given path according to the given segment-index
				2252	// array, ignoring those segments whose index entries have been set to -1,
				2253	// and inserting slashes as needed. Return the length of the resulting
				2254	// path.
				2255	//
				2256	// Preconditions:
				2257	// segs[i] == -1 implies segment i is to be ignored
				2258	// path computed by split, as above, with '\0' having replaced '/'
				2259	//
				2260	// Postconditions:
				2261	// path[0] .. path[return value] == Resulting path
				2262	//
				2263	static private int join(char[] path, int[] segs) {
				2264	int ns = segs.length; // Number of segments
				2265	int end = path.length - 1; // Index of last char in path
				2266	int p = 0; // Index of next path char to write
				2267
				2268	if (path[p] == '\0') {
				2269	// Restore initial slash for absolute paths
				2270	path[p++] = '/';
				2271	}
				2272
				2273	for (int i = 0; i < ns; i++) {
				2274	int q = segs[i]; // Current segment
				2275	if (q == -1)
				2276	// Ignore this segment
				2277	continue;
				2278
				2279	if (p == q) {
				2280	// We're already at this segment, so just skip to its end
				2281	while ((p <= end) && (path[p] != '\0'))
				2282	p++;
				2283	if (p <= end) {
				2284	// Preserve trailing slash
				2285	path[p++] = '/';
				2286	}
				2287	} else if (p < q) {
				2288	// Copy q down to p
				2289	while ((q <= end) && (path[q] != '\0'))
				2290	path[p++] = path[q++];
				2291	if (q <= end) {
				2292	// Preserve trailing slash
				2293	path[p++] = '/';
				2294	}
				2295	} else
				2296	throw new InternalError(); // ASSERT false
				2297	}
				2298
				2299	return p;
				2300	}
				2301
				2302
				2303	// Remove "." segments from the given path, and remove segment pairs
				2304	// consisting of a non-".." segment followed by a ".." segment.
				2305	//
				2306	private static void removeDots(char[] path, int[] segs) {
				2307	int ns = segs.length;
				2308	int end = path.length - 1;
				2309
				2310	for (int i = 0; i < ns; i++) {
				2311	int dots = 0; // Number of dots found (0, 1, or 2)
				2312
				2313	// Find next occurrence of "." or ".."
				2314	do {
				2315	int p = segs[i];
				2316	if (path[p] == '.') {
				2317	if (p == end) {
				2318	dots = 1;
				2319	break;
				2320	} else if (path[p + 1] == '\0') {
				2321	dots = 1;
				2322	break;
				2323	} else if ((path[p + 1] == '.')
				2324	&& ((p + 1 == end)
				2325	\|\| (path[p + 2] == '\0'))) {
				2326	dots = 2;
				2327	break;
				2328	}
				2329	}
				2330	i++;
				2331	} while (i < ns);
				2332	if ((i > ns) \|\| (dots == 0))
				2333	break;
				2334
				2335	if (dots == 1) {
				2336	// Remove this occurrence of "."
				2337	segs[i] = -1;
				2338	} else {
				2339	// If there is a preceding non-".." segment, remove both that
				2340	// segment and this occurrence of ".."; otherwise, leave this
				2341	// ".." segment as-is.
				2342	int j;
				2343	for (j = i - 1; j >= 0; j--) {
				2344	if (segs[j] != -1) break;
				2345	}
				2346	if (j >= 0) {
				2347	int q = segs[j];
				2348	if (!((path[q] == '.')
				2349	&& (path[q + 1] == '.')
				2350	&& (path[q + 2] == '\0'))) {
				2351	segs[i] = -1;
				2352	segs[j] = -1;
				2353	}
				2354	}
				2355	}
				2356	}
				2357	}
				2358
				2359
				2360	// DEVIATION: If the normalized path is relative, and if the first
				2361	// segment could be parsed as a scheme name, then prepend a "." segment
				2362	//
				2363	private static void maybeAddLeadingDot(char[] path, int[] segs) {
				2364
				2365	if (path[0] == '\0')
				2366	// The path is absolute
				2367	return;
				2368
				2369	int ns = segs.length;
				2370	int f = 0; // Index of first segment
				2371	while (f < ns) {
				2372	if (segs[f] >= 0)
				2373	break;
				2374	f++;
				2375	}
				2376	if ((f >= ns) \|\| (f == 0))
				2377	// The path is empty, or else the original first segment survived,
				2378	// in which case we already know that no leading "." is needed
				2379	return;
				2380
				2381	int p = segs[f];
				2382	while ((p < path.length) && (path[p] != ':') && (path[p] != '\0')) p++;
				2383	if (p >= path.length \|\| path[p] == '\0')
				2384	// No colon in first segment, so no "." needed
				2385	return;
				2386
				2387	// At this point we know that the first segment is unused,
				2388	// hence we can insert a "." segment at that position
				2389	path[0] = '.';
				2390	path[1] = '\0';
				2391	segs[0] = 0;
				2392	}
				2393
				2394
				2395	// Normalize the given path string. A normal path string has no empty
				2396	// segments (i.e., occurrences of "//"), no segments equal to ".", and no
				2397	// segments equal to ".." that are preceded by a segment not equal to "..".
				2398	// In contrast to Unix-style pathname normalization, for URI paths we
				2399	// always retain trailing slashes.
				2400	//
				2401	private static String normalize(String ps) {
				2402
				2403	// Does this path need normalization?
				2404	int ns = needsNormalization(ps); // Number of segments
				2405	if (ns < 0)
				2406	// Nope -- just return it
				2407	return ps;
				2408
				2409	char[] path = ps.toCharArray(); // Path in char-array form
				2410
				2411	// Split path into segments
				2412	int[] segs = new int[ns]; // Segment-index array
				2413	split(path, segs);
				2414
				2415	// Remove dots
				2416	removeDots(path, segs);
				2417
				2418	// Prevent scheme-name confusion
				2419	maybeAddLeadingDot(path, segs);
				2420
				2421	// Join the remaining segments and return the result
				2422	String s = new String(path, 0, join(path, segs));
				2423	if (s.equals(ps)) {
				2424	// string was already normalized
				2425	return ps;
				2426	}
				2427	return s;
				2428	}
				2429
				2430
				2431
				2432	// -- Character classes for parsing --
				2433
				2434	// RFC2396 precisely specifies which characters in the US-ASCII charset are
				2435	// permissible in the various components of a URI reference. We here
				2436	// define a set of mask pairs to aid in enforcing these restrictions. Each
				2437	// mask pair consists of two longs, a low mask and a high mask. Taken
				2438	// together they represent a 128-bit mask, where bit i is set iff the
				2439	// character with value i is permitted.
				2440	//
				2441	// This approach is more efficient than sequentially searching arrays of
				2442	// permitted characters. It could be made still more efficient by
				2443	// precompiling the mask information so that a character's presence in a
				2444	// given mask could be determined by a single table lookup.
				2445
				2446	// Compute the low-order mask for the characters in the given string
				2447	private static long lowMask(String chars) {
				2448	int n = chars.length();
				2449	long m = 0;
				2450	for (int i = 0; i < n; i++) {
				2451	char c = chars.charAt(i);
				2452	if (c < 64)
				2453	m \|= (1L << c);
				2454	}
				2455	return m;
				2456	}
				2457
				2458	// Compute the high-order mask for the characters in the given string
				2459	private static long highMask(String chars) {
				2460	int n = chars.length();
				2461	long m = 0;
				2462	for (int i = 0; i < n; i++) {
				2463	char c = chars.charAt(i);
				2464	if ((c >= 64) && (c < 128))
				2465	m \|= (1L << (c - 64));
				2466	}
				2467	return m;
				2468	}
				2469
				2470	// Compute a low-order mask for the characters
				2471	// between first and last, inclusive
				2472	private static long lowMask(char first, char last) {
				2473	long m = 0;
				2474	int f = Math.max(Math.min(first, 63), 0);
				2475	int l = Math.max(Math.min(last, 63), 0);
				2476	for (int i = f; i <= l; i++)
				2477	m \|= 1L << i;
				2478	return m;
				2479	}
				2480
				2481	// Compute a high-order mask for the characters
				2482	// between first and last, inclusive
				2483	private static long highMask(char first, char last) {
				2484	long m = 0;
				2485	int f = Math.max(Math.min(first, 127), 64) - 64;
				2486	int l = Math.max(Math.min(last, 127), 64) - 64;
				2487	for (int i = f; i <= l; i++)
				2488	m \|= 1L << i;
				2489	return m;
				2490	}
				2491
				2492	// Tell whether the given character is permitted by the given mask pair
				2493	private static boolean match(char c, long lowMask, long highMask) {
				2494	if (c < 64)
				2495	return ((1L << c) & lowMask) != 0;
				2496	if (c < 128)
				2497	return ((1L << (c - 64)) & highMask) != 0;
				2498	return false;
				2499	}
				2500
    // Character-class masks, in reverse order from RFC2396 because
    // initializers for static fields cannot make forward references.
    //
    // Each class is a pair of longs: L_xxx covers character values 0..63
    // and H_xxx covers character values 64..127 (see lowMask/highMask).

    // digit    = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" |
    //            "8" | "9"
    private static final long L_DIGIT = lowMask('0', '9');
    private static final long H_DIGIT = 0L;

    // upalpha  = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" |
    //            "J" | "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" |
    //            "S" | "T" | "U" | "V" | "W" | "X" | "Y" | "Z"
    private static final long L_UPALPHA = 0L;
    private static final long H_UPALPHA = highMask('A', 'Z');

    // lowalpha = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" |
    //            "j" | "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" |
    //            "s" | "t" | "u" | "v" | "w" | "x" | "y" | "z"
    private static final long L_LOWALPHA = 0L;
    private static final long H_LOWALPHA = highMask('a', 'z');

    // alpha         = lowalpha | upalpha
    private static final long L_ALPHA = L_LOWALPHA | L_UPALPHA;
    private static final long H_ALPHA = H_LOWALPHA | H_UPALPHA;

    // alphanum      = alpha | digit
    private static final long L_ALPHANUM = L_DIGIT | L_ALPHA;
    private static final long H_ALPHANUM = H_DIGIT | H_ALPHA;

    // hex           = digit | "A" | "B" | "C" | "D" | "E" | "F" |
    //                         "a" | "b" | "c" | "d" | "e" | "f"
    private static final long L_HEX = L_DIGIT;
    private static final long H_HEX = highMask('A', 'F') | highMask('a', 'f');

    // mark          = "-" | "_" | "." | "!" | "~" | "*" | "'" |
    //                 "(" | ")"
    private static final long L_MARK = lowMask("-_.!~*'()");
    private static final long H_MARK = highMask("-_.!~*'()");

    // unreserved    = alphanum | mark
    private static final long L_UNRESERVED = L_ALPHANUM | L_MARK;
    private static final long H_UNRESERVED = H_ALPHANUM | H_MARK;

    // reserved      = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
    //                 "$" | "," | "[" | "]"
    // Added per RFC2732: "[", "]"
    private static final long L_RESERVED = lowMask(";/?:@&=+$,[]");
    private static final long H_RESERVED = highMask(";/?:@&=+$,[]");

    // The zero'th bit is used to indicate that escape pairs and non-US-ASCII
    // characters are allowed; this is handled by the scanEscape method below.
    // It is a flag, not a slot for the character with value 0.
    private static final long L_ESCAPED = 1L;
    private static final long H_ESCAPED = 0L;

    // uric          = reserved | unreserved | escaped
    private static final long L_URIC = L_RESERVED | L_UNRESERVED | L_ESCAPED;
    private static final long H_URIC = H_RESERVED | H_UNRESERVED | H_ESCAPED;

    // pchar         = unreserved | escaped |
    //                 ":" | "@" | "&" | "=" | "+" | "$" | ","
    private static final long L_PCHAR
        = L_UNRESERVED | L_ESCAPED | lowMask(":@&=+$,");
    private static final long H_PCHAR
        = H_UNRESERVED | H_ESCAPED | highMask(":@&=+$,");

    // All valid path characters: pchar plus the segment separator "/" and
    // the parameter separator ";"
    private static final long L_PATH = L_PCHAR | lowMask(";/");
    private static final long H_PATH = H_PCHAR | highMask(";/");

    // Dash, for use in domainlabel and toplabel
    private static final long L_DASH = lowMask("-");
    private static final long H_DASH = highMask("-");

    // Dot, for use in hostnames
    private static final long L_DOT = lowMask(".");
    private static final long H_DOT = highMask(".");

    // userinfo      = *( unreserved | escaped |
    //                    ";" | ":" | "&" | "=" | "+" | "$" | "," )
    private static final long L_USERINFO
        = L_UNRESERVED | L_ESCAPED | lowMask(";:&=+$,");
    private static final long H_USERINFO
        = H_UNRESERVED | H_ESCAPED | highMask(";:&=+$,");

    // reg_name      = 1*( unreserved | escaped | "$" | "," |
    //                     ";" | ":" | "@" | "&" | "=" | "+" )
    private static final long L_REG_NAME
        = L_UNRESERVED | L_ESCAPED | lowMask("$,;:@&=+");
    private static final long H_REG_NAME
        = H_UNRESERVED | H_ESCAPED | highMask("$,;:@&=+");

    // All valid characters for server-based authorities
    private static final long L_SERVER
        = L_USERINFO | L_ALPHANUM | L_DASH | lowMask(".:@[]");
    private static final long H_SERVER
        = H_USERINFO | H_ALPHANUM | H_DASH | highMask(".:@[]");

    // Special case of server authority that represents an IPv6 address
    // In this case, a % does not signify an escape sequence
    private static final long L_SERVER_PERCENT
        = L_SERVER | lowMask("%");
    private static final long H_SERVER_PERCENT
        = H_SERVER | highMask("%");
    // Mask pair matching only the left square bracket
    private static final long L_LEFT_BRACKET = lowMask("[");
    private static final long H_LEFT_BRACKET = highMask("[");

    // scheme        = alpha *( alpha | digit | "+" | "-" | "." )
    private static final long L_SCHEME = L_ALPHA | L_DIGIT | lowMask("+-.");
    private static final long H_SCHEME = H_ALPHA | H_DIGIT | highMask("+-.");

    // uric_no_slash = unreserved | escaped | ";" | "?" | ":" | "@" |
    //                 "&" | "=" | "+" | "$" | ","
    private static final long L_URIC_NO_SLASH
        = L_UNRESERVED | L_ESCAPED | lowMask(";?:@&=+$,");
    private static final long H_URIC_NO_SLASH
        = H_UNRESERVED | H_ESCAPED | highMask(";?:@&=+$,");
				2616
				2617
				2618	// -- Escaping and encoding --
				2619
				2620	private final static char[] hexDigits = {
				2621	'0', '1', '2', '3', '4', '5', '6', '7',
				2622	'8', '9', 'A', 'B', 'C', 'D', 'E', 'F'
				2623	};
				2624
				2625	private static void appendEscape(StringBuffer sb, byte b) {
				2626	sb.append('%');
				2627	sb.append(hexDigits[(b >> 4) & 0x0f]);
				2628	sb.append(hexDigits[(b >> 0) & 0x0f]);
				2629	}
				2630
				2631	private static void appendEncoded(StringBuffer sb, char c) {
				2632	ByteBuffer bb = null;
				2633	try {
				2634	bb = ThreadLocalCoders.encoderFor("UTF-8")
				2635	.encode(CharBuffer.wrap("" + c));
				2636	} catch (CharacterCodingException x) {
				2637	assert false;
				2638	}
				2639	while (bb.hasRemaining()) {
				2640	int b = bb.get() & 0xff;
				2641	if (b >= 0x80)
				2642	appendEscape(sb, (byte)b);
				2643	else
				2644	sb.append((char)b);
				2645	}
				2646	}
				2647
				2648	// Quote any characters in s that are not permitted
				2649	// by the given mask pair
				2650	//
				2651	private static String quote(String s, long lowMask, long highMask) {
				2652	int n = s.length();
				2653	StringBuffer sb = null;
				2654	boolean allowNonASCII = ((lowMask & L_ESCAPED) != 0);
				2655	for (int i = 0; i < s.length(); i++) {
				2656	char c = s.charAt(i);
				2657	if (c < '\u0080') {
				2658	if (!match(c, lowMask, highMask)) {
				2659	if (sb == null) {
				2660	sb = new StringBuffer();
				2661	sb.append(s.substring(0, i));
				2662	}
				2663	appendEscape(sb, (byte)c);
				2664	} else {
				2665	if (sb != null)
				2666	sb.append(c);
				2667	}
				2668	} else if (allowNonASCII
				2669	&& (Character.isSpaceChar(c)
				2670	\|\| Character.isISOControl(c))) {
				2671	if (sb == null) {
				2672	sb = new StringBuffer();
				2673	sb.append(s.substring(0, i));
				2674	}
				2675	appendEncoded(sb, c);
				2676	} else {
				2677	if (sb != null)
				2678	sb.append(c);
				2679	}
				2680	}
				2681	return (sb == null) ? s : sb.toString();
				2682	}
				2683
				2684	// Encodes all characters >= \u0080 into escaped, normalized UTF-8 octets,
				2685	// assuming that s is otherwise legal
				2686	//
				2687	private static String encode(String s) {
				2688	int n = s.length();
				2689	if (n == 0)
				2690	return s;
				2691
				2692	// First check whether we actually need to encode
				2693	for (int i = 0;;) {
				2694	if (s.charAt(i) >= '\u0080')
				2695	break;
				2696	if (++i >= n)
				2697	return s;
				2698	}
				2699
				2700	String ns = Normalizer.normalize(s, Normalizer.Form.NFC);
				2701	ByteBuffer bb = null;
				2702	try {
				2703	bb = ThreadLocalCoders.encoderFor("UTF-8")
				2704	.encode(CharBuffer.wrap(ns));
				2705	} catch (CharacterCodingException x) {
				2706	assert false;
				2707	}
				2708
				2709	StringBuffer sb = new StringBuffer();
				2710	while (bb.hasRemaining()) {
				2711	int b = bb.get() & 0xff;
				2712	if (b >= 0x80)
				2713	appendEscape(sb, (byte)b);
				2714	else
				2715	sb.append((char)b);
				2716	}
				2717	return sb.toString();
				2718	}
				2719
				2720	private static int decode(char c) {
				2721	if ((c >= '0') && (c <= '9'))
				2722	return c - '0';
				2723	if ((c >= 'a') && (c <= 'f'))
				2724	return c - 'a' + 10;
				2725	if ((c >= 'A') && (c <= 'F'))
				2726	return c - 'A' + 10;
				2727	assert false;
				2728	return -1;
				2729	}
				2730
				2731	private static byte decode(char c1, char c2) {
				2732	return (byte)( ((decode(c1) & 0xf) << 4)
				2733	\| ((decode(c2) & 0xf) << 0));
				2734	}
				2735
    // Evaluates all escapes in s, applying UTF-8 decoding if needed.  Assumes
    // that escapes are well-formed syntactically, i.e., of the form %XX.  If a
    // sequence of escaped octets is not valid UTF-8 then the erroneous octets
    // are replaced with '\uFFFD'.
    // Exception: any "%" found between "[]" is left alone. It is an IPv6 literal
    // with a scope_id
    //
    // Returns s itself when it is null, empty, or contains no '%'.
    //
    private static String decode(String s) {
        if (s == null)
            return s;
        int n = s.length();
        if (n == 0)
            return s;
        if (s.indexOf('%') < 0)
            return s;

        // The decoded form is never longer than the input, so buffers of
        // n elements are large enough
        StringBuffer sb = new StringBuffer(n);
        ByteBuffer bb = ByteBuffer.allocate(n);
        CharBuffer cb = CharBuffer.allocate(n);
        // Malformed or unmappable octets are replaced rather than reported
        CharsetDecoder dec = ThreadLocalCoders.decoderFor("UTF-8")
            .onMalformedInput(CodingErrorAction.REPLACE)
            .onUnmappableCharacter(CodingErrorAction.REPLACE);

        // This is not horribly efficient, but it will do for now
        char c = s.charAt(0);
        boolean betweenBrackets = false;

        for (int i = 0; i < n;) {
            assert c == s.charAt(i);    // Loop invariant
            // Track whether we are inside a "[...]" group
            if (c == '[') {
                betweenBrackets = true;
            } else if (betweenBrackets && c == ']') {
                betweenBrackets = false;
            }
            // Ordinary char, or a '%' inside brackets: copy it unchanged
            if (c != '%' || betweenBrackets) {
                sb.append(c);
                if (++i >= n)
                    break;
                c = s.charAt(i);
                continue;
            }
            // Start of a run of consecutive %XX escapes: gather the octets
            bb.clear();
            int ui = i;                 // Not read after this assignment
            for (;;) {
                assert (n - i >= 2);
                // Consume the two hex digits that follow the '%'
                bb.put(decode(s.charAt(++i), s.charAt(++i)));
                if (++i >= n)
                    break;
                c = s.charAt(i);
                if (c != '%')
                    break;
            }
            // Decode the gathered octets as UTF-8 and append the result
            bb.flip();
            cb.clear();
            dec.reset();
            CoderResult cr = dec.decode(bb, cb, true);
            assert cr.isUnderflow();
            cr = dec.flush(cb);
            assert cr.isUnderflow();
            sb.append(cb.flip().toString());
        }

        return sb.toString();
    }
				2800
				2801
				2802	// -- Parsing --
				2803
				2804	// For convenience we wrap the input URI string in a new instance of the
				2805	// following internal class. This saves always having to pass the input
				2806	// string as an argument to each internal scan/parse method.
				2807
				2808	private class Parser {
				2809
				2810	private String input; // URI input string
				2811	private boolean requireServerAuthority = false;
				2812
				2813	Parser(String s) {
				2814	input = s;
				2815	string = s;
				2816	}
				2817
				2818	// -- Methods for throwing URISyntaxException in various ways --
				2819
				2820	private void fail(String reason) throws URISyntaxException {
				2821	throw new URISyntaxException(input, reason);
				2822	}
				2823
				2824	private void fail(String reason, int p) throws URISyntaxException {
				2825	throw new URISyntaxException(input, reason, p);
				2826	}
				2827
				2828	private void failExpecting(String expected, int p)
				2829	throws URISyntaxException
				2830	{
				2831	fail("Expected " + expected, p);
				2832	}
				2833
				2834	private void failExpecting(String expected, String prior, int p)
				2835	throws URISyntaxException
				2836	{
				2837	fail("Expected " + expected + " following " + prior, p);
				2838	}
				2839
				2840
				2841	// -- Simple access to the input string --
				2842
				2843	// Return a substring of the input string
				2844	//
				2845	private String substring(int start, int end) {
				2846	return input.substring(start, end);
				2847	}
				2848
				2849	// Return the char at position p,
				2850	// assuming that p < input.length()
				2851	//
				2852	private char charAt(int p) {
				2853	return input.charAt(p);
				2854	}
				2855
				2856	// Tells whether start < end and, if so, whether charAt(start) == c
				2857	//
				2858	private boolean at(int start, int end, char c) {
				2859	return (start < end) && (charAt(start) == c);
				2860	}
				2861
				2862	// Tells whether start + s.length() < end and, if so,
				2863	// whether the chars at the start position match s exactly
				2864	//
				2865	private boolean at(int start, int end, String s) {
				2866	int p = start;
				2867	int sn = s.length();
				2868	if (sn > end - p)
				2869	return false;
				2870	int i = 0;
				2871	while (i < sn) {
				2872	if (charAt(p++) != s.charAt(i)) {
				2873	break;
				2874	}
				2875	i++;
				2876	}
				2877	return (i == sn);
				2878	}
				2879
				2880
				2881	// -- Scanning --
				2882
				2883	// The various scan and parse methods that follow use a uniform
				2884	// convention of taking the current start position and end index as
				2885	// their first two arguments. The start is inclusive while the end is
				2886	// exclusive, just as in the String class, i.e., a start/end pair
        // denotes the half-open interval [start, end) of the input string.
				2888	//
				2889	// These methods never proceed past the end position. They may return
				2890	// -1 to indicate outright failure, but more often they simply return
				2891	// the position of the first char after the last char scanned. Thus
				2892	// a typical idiom is
				2893	//
				2894	// int p = start;
				2895	// int q = scan(p, end, ...);
				2896	// if (q > p)
				2897	// // We scanned something
				2898	// ...;
				2899	// else if (q == p)
				2900	// // We scanned nothing
				2901	// ...;
				2902	// else if (q == -1)
				2903	// // Something went wrong
				2904	// ...;
				2905
				2906
				2907	// Scan a specific char: If the char at the given start position is
				2908	// equal to c, return the index of the next char; otherwise, return the
				2909	// start position.
				2910	//
				2911	private int scan(int start, int end, char c) {
				2912	if ((start < end) && (charAt(start) == c))
				2913	return start + 1;
				2914	return start;
				2915	}
				2916
				2917	// Scan forward from the given start position. Stop at the first char
				2918	// in the err string (in which case -1 is returned), or the first char
				2919	// in the stop string (in which case the index of the preceding char is
				2920	// returned), or the end of the input string (in which case the length
				2921	// of the input string is returned). May return the start position if
				2922	// nothing matches.
				2923	//
				2924	private int scan(int start, int end, String err, String stop) {
				2925	int p = start;
				2926	while (p < end) {
				2927	char c = charAt(p);
				2928	if (err.indexOf(c) >= 0)
				2929	return -1;
				2930	if (stop.indexOf(c) >= 0)
				2931	break;
				2932	p++;
				2933	}
				2934	return p;
				2935	}
				2936
				2937	// Scan a potential escape sequence, starting at the given position,
				2938	// with the given first char (i.e., charAt(start) == c).
				2939	//
				2940	// This method assumes that if escapes are allowed then visible
				2941	// non-US-ASCII chars are also allowed.
				2942	//
				2943	private int scanEscape(int start, int n, char first)
				2944	throws URISyntaxException
				2945	{
				2946	int p = start;
				2947	char c = first;
				2948	if (c == '%') {
				2949	// Process escape pair
				2950	if ((p + 3 <= n)
				2951	&& match(charAt(p + 1), L_HEX, H_HEX)
				2952	&& match(charAt(p + 2), L_HEX, H_HEX)) {
				2953	return p + 3;
				2954	}
				2955	fail("Malformed escape pair", p);
				2956	} else if ((c > 128)
				2957	&& !Character.isSpaceChar(c)
				2958	&& !Character.isISOControl(c)) {
				2959	// Allow unescaped but visible non-US-ASCII chars
				2960	return p + 1;
				2961	}
				2962	return p;
				2963	}
				2964
				2965	// Scan chars that match the given mask pair
				2966	//
				2967	private int scan(int start, int n, long lowMask, long highMask)
				2968	throws URISyntaxException
				2969	{
				2970	int p = start;
				2971	while (p < n) {
				2972	char c = charAt(p);
				2973	if (match(c, lowMask, highMask)) {
				2974	p++;
				2975	continue;
				2976	}
				2977	if ((lowMask & L_ESCAPED) != 0) {
				2978	int q = scanEscape(p, n, c);
				2979	if (q > p) {
				2980	p = q;
				2981	continue;
				2982	}
				2983	}
				2984	break;
				2985	}
				2986	return p;
				2987	}
				2988
				2989	// Check that each of the chars in [start, end) matches the given mask
				2990	//
				2991	private void checkChars(int start, int end,
				2992	long lowMask, long highMask,
				2993	String what)
				2994	throws URISyntaxException
				2995	{
				2996	int p = scan(start, end, lowMask, highMask);
				2997	if (p < end)
				2998	fail("Illegal character in " + what, p);
				2999	}
				3000
				3001	// Check that the char at position p matches the given mask
				3002	//
				3003	private void checkChar(int p,
				3004	long lowMask, long highMask,
				3005	String what)
				3006	throws URISyntaxException
				3007	{
				3008	checkChars(p, p + 1, lowMask, highMask, what);
				3009	}
				3010
				3011
				3012	// -- Parsing --
				3013
				3014	// [<scheme>:]<scheme-specific-part>[#<fragment>]
				3015	//
				3016	void parse(boolean rsa) throws URISyntaxException {
				3017	requireServerAuthority = rsa;
				3018	int ssp; // Start of scheme-specific part
				3019	int n = input.length();
				3020	int p = scan(0, n, "/?#", ":");
				3021	if ((p >= 0) && at(p, n, ':')) {
				3022	if (p == 0)
				3023	failExpecting("scheme name", 0);
				3024	checkChar(0, L_ALPHA, H_ALPHA, "scheme name");
				3025	checkChars(1, p, L_SCHEME, H_SCHEME, "scheme name");
				3026	scheme = substring(0, p);
				3027	p++; // Skip ':'
				3028	ssp = p;
				3029	if (at(p, n, '/')) {
				3030	p = parseHierarchical(p, n);
				3031	} else {
				3032	int q = scan(p, n, "", "#");
				3033	if (q <= p)
				3034	failExpecting("scheme-specific part", p);
				3035	checkChars(p, q, L_URIC, H_URIC, "opaque part");
				3036	p = q;
				3037	}
				3038	} else {
				3039	ssp = 0;
				3040	p = parseHierarchical(0, n);
				3041	}
				3042	schemeSpecificPart = substring(ssp, p);
				3043	if (at(p, n, '#')) {
				3044	checkChars(p + 1, n, L_URIC, H_URIC, "fragment");
				3045	fragment = substring(p + 1, n);
				3046	p = n;
				3047	}
				3048	if (p < n)
				3049	fail("end of URI", p);
				3050	}
				3051
				3052	// [//authority]<path>[?<query>]
				3053	//
				3054	// DEVIATION from RFC2396: We allow an empty authority component as
				3055	// long as it's followed by a non-empty path, query component, or
				3056	// fragment component. This is so that URIs such as "file:///foo/bar"
				3057	// will parse. This seems to be the intent of RFC2396, though the
				3058	// grammar does not permit it. If the authority is empty then the
				3059	// userInfo, host, and port components are undefined.
				3060	//
				3061	// DEVIATION from RFC2396: We allow empty relative paths. This seems
				3062	// to be the intent of RFC2396, but the grammar does not permit it.
				3063	// The primary consequence of this deviation is that "#f" parses as a
				3064	// relative URI with an empty path.
				3065	//
				3066	private int parseHierarchical(int start, int n)
				3067	throws URISyntaxException
				3068	{
				3069	int p = start;
				3070	if (at(p, n, '/') && at(p + 1, n, '/')) {
				3071	p += 2;
				3072	int q = scan(p, n, "", "/?#");
				3073	if (q > p) {
				3074	p = parseAuthority(p, q);
				3075	} else if (q < n) {
				3076	// DEVIATION: Allow empty authority prior to non-empty
				3077	// path, query component or fragment identifier
				3078	} else
				3079	failExpecting("authority", p);
				3080	}
				3081	int q = scan(p, n, "", "?#"); // DEVIATION: May be empty
				3082	checkChars(p, q, L_PATH, H_PATH, "path");
				3083	path = substring(p, q);
				3084	p = q;
				3085	if (at(p, n, '?')) {
				3086	p++;
				3087	q = scan(p, n, "", "#");
				3088	checkChars(p, q, L_URIC, H_URIC, "query");
				3089	query = substring(p, q);
				3090	p = q;
				3091	}
				3092	return p;
				3093	}
				3094
        // authority     = server | reg_name
        //
        // Ambiguity: An authority that is a registry name rather than a server
        // might have a prefix that parses as a server.  We use the fact that
        // the authority component is always followed by '/' or the end of the
        // input string to resolve this: If the complete authority did not
        // parse as a server then we try to parse it as a registry name.
        //
        // Parses the authority occupying [start, n).  Sets the authority
        // field on success (the server parser sets userInfo/host/port) and
        // always returns n; failures are reported by exception.
        //
        private int parseAuthority(int start, int n)
            throws URISyntaxException
        {
            int p = start;
            int q = p;
            URISyntaxException ex = null;

            // Whether the whole interval consists of chars legal in a
            // server-based / registry-based authority, respectively
            boolean serverChars;
            boolean regChars;

            if (scan(p, n, "", "]") > p) {
                // contains a literal IPv6 address, therefore % is allowed
                // NOTE(review): this scan advances whenever the first char
                // is not ']', so this branch is taken for most non-empty
                // authorities -- confirm that is intended
                serverChars = (scan(p, n, L_SERVER_PERCENT, H_SERVER_PERCENT) == n);
            } else {
                serverChars = (scan(p, n, L_SERVER, H_SERVER) == n);
            }
            regChars = (scan(p, n, L_REG_NAME, H_REG_NAME) == n);

            if (regChars && !serverChars) {
                // Must be a registry-based authority
                authority = substring(p, n);
                return n;
            }

            if (serverChars) {
                // Might be (probably is) a server-based authority, so attempt
                // to parse it as such.  If the attempt fails, try to treat it
                // as a registry-based authority.
                try {
                    q = parseServer(p, n);
                    if (q < n)
                        failExpecting("end of authority", q);
                    authority = substring(p, n);
                } catch (URISyntaxException x) {
                    // Undo results of failed parse
                    userInfo = null;
                    host = null;
                    port = -1;
                    if (requireServerAuthority) {
                        // If we're insisting upon a server-based authority,
                        // then just re-throw the exception
                        throw x;
                    } else {
                        // Save the exception in case it doesn't parse as a
                        // registry either
                        ex = x;
                        q = p;
                    }
                }
            }

            // q < n here means the server parse did not consume the whole
            // authority (it was not attempted, or it failed and q was reset)
            if (q < n) {
                if (regChars) {
                    // Registry-based authority
                    authority = substring(p, n);
                } else if (ex != null) {
                    // Re-throw exception; it was probably due to
                    // a malformed IPv6 address
                    throw ex;
                } else {
                    fail("Illegal character in authority", q);
                }
            }

            return n;
        }
				3169
				3170
        // [<userinfo>@]<host>[:<port>]
        //
        // Parses a server-based authority occupying [start, n), setting the
        // userInfo, host and port fields as the corresponding parts are
        // found.  Returns the index just past the last char consumed;
        // fails by exception if anything is left over.
        //
        private int parseServer(int start, int n)
            throws URISyntaxException
        {
            int p = start;
            int q;

            // userinfo
            q = scan(p, n, "/?#", "@");
            if ((q >= p) && at(q, n, '@')) {
                checkChars(p, q, L_USERINFO, H_USERINFO, "user info");
                userInfo = substring(p, q);
                p = q + 1;              // Skip '@'
            }

            // hostname, IPv4 address, or IPv6 address
            if (at(p, n, '[')) {
                // DEVIATION from RFC2396: Support IPv6 addresses, per RFC2732
                p++;
                q = scan(p, n, "/?#", "]");
                if ((q > p) && at(q, n, ']')) {
                    // look for a "%" scope id
                    int r = scan (p, q, "", "%");
                    if (r > p) {
                        parseIPv6Reference(p, r);
                        // A '%' directly before the ']' has no scope id
                        if (r+1 == q) {
                            fail ("scope id expected");
                        }
                        checkChars (r+1, q, L_ALPHANUM, H_ALPHANUM,
                                    "scope id");
                    } else {
                        parseIPv6Reference(p, q);
                    }
                    // The stored host includes the enclosing brackets
                    host = substring(p-1, q+1);
                    p = q + 1;
                } else {
                    failExpecting("closing bracket for IPv6 address", q);
                }
            } else {
                // Try an IPv4 address first, then fall back to a hostname
                q = parseIPv4Address(p, n);
                if (q <= p)
                    q = parseHostname(p, n);
                p = q;
            }

            // port
            if (at(p, n, ':')) {
                p++;
                q = scan(p, n, "", "/");
                // An empty port (nothing after the ':') is accepted and
                // leaves the port field untouched
                if (q > p) {
                    checkChars(p, q, L_DIGIT, H_DIGIT, "port number");
                    try {
                        port = Integer.parseInt(substring(p, q));
                    } catch (NumberFormatException x) {
                        fail("Malformed port number", p);
                    }
                    p = q;
                }
            }
            if (p < n)
                failExpecting("port number", p);

            return p;
        }
				3236
				3237	// Scan a string of decimal digits whose value fits in a byte
				3238	//
				3239	private int scanByte(int start, int n)
				3240	throws URISyntaxException
				3241	{
				3242	int p = start;
				3243	int q = scan(p, n, L_DIGIT, H_DIGIT);
				3244	if (q <= p) return q;
				3245	if (Integer.parseInt(substring(p, q)) > 255) return p;
				3246	return q;
				3247	}
				3248
				3249	// Scan an IPv4 address.
				3250	//
				3251	// If the strict argument is true then we require that the given
				3252	// interval contain nothing besides an IPv4 address; if it is false
				3253	// then we only require that it start with an IPv4 address.
				3254	//
				3255	// If the interval does not contain or start with (depending upon the
				3256	// strict argument) legal IPv4 address characters then we return -1
				3257	// immediately; otherwise we insist that these characters parse as a
				3258	// legal IPv4 address and throw an exception on failure.
				3259	//
				3260	// We assume that any string of decimal digits and dots must be an IPv4
				3261	// address. It won't parse as a hostname anyway, so making that
				3262	// assumption here allows more meaningful exceptions to be thrown.
				3263	//
				3264	private int scanIPv4Address(int start, int n, boolean strict)
				3265	throws URISyntaxException
				3266	{
				3267	int p = start;
				3268	int q;
				3269	int m = scan(p, n, L_DIGIT \| L_DOT, H_DIGIT \| H_DOT);
				3270	if ((m <= p) \|\| (strict && (m != n)))
				3271	return -1;
				3272	for (;;) {
				3273	// Per RFC2732: At most three digits per byte
				3274	// Further constraint: Each element fits in a byte
				3275	if ((q = scanByte(p, m)) <= p) break; p = q;
				3276	if ((q = scan(p, m, '.')) <= p) break; p = q;
				3277	if ((q = scanByte(p, m)) <= p) break; p = q;
				3278	if ((q = scan(p, m, '.')) <= p) break; p = q;
				3279	if ((q = scanByte(p, m)) <= p) break; p = q;
				3280	if ((q = scan(p, m, '.')) <= p) break; p = q;
				3281	if ((q = scanByte(p, m)) <= p) break; p = q;
				3282	if (q < m) break;
				3283	return q;
				3284	}
				3285	fail("Malformed IPv4 address", q);
				3286	return -1;
				3287	}
				3288
				3289	// Take an IPv4 address: Throw an exception if the given interval
				3290	// contains anything except an IPv4 address
				3291	//
				3292	private int takeIPv4Address(int start, int n, String expected)
				3293	throws URISyntaxException
				3294	{
				3295	int p = scanIPv4Address(start, n, true);
				3296	if (p <= start)
				3297	failExpecting(expected, start);
				3298	return p;
				3299	}
				3300
				3301	// Attempt to parse an IPv4 address, returning -1 on failure but
				3302	// allowing the given interval to contain [:<characters>] after
				3303	// the IPv4 address.
				3304	//
				3305	private int parseIPv4Address(int start, int n) {
				3306	int p;
				3307
				3308	try {
				3309	p = scanIPv4Address(start, n, false);
				3310	} catch (URISyntaxException x) {
				3311	return -1;
				3312	} catch (NumberFormatException nfe) {
				3313	return -1;
				3314	}
				3315
				3316	if (p > start && p < n) {
				3317	// IPv4 address is followed by something - check that
				3318	// it's a ":" as this is the only valid character to
				3319	// follow an address.
				3320	if (charAt(p) != ':') {
				3321	p = -1;
				3322	}
				3323	}
				3324
				3325	if (p > start)
				3326	host = substring(start, p);
				3327
				3328	return p;
				3329	}
				3330
				3331	// hostname = domainlabel [ "." ] | 1*( domainlabel "." ) toplabel [ "." ]
				3332	// domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum
				3333	// toplabel = alpha | alpha *( alphanum | "-" ) alphanum
				3334	//
				3335	private int parseHostname(int start, int n)
				3336	throws URISyntaxException
				3337	{
				3338	int p = start;
				3339	int q;
				3340	int l = -1; // Start of last parsed label
				3341
				3342	do {
				3343	// domainlabel = alphanum [ *( alphanum \| "-" ) alphanum ]
				3344	q = scan(p, n, L_ALPHANUM, H_ALPHANUM);
				3345	if (q <= p)
				3346	break;
				3347	l = p;
				3348	if (q > p) {
				3349	p = q;
				3350	q = scan(p, n, L_ALPHANUM \| L_DASH, H_ALPHANUM \| H_DASH);
				3351	if (q > p) {
				3352	if (charAt(q - 1) == '-')
				3353	fail("Illegal character in hostname", q - 1);
				3354	p = q;
				3355	}
				3356	}
				3357	q = scan(p, n, '.');
				3358	if (q <= p)
				3359	break;
				3360	p = q;
				3361	} while (p < n);
				3362
				3363	if ((p < n) && !at(p, n, ':'))
				3364	fail("Illegal character in hostname", p);
				3365
				3366	if (l < 0)
				3367	failExpecting("hostname", start);
				3368
				3369	// for a fully qualified hostname check that the rightmost
				3370	// label starts with an alpha character.
				3371	if (l > start && !match(charAt(l), L_ALPHA, H_ALPHA)) {
				3372	fail("Illegal character in hostname", l);
				3373	}
				3374
				3375	host = substring(start, p);
				3376	return p;
				3377	}
				3378
				3379
				3380	// IPv6 address parsing, from RFC2373: IPv6 Addressing Architecture
				3381	//
				3382	// Bug: The grammar in RFC2373 Appendix B does not allow addresses of
				3383	// the form ::12.34.56.78, which are clearly shown in the examples
				3384	// earlier in the document. Here is the original grammar:
				3385	//
				3386	// IPv6address = hexpart [ ":" IPv4address ]
				3387	// hexpart = hexseq | hexseq "::" [ hexseq ] | "::" [ hexseq ]
				3388	// hexseq = hex4 *( ":" hex4)
				3389	// hex4 = 1*4HEXDIG
				3390	//
				3391	// We therefore use the following revised grammar:
				3392	//
				3393	// IPv6address = hexseq [ ":" IPv4address ]
				3394	// | hexseq [ "::" [ hexpost ] ]
				3395	// | "::" [ hexpost ]
				3396	// hexpost = hexseq | hexseq ":" IPv4address | IPv4address
				3397	// hexseq = hex4 *( ":" hex4)
				3398	// hex4 = 1*4HEXDIG
				3399	//
				3400	// This covers all and only the following cases:
				3401	//
				3402	// hexseq
				3403	// hexseq : IPv4address
				3404	// hexseq ::
				3405	// hexseq :: hexseq
				3406	// hexseq :: hexseq : IPv4address
				3407	// hexseq :: IPv4address
				3408	// :: hexseq
				3409	// :: hexseq : IPv4address
				3410	// :: IPv4address
				3411	// ::
				3412	//
				3413	// Additionally we constrain the IPv6 address as follows :-
				3414	//
				3415	// i. IPv6 addresses without compressed zeros should contain
				3416	// exactly 16 bytes.
				3417	//
				3418	// ii. IPv6 addresses with compressed zeros should contain
				3419	// less than 16 bytes.
				3420
				3421	private int ipv6byteCount = 0;
				3422
				3423	private int parseIPv6Reference(int start, int n)
				3424	throws URISyntaxException
				3425	{
				3426	int p = start;
				3427	int q;
				3428	boolean compressedZeros = false;
				3429
				3430	q = scanHexSeq(p, n);
				3431
				3432	if (q > p) {
				3433	p = q;
				3434	if (at(p, n, "::")) {
				3435	compressedZeros = true;
				3436	p = scanHexPost(p + 2, n);
				3437	} else if (at(p, n, ':')) {
				3438	p = takeIPv4Address(p + 1, n, "IPv4 address");
				3439	ipv6byteCount += 4;
				3440	}
				3441	} else if (at(p, n, "::")) {
				3442	compressedZeros = true;
				3443	p = scanHexPost(p + 2, n);
				3444	}
				3445	if (p < n)
				3446	fail("Malformed IPv6 address", start);
				3447	if (ipv6byteCount > 16)
				3448	fail("IPv6 address too long", start);
				3449	if (!compressedZeros && ipv6byteCount < 16)
				3450	fail("IPv6 address too short", start);
				3451	if (compressedZeros && ipv6byteCount == 16)
				3452	fail("Malformed IPv6 address", start);
				3453
				3454	return p;
				3455	}
				3456
				3457	private int scanHexPost(int start, int n)
				3458	throws URISyntaxException
				3459	{
				3460	int p = start;
				3461	int q;
				3462
				3463	if (p == n)
				3464	return p;
				3465
				3466	q = scanHexSeq(p, n);
				3467	if (q > p) {
				3468	p = q;
				3469	if (at(p, n, ':')) {
				3470	p++;
				3471	p = takeIPv4Address(p, n, "hex digits or IPv4 address");
				3472	ipv6byteCount += 4;
				3473	}
				3474	} else {
				3475	p = takeIPv4Address(p, n, "hex digits or IPv4 address");
				3476	ipv6byteCount += 4;
				3477	}
				3478	return p;
				3479	}
				3480
				3481	// Scan a hex sequence; return -1 if one could not be scanned
				3482	//
				3483	private int scanHexSeq(int start, int n)
				3484	throws URISyntaxException
				3485	{
				3486	int p = start;
				3487	int q;
				3488
				3489	q = scan(p, n, L_HEX, H_HEX);
				3490	if (q <= p)
				3491	return -1;
				3492	if (at(q, n, '.')) // Beginning of IPv4 address
				3493	return -1;
				3494	if (q > p + 4)
				3495	fail("IPv6 hexadecimal digit sequence too long", p);
				3496	ipv6byteCount += 2;
				3497	p = q;
				3498	while (p < n) {
				3499	if (!at(p, n, ':'))
				3500	break;
				3501	if (at(p + 1, n, ':'))
				3502	break; // "::"
				3503	p++;
				3504	q = scan(p, n, L_HEX, H_HEX);
				3505	if (q <= p)
				3506	failExpecting("digits for an IPv6 address", p);
				3507	if (at(q, n, '.')) { // Beginning of IPv4 address
				3508	p--;
				3509	break;
				3510	}
				3511	if (q > p + 4)
				3512	fail("IPv6 hexadecimal digit sequence too long", p);
				3513	ipv6byteCount += 2;
				3514	p = q;
				3515	}
				3516
				3517	return p;
				3518	}
				3519
				3520	}
				3521
				3522	}