Blame - jdk/src/share/classes/java/nio/charset/Charset.java - platform/libcore

blob: 1372343271274fda5f35a1c31209060193ef8dff [file] [log] [blame]

J. Duke	319a3b9	2007-12-01 00:00:00 +0000	[diff] [blame^]	1	/*
				2	* Copyright 2000-2006 Sun Microsystems, Inc. All Rights Reserved.
				3	* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
				4	*
				5	* This code is free software; you can redistribute it and/or modify it
				6	* under the terms of the GNU General Public License version 2 only, as
				7	* published by the Free Software Foundation. Sun designates this
				8	* particular file as subject to the "Classpath" exception as provided
				9	* by Sun in the LICENSE file that accompanied this code.
				10	*
				11	* This code is distributed in the hope that it will be useful, but WITHOUT
				12	* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
				13	* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
				14	* version 2 for more details (a copy is included in the LICENSE file that
				15	* accompanied this code).
				16	*
				17	* You should have received a copy of the GNU General Public License version
				18	* 2 along with this work; if not, write to the Free Software Foundation,
				19	* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
				20	*
				21	* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
				22	* CA 95054 USA or visit www.sun.com if you need additional information or
				23	* have any questions.
				24	*/
				25
				26	package java.nio.charset;
				27
				28	import java.nio.ByteBuffer;
				29	import java.nio.CharBuffer;
				30	import java.nio.charset.spi.CharsetProvider;
				31	import java.security.AccessController;
				32	import java.security.AccessControlException;
				33	import java.security.PrivilegedAction;
				34	import java.util.Collections;
				35	import java.util.HashSet;
				36	import java.util.Iterator;
				37	import java.util.Locale;
				38	import java.util.Map;
				39	import java.util.NoSuchElementException;
				40	import java.util.Set;
				41	import java.util.ServiceLoader;
				42	import java.util.ServiceConfigurationError;
				43	import java.util.SortedMap;
				44	import java.util.TreeMap;
				45	import sun.misc.ASCIICaseInsensitiveComparator;
				46	import sun.nio.cs.StandardCharsets;
				47	import sun.nio.cs.ThreadLocalCoders;
				48	import sun.security.action.GetPropertyAction;
				49
				50
				51	/**
				52	* A named mapping between sequences of sixteen-bit Unicode <a
				53	* href="../../lang/Character.html#unicode">code units</a> and sequences of
				54	* bytes. This class defines methods for creating decoders and encoders and
				55	* for retrieving the various names associated with a charset. Instances of
				56	* this class are immutable.
				57	*
				58	* <p> This class also defines static methods for testing whether a particular
				59	* charset is supported, for locating charset instances by name, and for
				60	* constructing a map that contains every charset for which support is
				61	* available in the current Java virtual machine. Support for new charsets can
				62	* be added via the service-provider interface defined in the {@link
				63	* java.nio.charset.spi.CharsetProvider} class.
				64	*
				65	* <p> All of the methods defined in this class are safe for use by multiple
				66	* concurrent threads.
				67	*
				68	*
				69	* <a name="names"></a><a name="charenc"></a>
				70	* <h4>Charset names</h4>
				71	*
				72	* <p> Charsets are named by strings composed of the following characters:
				73	*
				74	* <ul>
				75	*
				76	* <li> The uppercase letters <tt>'A'</tt> through <tt>'Z'</tt>
				77	* (<tt>'\u0041'</tt> through <tt>'\u005a'</tt>),
				78	*
				79	* <li> The lowercase letters <tt>'a'</tt> through <tt>'z'</tt>
				80	* (<tt>'\u0061'</tt> through <tt>'\u007a'</tt>),
				81	*
				82	* <li> The digits <tt>'0'</tt> through <tt>'9'</tt>
				83	* (<tt>'\u0030'</tt> through <tt>'\u0039'</tt>),
				84	*
				85	* <li> The dash character <tt>'-'</tt>
				86	* (<tt>'\u002d'</tt>, <small>HYPHEN-MINUS</small>),
				87	*
				88	* <li> The period character <tt>'.'</tt>
				89	* (<tt>'\u002e'</tt>, <small>FULL STOP</small>),
				90	*
				91	* <li> The colon character <tt>':'</tt>
				92	* (<tt>'\u003a'</tt>, <small>COLON</small>), and
				93	*
				94	* <li> The underscore character <tt>'_'</tt>
				95	* (<tt>'\u005f'</tt>, <small>LOW LINE</small>).
				96	*
				97	* </ul>
				98	*
				99	* A charset name must begin with either a letter or a digit. The empty string
				100	* is not a legal charset name. Charset names are not case-sensitive; that is,
				101	* case is always ignored when comparing charset names. Charset names
				102	* generally follow the conventions documented in <a
				103	* href="http://www.ietf.org/rfc/rfc2278.txt"><i>RFC 2278: IANA Charset
				104	* Registration Procedures</i></a>.
				105	*
				106	* <p> Every charset has a <i>canonical name</i> and may also have one or more
				107	* <i>aliases</i>. The canonical name is returned by the {@link #name() name} method
				108	* of this class. Canonical names are, by convention, usually in upper case.
				109	* The aliases of a charset are returned by the {@link #aliases() aliases}
				110	* method.
				111	*
				112	* <a name="hn"></a>
				113	*
				114	* <p> Some charsets have an <i>historical name</i> that is defined for
				115	* compatibility with previous versions of the Java platform. A charset's
				116	* historical name is either its canonical name or one of its aliases. The
				117	* historical name is returned by the <tt>getEncoding()</tt> methods of the
				118	* {@link java.io.InputStreamReader#getEncoding InputStreamReader} and {@link
				119	* java.io.OutputStreamWriter#getEncoding OutputStreamWriter} classes.
				120	*
				121	* <a name="iana"></a>
				122	*
				123	* <p> If a charset listed in the <a
				124	* href="http://www.iana.org/assignments/character-sets"><i>IANA Charset
				125	* Registry</i></a> is supported by an implementation of the Java platform then
				126	* its canonical name must be the name listed in the registry. Many charsets
				127	* are given more than one name in the registry, in which case the registry
				128	* identifies one of the names as <i>MIME-preferred</i>. If a charset has more
				129	* than one registry name then its canonical name must be the MIME-preferred
				130	* name and the other names in the registry must be valid aliases. If a
				131	* supported charset is not listed in the IANA registry then its canonical name
				132	* must begin with one of the strings <tt>"X-"</tt> or <tt>"x-"</tt>.
				133	*
				134	* <p> The IANA charset registry does change over time, and so the canonical
				135	* name and the aliases of a particular charset may also change over time. To
				136	* ensure compatibility it is recommended that no alias ever be removed from a
				137	* charset, and that if the canonical name of a charset is changed then its
				138	* previous canonical name be made into an alias.
				139	*
				140	*
				141	* <h4>Standard charsets</h4>
				142	*
				143	* <p> Every implementation of the Java platform is required to support the
				144	* following standard charsets. Consult the release documentation for your
				145	* implementation to see if any other charsets are supported. The behavior
				146	* of such optional charsets may differ between implementations.
				147	*
				148	* <blockquote><table width="80%" summary="Description of standard charsets">
				149	* <tr><th><p align="left">Charset</p></th><th><p align="left">Description</p></th></tr>
				150	* <tr><td valign=top><tt>US-ASCII</tt></td>
				151	* <td>Seven-bit ASCII, a.k.a. <tt>ISO646-US</tt>,
				152	* a.k.a. the Basic Latin block of the Unicode character set</td></tr>
				153	* <tr><td valign=top><tt>ISO-8859-1  </tt></td>
				154	* <td>ISO Latin Alphabet No. 1, a.k.a. <tt>ISO-LATIN-1</tt></td></tr>
				155	* <tr><td valign=top><tt>UTF-8</tt></td>
				156	* <td>Eight-bit UCS Transformation Format</td></tr>
				157	* <tr><td valign=top><tt>UTF-16BE</tt></td>
				158	* <td>Sixteen-bit UCS Transformation Format,
				159	* big-endian byte order</td></tr>
				160	* <tr><td valign=top><tt>UTF-16LE</tt></td>
				161	* <td>Sixteen-bit UCS Transformation Format,
				162	* little-endian byte order</td></tr>
				163	* <tr><td valign=top><tt>UTF-16</tt></td>
				164	* <td>Sixteen-bit UCS Transformation Format,
				165	* byte order identified by an optional byte-order mark</td></tr>
				166	* </table></blockquote>
				167	*
				168	* <p> The <tt>UTF-8</tt> charset is specified by <a
				169	* href="http://www.ietf.org/rfc/rfc2279.txt"><i>RFC 2279</i></a>; the
				170	* transformation format upon which it is based is specified in
				171	* Amendment 2 of ISO 10646-1 and is also described in the <a
				172	* href="http://www.unicode.org/unicode/standard/standard.html"><i>Unicode
				173	* Standard</i></a>.
				174	*
				175	* <p> The <tt>UTF-16</tt> charsets are specified by <a
				176	* href="http://www.ietf.org/rfc/rfc2781.txt"><i>RFC 2781</i></a>; the
				177	* transformation formats upon which they are based are specified in
				178	* Amendment 1 of ISO 10646-1 and are also described in the <a
				179	* href="http://www.unicode.org/unicode/standard/standard.html"><i>Unicode
				180	* Standard</i></a>.
				181	*
				182	* <p> The <tt>UTF-16</tt> charsets use sixteen-bit quantities and are
				183	* therefore sensitive to byte order. In these encodings the byte order of a
				184	* stream may be indicated by an initial <i>byte-order mark</i> represented by
				185	* the Unicode character <tt>'\uFEFF'</tt>. Byte-order marks are handled
				186	* as follows:
				187	*
				188	* <ul>
				189	*
				190	* <li><p> When decoding, the <tt>UTF-16BE</tt> and <tt>UTF-16LE</tt>
				191	* charsets ignore byte-order marks; when encoding, they do not write
				192	* byte-order marks. </p></li>
				193	*
				194	* <li><p> When decoding, the <tt>UTF-16</tt> charset interprets a byte-order
				195	* mark to indicate the byte order of the stream but defaults to big-endian
				196	* if there is no byte-order mark; when encoding, it uses big-endian byte
				197	* order and writes a big-endian byte-order mark. </p></li>
				198	*
				199	* </ul>
				200	*
				201	* In any case, when a byte-order mark is read at the beginning of a decoding
				202	* operation it is omitted from the resulting sequence of characters. Byte
				203	* order marks occurring after the first element of an input sequence are not
				204	* omitted since the same code is used to represent <small>ZERO-WIDTH
				205	* NON-BREAKING SPACE</small>.
				206	*
				207	* <p> Every instance of the Java virtual machine has a default charset, which
				208	* may or may not be one of the standard charsets. The default charset is
				209	* determined during virtual-machine startup and typically depends upon the
				210	* locale and charset being used by the underlying operating system. </p>
				211	*
				212	*
				213	* <h4>Terminology</h4>
				214	*
				215	* <p> The name of this class is taken from the terms used in <a
				216	* href="http://www.ietf.org/rfc/rfc2278.txt"><i>RFC 2278</i></a>. In that
				217	* document a <i>charset</i> is defined as the combination of a coded character
				218	* set and a character-encoding scheme.
				219	*
				220	* <p> A <i>coded character set</i> is a mapping between a set of abstract
				221	* characters and a set of integers. US-ASCII, ISO 8859-1,
				222	* JIS X 0201, and full Unicode, which is the same as
				223	* ISO 10646-1, are examples of coded character sets.
				224	*
				225	* <p> A <i>character-encoding scheme</i> is a mapping between a coded
				226	* character set and a set of octet (eight-bit byte) sequences. UTF-8, UCS-2,
				227	* UTF-16, ISO 2022, and EUC are examples of character-encoding schemes.
				228	* Encoding schemes are often associated with a particular coded character set;
				229	* UTF-8, for example, is used only to encode Unicode. Some schemes, however,
				230	* are associated with multiple character sets; EUC, for example, can be used
				231	* to encode characters in a variety of Asian character sets.
				232	*
				233	* <p> When a coded character set is used exclusively with a single
				234	* character-encoding scheme then the corresponding charset is usually named
				235	* for the character set; otherwise a charset is usually named for the encoding
				236	* scheme and, possibly, the locale of the character sets that it supports.
				237	* Hence <tt>US-ASCII</tt> is the name of the charset for US-ASCII while
				238	* <tt>EUC-JP</tt> is the name of the charset that encodes the
				239	* JIS X 0201, JIS X 0208, and JIS X 0212
				240	* character sets.
				241	*
				242	* <p> The native character encoding of the Java programming language is
				243	* UTF-16. A charset in the Java platform therefore defines a mapping between
				244	* sequences of sixteen-bit UTF-16 code units and sequences of bytes. </p>
				245	*
				246	*
				247	* @author Mark Reinhold
				248	* @author JSR-51 Expert Group
				249	* @since 1.4
				250	*
				251	* @see CharsetDecoder
				252	* @see CharsetEncoder
				253	* @see java.nio.charset.spi.CharsetProvider
				254	* @see java.lang.Character
				255	*/
				256
				257	public abstract class Charset
				258	implements Comparable<Charset>
				259	{
				260
				261	/* -- Static methods -- */
				262
				263	private static String bugLevel = null;
				264
				265	static boolean atBugLevel(String bl) { // package-private
				266	if (bugLevel == null) {
				267	if (!sun.misc.VM.isBooted())
				268	return false;
				269	bugLevel = AccessController.doPrivileged(
				270	new GetPropertyAction("sun.nio.cs.bugLevel"));
				271	if (bugLevel == null)
				272	bugLevel = "";
				273	}
				274	return (bugLevel != null) && bugLevel.equals(bl);
				275	}
				276
				277	/**
				278	* Checks that the given string is a legal charset name. </p>
				279	*
				280	* @param s
				281	* A purported charset name
				282	*
				283	* @throws IllegalCharsetNameException
				284	* If the given name is not a legal charset name
				285	*/
				286	private static void checkName(String s) {
				287	int n = s.length();
				288	if (!atBugLevel("1.4")) {
				289	if (n == 0)
				290	throw new IllegalCharsetNameException(s);
				291	}
				292	for (int i = 0; i < n; i++) {
				293	char c = s.charAt(i);
				294	if (c >= 'A' && c <= 'Z') continue;
				295	if (c >= 'a' && c <= 'z') continue;
				296	if (c >= '0' && c <= '9') continue;
				297	if (c == '-' && i != 0) continue;
				298	if (c == ':' && i != 0) continue;
				299	if (c == '_' && i != 0) continue;
				300	if (c == '.' && i != 0) continue;
				301	throw new IllegalCharsetNameException(s);
				302	}
				303	}
				304
				305	/* The standard set of charsets */
				306	private static CharsetProvider standardProvider = new StandardCharsets(); // consulted first by lookup2() and availableCharsets()
				307
				308	// Cache of the most-recently-returned charsets,
				309	// along with the names that were used to find them
				310	//
				311	private static volatile Object[] cache1 = null; // "Level 1" cache
				312	private static volatile Object[] cache2 = null; // "Level 2" cache
				313
				314	private static void cache(String charsetName, Charset cs) {
				315	cache2 = cache1;
				316	cache1 = new Object[] { charsetName, cs };
				317	}
				318
				319	// Creates an iterator that walks over the available providers, ignoring
				320	// those whose lookup or instantiation causes a security exception to be
				321	// thrown. Should be invoked with full privileges.
				322	//
				323	private static Iterator providers() {
				324	return new Iterator() {
				325
				326	ClassLoader cl = ClassLoader.getSystemClassLoader();
				327	ServiceLoader<CharsetProvider> sl =
				328	ServiceLoader.load(CharsetProvider.class, cl);
				329	Iterator<CharsetProvider> i = sl.iterator();
				330
				331	Object next = null;
				332
				333	private boolean getNext() {
				334	while (next == null) {
				335	try {
				336	if (!i.hasNext())
				337	return false;
				338	next = i.next();
				339	} catch (ServiceConfigurationError sce) {
				340	if (sce.getCause() instanceof SecurityException) {
				341	// Ignore security exceptions
				342	continue;
				343	}
				344	throw sce;
				345	}
				346	}
				347	return true;
				348	}
				349
				350	public boolean hasNext() {
				351	return getNext();
				352	}
				353
				354	public Object next() {
				355	if (!getNext())
				356	throw new NoSuchElementException();
				357	Object n = next;
				358	next = null;
				359	return n;
				360	}
				361
				362	public void remove() {
				363	throw new UnsupportedOperationException();
				364	}
				365
				366	};
				367	}
				368
				369	// Thread-local gate to prevent recursive provider lookups
				370	private static ThreadLocal gate = new ThreadLocal();
				371
				372	private static Charset lookupViaProviders(final String charsetName) {
				373
				374	// The runtime startup sequence looks up standard charsets as a
				375	// consequence of the VM's invocation of System.initializeSystemClass
				376	// in order to, e.g., set system properties and encode filenames. At
				377	// that point the application class loader has not been initialized,
				378	// however, so we can't look for providers because doing so will cause
				379	// that loader to be prematurely initialized with incomplete
				380	// information.
				381	//
				382	if (!sun.misc.VM.isBooted())
				383	return null;
				384
				385	if (gate.get() != null)
				386	// Avoid recursive provider lookups
				387	return null;
				388	try {
				389	gate.set(gate);
				390
				391	return AccessController.doPrivileged(
				392	new PrivilegedAction<Charset>() {
				393	public Charset run() {
				394	for (Iterator i = providers(); i.hasNext();) {
				395	CharsetProvider cp = (CharsetProvider)i.next();
				396	Charset cs = cp.charsetForName(charsetName);
				397	if (cs != null)
				398	return cs;
				399	}
				400	return null;
				401	}
				402	});
				403
				404	} finally {
				405	gate.set(null);
				406	}
				407	}
				408
				409	/* The extended set of charsets */
				410	private static Object extendedProviderLock = new Object(); // held by lookupExtendedCharset() while it probes and reads the two fields below
				411	private static boolean extendedProviderProbed = false; // set once probeExtendedProvider() has returned normally
				412	private static CharsetProvider extendedProvider = null; // stays null when sun.nio.cs.ext.ExtendedCharsets cannot be found
				413
				414	private static void probeExtendedProvider() {
				415	AccessController.doPrivileged(new PrivilegedAction<Object>() {
				416	public Object run() {
				417	try {
				418	Class epc
				419	= Class.forName("sun.nio.cs.ext.ExtendedCharsets");
				420	extendedProvider = (CharsetProvider)epc.newInstance();
				421	} catch (ClassNotFoundException x) {
				422	// Extended charsets not available
				423	// (charsets.jar not present)
				424	} catch (InstantiationException x) {
				425	throw new Error(x);
				426	} catch (IllegalAccessException x) {
				427	throw new Error(x);
				428	}
				429	return null;
				430	}
				431	});
				432	}
				433
				434	private static Charset lookupExtendedCharset(String charsetName) {
				435	CharsetProvider ecp = null;
				436	synchronized (extendedProviderLock) {
				437	if (!extendedProviderProbed) {
				438	probeExtendedProvider();
				439	extendedProviderProbed = true;
				440	}
				441	ecp = extendedProvider;
				442	}
				443	return (ecp != null) ? ecp.charsetForName(charsetName) : null;
				444	}
				445
				446	private static Charset lookup(String charsetName) {
				447	if (charsetName == null)
				448	throw new IllegalArgumentException("Null charset name");
				449
				450	Object[] a;
				451	if ((a = cache1) != null && charsetName.equals(a[0]))
				452	return (Charset)a[1];
				453	// We expect most programs to use one Charset repeatedly.
				454	// We convey a hint to this effect to the VM by putting the
				455	// level 1 cache miss code in a separate method.
				456	return lookup2(charsetName);
				457	}
				458
				459	private static Charset lookup2(String charsetName) {
				460	Object[] a;
				461	if ((a = cache2) != null && charsetName.equals(a[0])) {
				462	cache2 = cache1;
				463	cache1 = a;
				464	return (Charset)a[1];
				465	}
				466
				467	Charset cs;
				468	if ((cs = standardProvider.charsetForName(charsetName)) != null \|\|
				469	(cs = lookupExtendedCharset(charsetName)) != null \|\|
				470	(cs = lookupViaProviders(charsetName)) != null)
				471	{
				472	cache(charsetName, cs);
				473	return cs;
				474	}
				475
				476	/* Only need to check the name if we didn't find a charset for it */
				477	checkName(charsetName);
				478	return null;
				479	}
				480
				481	/**
				482	* Tells whether the named charset is supported. </p>
				483	*
				484	* @param charsetName
				485	* The name of the requested charset; may be either
				486	* a canonical name or an alias
				487	*
				488	* @return <tt>true</tt> if, and only if, support for the named charset
				489	* is available in the current Java virtual machine
				490	*
				491	* @throws IllegalCharsetNameException
				492	* If the given charset name is illegal
				493	*
				494	* @throws IllegalArgumentException
				495	* If the given <tt>charsetName</tt> is null
				496	*/
				497	public static boolean isSupported(String charsetName) {
				498	return (lookup(charsetName) != null);
				499	}
				500
				501	/**
				502	* Returns a charset object for the named charset. </p>
				503	*
				504	* @param charsetName
				505	* The name of the requested charset; may be either
				506	* a canonical name or an alias
				507	*
				508	* @return A charset object for the named charset
				509	*
				510	* @throws IllegalCharsetNameException
				511	* If the given charset name is illegal
				512	*
				513	* @throws IllegalArgumentException
				514	* If the given <tt>charsetName</tt> is null
				515	*
				516	* @throws UnsupportedCharsetException
				517	* If no support for the named charset is available
				518	* in this instance of the Java virtual machine
				519	*/
				520	public static Charset forName(String charsetName) {
				521	Charset cs = lookup(charsetName);
				522	if (cs != null)
				523	return cs;
				524	throw new UnsupportedCharsetException(charsetName);
				525	}
				526
				527	// Fold charsets from the given iterator into the given map, ignoring
				528	// charsets whose names already have entries in the map.
				529	//
				530	private static void put(Iterator i, Map m) {
				531	while (i.hasNext()) {
				532	Charset cs = (Charset)i.next();
				533	if (!m.containsKey(cs.name()))
				534	m.put(cs.name(), cs);
				535	}
				536	}
				537
				538	/**
				539	* Constructs a sorted map from canonical charset names to charset objects.
				540	*
				541	* <p> The map returned by this method will have one entry for each charset
				542	* for which support is available in the current Java virtual machine. If
				543	* two or more supported charsets have the same canonical name then the
				544	* resulting map will contain just one of them; which one it will contain
				545	* is not specified. </p>
				546	*
				547	* <p> The invocation of this method, and the subsequent use of the
				548	* resulting map, may cause time-consuming disk or network I/O operations
				549	* to occur. This method is provided for applications that need to
				550	* enumerate all of the available charsets, for example to allow user
				551	* charset selection. This method is not used by the {@link #forName
				552	* forName} method, which instead employs an efficient incremental lookup
				553	* algorithm.
				554	*
				555	* <p> This method may return different results at different times if new
				556	* charset providers are dynamically made available to the current Java
				557	* virtual machine. In the absence of such changes, the charsets returned
				558	* by this method are exactly those that can be retrieved via the {@link
				559	* #forName forName} method. </p>
				560	*
				561	* @return An immutable, case-insensitive map from canonical charset names
				562	* to charset objects
				563	*/
				564	public static SortedMap<String,Charset> availableCharsets() {
				565	return AccessController.doPrivileged(
				566	new PrivilegedAction<SortedMap<String,Charset>>() {
				567	public SortedMap<String,Charset> run() {
				568	TreeMap<String,Charset> m =
				569	new TreeMap<String,Charset>(
				570	ASCIICaseInsensitiveComparator.CASE_INSENSITIVE_ORDER);
				571	put(standardProvider.charsets(), m);
				572	for (Iterator i = providers(); i.hasNext();) {
				573	CharsetProvider cp = (CharsetProvider)i.next();
				574	put(cp.charsets(), m);
				575	}
				576	return Collections.unmodifiableSortedMap(m);
				577	}
				578	});
				579	}
				580
				581	private static volatile Charset defaultCharset; // null until defaultCharset() has resolved and cached it
				582
				583	/**
				584	* Returns the default charset of this Java virtual machine.
				585	*
				586	* <p> The default charset is determined during virtual-machine startup and
				587	* typically depends upon the locale and charset of the underlying
				588	* operating system.
				589	*
				590	* @return A charset object for the default charset
				591	*
				592	* @since 1.5
				593	*/
				594	public static Charset defaultCharset() {
				595	if (defaultCharset == null) {
				596	synchronized (Charset.class) {
				597	String csn = AccessController.doPrivileged(
				598	new GetPropertyAction("file.encoding"));
				599	Charset cs = lookup(csn);
				600	if (cs != null)
				601	defaultCharset = cs;
				602	else
				603	defaultCharset = forName("UTF-8");
				604	}
				605	}
				606	return defaultCharset;
				607	}
				608
				609
				610	/* -- Instance fields and methods -- */
				611
				612	private final String name; // tickles a bug in oldjavac
				613	private final String[] aliases; // tickles a bug in oldjavac
				614	private Set aliasSet = null; // unmodifiable set built from "aliases" on the first call to aliases()
				615
				616	/**
				617	* Initializes a new charset with the given canonical name and alias
				618	* set. </p>
				619	*
				620	* @param canonicalName
				621	* The canonical name of this charset
				622	*
				623	* @param aliases
				624	* An array of this charset's aliases, or null if it has no aliases
				625	*
				626	* @throws IllegalCharsetNameException
				627	* If the canonical name or any of the aliases are illegal
				628	*/
				629	protected Charset(String canonicalName, String[] aliases) {
				630	checkName(canonicalName);
				631	String[] as = (aliases == null) ? new String[0] : aliases;
				632	for (int i = 0; i < as.length; i++)
				633	checkName(as[i]);
				634	this.name = canonicalName;
				635	this.aliases = as;
				636	}
				637
				638	/**
				639	* Returns this charset's canonical name. </p>
				640	*
				641	* @return The canonical name of this charset
				642	*/
				643	public final String name() {
				644	return name;
				645	}
				646
				647	/**
				648	* Returns a set containing this charset's aliases. </p>
				649	*
				650	* @return An immutable set of this charset's aliases
				651	*/
				652	public final Set<String> aliases() {
				653	if (aliasSet != null)
				654	return aliasSet;
				655	int n = aliases.length;
				656	HashSet hs = new HashSet(n);
				657	for (int i = 0; i < n; i++)
				658	hs.add(aliases[i]);
				659	aliasSet = Collections.unmodifiableSet(hs);
				660	return aliasSet;
				661	}
				662
				663	/**
				664	* Returns this charset's human-readable name for the default locale.
				665	*
				666	* <p> The default implementation of this method simply returns this
				667	* charset's canonical name. Concrete subclasses of this class may
				668	* override this method in order to provide a localized display name. </p>
				669	*
				670	* @return The display name of this charset in the default locale
				671	*/
				672	public String displayName() {
				673	return name;
				674	}
				675
				676	/**
				677	* Tells whether or not this charset is registered in the <a
				678	* href="http://www.iana.org/assignments/character-sets">IANA Charset
				679	* Registry</a>. </p>
				680	*
				681	* @return <tt>true</tt> if, and only if, this charset is known by its
				682	* implementor to be registered with the IANA
				683	*/
				684	public final boolean isRegistered() {
				685	return !name.startsWith("X-") && !name.startsWith("x-");
				686	}
				687
				688	/**
				689	* Returns this charset's human-readable name for the given locale.
				690	*
				691	* <p> The default implementation of this method simply returns this
				692	* charset's canonical name. Concrete subclasses of this class may
				693	* override this method in order to provide a localized display name. </p>
				694	*
				695	* @param locale
				696	* The locale for which the display name is to be retrieved
				697	*
				698	* @return The display name of this charset in the given locale
				699	*/
				700	public String displayName(Locale locale) {
				701	return name;
				702	}
				703
				704	/**
				705	* Tells whether or not this charset contains the given charset.
				706	*
				707	* <p> A charset <i>C</i> is said to <i>contain</i> a charset <i>D</i> if,
				708	* and only if, every character representable in <i>D</i> is also
				709	* representable in <i>C</i>. If this relationship holds then it is
				710	* guaranteed that every string that can be encoded in <i>D</i> can also be
				711	* encoded in <i>C</i> without performing any replacements.
				712	*
				713	* <p> That <i>C</i> contains <i>D</i> does not imply that each character
				714	* representable in <i>C</i> by a particular byte sequence is represented
				715	* in <i>D</i> by the same byte sequence, although sometimes this is the
				716	* case.
				717	*
				718	* <p> Every charset contains itself.
				719	*
				720	* <p> This method computes an approximation of the containment relation:
				721	* If it returns <tt>true</tt> then the given charset is known to be
				722	* contained by this charset; if it returns <tt>false</tt>, however, then
				723	* it is not necessarily the case that the given charset is not contained
				724	* in this charset.
				725	*
				726	* @return <tt>true</tt> if the given charset is contained in this charset
				727	*/
				728	public abstract boolean contains(Charset cs); // cs: the charset to test for containment in this charset
				729
				730	/**
				731	* Constructs a new decoder for this charset. </p>
				732	*
				733	* @return A new decoder for this charset
				734	*/
				735	public abstract CharsetDecoder newDecoder(); // each call is expected to construct a new decoder
				736
				737	/**
				738	* Constructs a new encoder for this charset. </p>
				739	*
				740	* @return A new encoder for this charset
				741	*
				742	* @throws UnsupportedOperationException
				743	* If this charset does not support encoding
				744	*/
				745	public abstract CharsetEncoder newEncoder(); // may throw UnsupportedOperationException; see canEncode()
				746
				747	/**
				748	* Tells whether or not this charset supports encoding.
				749	*
				750	* <p> Nearly all charsets support encoding. The primary exceptions are
				751	* special-purpose <i>auto-detect</i> charsets whose decoders can determine
				752	* which of several possible encoding schemes is in use by examining the
				753	* input byte sequence. Such charsets do not support encoding because
				754	* there is no way to determine which encoding should be used on output.
				755	* Implementations of such charsets should override this method to return
				756	* <tt>false</tt>. </p>
				757	*
				758	* @return <tt>true</tt> if, and only if, this charset supports encoding
				759	*/
				760	public boolean canEncode() {
				761	return true; // default; charsets that cannot encode (e.g. auto-detect charsets) override this to return false
				762	}
				763
				764	/**
				765	* Decodes the bytes of the given buffer into Unicode characters using
				766	* this charset.
				767	*
				768	* <p> For a charset <tt>cs</tt>, invoking this convenience method gives
				769	* the same result as evaluating
				770	*
				771	* <pre>
				772	* cs.newDecoder()
				773	* .onMalformedInput(CodingErrorAction.REPLACE)
				774	* .onUnmappableCharacter(CodingErrorAction.REPLACE)
				775	* .decode(bb); </pre>
				776	*
				777	* but it may be more efficient, since decoders can be cached and reused
				778	* across successive invocations.
				779	*
				780	* <p> Malformed-input and unmappable-character sequences are never
				781	* reported; they are always replaced with this charset's default
				782	* replacement string. To detect such sequences, call the {@link
				783	* CharsetDecoder#decode(java.nio.ByteBuffer)} method directly. </p>
				784	*
				785	* @param bb The byte buffer to be decoded
				786	*
				787	* @return A char buffer containing the decoded characters
				788	*/
				789	public final CharBuffer decode(ByteBuffer bb) {
				790	CharsetDecoder replacing = ThreadLocalCoders.decoderFor(this)
				791	.onMalformedInput(CodingErrorAction.REPLACE)
				792	.onUnmappableCharacter(CodingErrorAction.REPLACE);
				793	try {
				794	return replacing.decode(bb);
				795	} catch (CharacterCodingException x) {
				796	throw new Error(x); // Can't happen
				797	}
				798	}
				799
				800	/**
				801	* Encodes the Unicode characters of the given buffer into bytes using
				802	* this charset.
				803	*
				804	* <p> For a charset <tt>cs</tt>, invoking this convenience method gives
				805	* the same result as evaluating
				806	*
				807	* <pre>
				808	* cs.newEncoder()
				809	* .onMalformedInput(CodingErrorAction.REPLACE)
				810	* .onUnmappableCharacter(CodingErrorAction.REPLACE)
				811	* .encode(cb); </pre>
				812	*
				813	* but it may be more efficient, since encoders can be cached and reused
				814	* across successive invocations.
				815	*
				816	* <p> Malformed-input and unmappable-character sequences are never
				817	* reported; they are always replaced with this charset's default
				818	* replacement byte array. To detect such sequences, call the {@link
				819	* CharsetEncoder#encode(java.nio.CharBuffer)} method directly. </p>
				820	*
				821	* @param cb The char buffer to be encoded
				822	*
				823	* @return A byte buffer containing the encoded characters
				824	*/
				825	public final ByteBuffer encode(CharBuffer cb) {
				826	CharsetEncoder replacing = ThreadLocalCoders.encoderFor(this)
				827	.onMalformedInput(CodingErrorAction.REPLACE)
				828	.onUnmappableCharacter(CodingErrorAction.REPLACE);
				829	try {
				830	return replacing.encode(cb);
				831	} catch (CharacterCodingException x) {
				832	throw new Error(x); // Can't happen
				833	}
				834	}
				835
				836	/**
				837	* Encodes the given string into bytes using this charset.
				838	*
				839	* <p> For a charset <tt>cs</tt>, invoking this convenience method gives
				840	* the same result as evaluating
				841	*
				842	* <pre>
				843	* cs.encode(CharBuffer.wrap(str)); </pre>
				844	*
				845	* @param str The string to be encoded
				846	* @return A byte buffer containing the encoded characters
				847	*/
				848	public final ByteBuffer encode(String str) {
				849	CharBuffer chars = CharBuffer.wrap(str);
				850	return encode(chars);
				851	}
				852
				853	/**
				854	* Orders this charset relative to another.
				855	*
				856	* <p> The ordering is by canonical name, ignoring case. </p>
				857	*
				858	* @param that The charset to which this charset is to be compared
				859	*
				860	* @return A negative integer, zero, or a positive integer as this charset
				861	* is less than, equal to, or greater than the specified charset
				862	*/
				863	public final int compareTo(Charset that) {
				864	String mine = name();
				865	String theirs = that.name();
				866	return mine.compareToIgnoreCase(theirs);
				867	}
				868
				869	/**
				870	* Computes a hashcode for this charset from its canonical name.
				871	* @return An integer hashcode
				872	*/
				873	public final int hashCode() {
				874	String canonicalName = name();
				875	return canonicalName.hashCode();
				876	}
				877
				878	/**
				879	* Tests this charset for equality with another object.
				880	*
				881	* <p> Two charsets are equal if, and only if, their canonical names are
				882	* the same. A charset is never equal to an object of any other type. </p>
				883	*
				884	* @return <tt>true</tt> if, and only if, the given object equals this charset
				885	*/
				886	public final boolean equals(Object ob) {
				887	if (this == ob)
				888	return true;
				889	if (!(ob instanceof Charset))
				890	return false;
				891	Charset other = (Charset)ob;
				892	return name.equals(other.name());
				893	}
				894
				895	/**
				896	* Returns a string describing this charset.
				897	*
				898	* @return The value returned by {@link #name()} for this charset
				899	*/
				900	public final String toString() {
				901	return name();
				902	}
				903
				904	}