Blame - jdk/src/share/classes/sun/text/normalizer/UCharacter.java - platform/libcore

blob: 26a5eca99fce1191cd34863e666f8065ae7cfd74 [file] [log] [blame]

J. Duke	319a3b9	2007-12-01 00:00:00 +0000	[diff] [blame^]	1	/*
				2	* Portions Copyright 2005 Sun Microsystems, Inc. All Rights Reserved.
				3	* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
				4	*
				5	* This code is free software; you can redistribute it and/or modify it
				6	* under the terms of the GNU General Public License version 2 only, as
				7	* published by the Free Software Foundation. Sun designates this
				8	* particular file as subject to the "Classpath" exception as provided
				9	* by Sun in the LICENSE file that accompanied this code.
				10	*
				11	* This code is distributed in the hope that it will be useful, but WITHOUT
				12	* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
				13	* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
				14	* version 2 for more details (a copy is included in the LICENSE file that
				15	* accompanied this code).
				16	*
				17	* You should have received a copy of the GNU General Public License version
				18	* 2 along with this work; if not, write to the Free Software Foundation,
				19	* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
				20	*
				21	* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
				22	* CA 95054 USA or visit www.sun.com if you need additional information or
				23	* have any questions.
				24	*/
				25	/*
				26	*******************************************************************************
				27	* (C) Copyright IBM Corp. 1996-2005 - All Rights Reserved *
				28	* *
				29	* The original version of this source code and documentation is copyrighted *
				30	* and owned by IBM, These materials are provided under terms of a License *
				31	* Agreement between IBM and Sun. This technology is protected by multiple *
				32	* US and International patents. This notice and attribution to IBM may not *
				33	* to removed. *
				34	*******************************************************************************
				35	*/
				36
				37	package sun.text.normalizer;
				38
				39	import java.lang.ref.SoftReference;
				40	import java.util.HashMap;
				41	import java.util.Locale;
				42	import java.util.Map;
				43
				44	/**
				45	* <p>
				46	* The UCharacter class provides extensions to the
				47	* <a href=http://java.sun.com/j2se/1.3/docs/api/java/lang/Character.html>
				48	* java.lang.Character</a> class. These extensions provide support for
				49	* Unicode 3.2 properties and together with the <a href=../text/UTF16.html>UTF16</a>
				50	* class, provide support for supplementary characters (those with code
				51	* points above U+FFFF).
				52	* </p>
				53	* <p>
				54	* Code points are represented in these API using ints. While it would be
				55	* more convenient in Java to have a separate primitive datatype for them,
				56	* ints suffice in the meantime.
				57	* </p>
				58	* <p>
				59	* To use this class please add the jar file name icu4j.jar to the
				60	* class path, since it contains data files which supply the information used
				61	* by this file.<br>
				62	* E.g. In Windows <br>
				63	* <code>set CLASSPATH=%CLASSPATH%;$JAR_FILE_PATH/ucharacter.jar</code>.<br>
				64	* Otherwise, another method would be to copy the files uprops.dat and
				65	* unames.icu from the icu4j source subdirectory
				66	* <i>$ICU4J_SRC/src/com.ibm.icu.impl.data</i> to your class directory
				67	* <i>$ICU4J_CLASS/com.ibm.icu.impl.data</i>.
				68	* </p>
				69	* <p>
				70	* Aside from the additions for UTF-16 support, and the updated Unicode 3.1
				71	* properties, the main differences between UCharacter and Character are:
				72	* <ul>
				73	* <li> UCharacter is not designed to be a char wrapper and does not have
				74	* APIs that involve management of that single char.<br>
				75	* These include:
				76	* <ul>
				77	* <li> char charValue(),
				78	* <li> int compareTo(java.lang.Character, java.lang.Character), etc.
				79	* </ul>
				80	* <li> UCharacter does not include Character APIs that are deprecated, nor
				81	* does it include the Java-specific character information, such as
				82	* boolean isJavaIdentifierPart(char ch).
				83	* <li> Character maps characters 'A' - 'Z' and 'a' - 'z' to the numeric
				84	* values '10' - '35'. UCharacter also does this in digit and
				85	* getNumericValue, to adhere to the java semantics of these
				86	* methods. New methods unicodeDigit, and
				87	* getUnicodeNumericValue do not treat the above code points
				88	* as having numeric values. This is a semantic change from ICU4J 1.3.1.
				89	* </ul>
				90	* <p>
				91	* Further detailed differences can be determined from the program
				92	* <a href = http://oss.software.ibm.com/developerworks/opensource/cvs/icu4j/~checkout~/icu4j/src/com/ibm/icu/dev/test/lang/UCharacterCompare.java>
				93	* com.ibm.icu.dev.test.lang.UCharacterCompare</a>
				94	* </p>
				95	* <p>
				96	* This class is not subclassable
				97	* </p>
				98	* @author Syn Wee Quek
				99	* @stable ICU 2.1
				100	* @see com.ibm.icu.lang.UCharacterEnums
				101	*/
				102
				103	public final class UCharacter
				104	{
				105
				/**
				 * Numeric Type constants.
				 * <p>The fields are declared without modifiers because interface fields
				 * are implicitly {@code public static final}.</p>
				 * @see UProperty#NUMERIC_TYPE
				 * @stable ICU 2.4
				 */
				public interface NumericType
				{
					/**
					 * Numeric type value 0.
					 * @stable ICU 2.4
					 */
					int NONE = 0;

					/**
					 * Numeric type value 1.
					 * @stable ICU 2.4
					 */
					int DECIMAL = 1;

					/**
					 * Numeric type value 2.
					 * @stable ICU 2.4
					 */
					int DIGIT = 2;

					/**
					 * Numeric type value 3.
					 * @stable ICU 2.4
					 */
					int NUMERIC = 3;

					/**
					 * One greater than the largest numeric type value declared here.
					 * @stable ICU 2.4
					 */
					int COUNT = 4;
				}
				134
				/**
				 * Hangul Syllable Type constants.
				 * <p>The bracketed abbreviation after each constant is the short tag that
				 * accompanied it in the original source. Fields are declared without
				 * modifiers because interface fields are implicitly
				 * {@code public static final}.</p>
				 *
				 * @see UProperty#HANGUL_SYLLABLE_TYPE
				 * @stable ICU 2.6
				 */
				public interface HangulSyllableType
				{
					/**
					 * @stable ICU 2.6
					 */
					int NOT_APPLICABLE = 0; /* [NA] */ /* See note !! */

					/**
					 * @stable ICU 2.6
					 */
					int LEADING_JAMO = 1; /* [L] */

					/**
					 * @stable ICU 2.6
					 */
					int VOWEL_JAMO = 2; /* [V] */

					/**
					 * @stable ICU 2.6
					 */
					int TRAILING_JAMO = 3; /* [T] */

					/**
					 * @stable ICU 2.6
					 */
					int LV_SYLLABLE = 4; /* [LV] */

					/**
					 * @stable ICU 2.6
					 */
					int LVT_SYLLABLE = 5; /* [LVT] */

					/**
					 * One greater than the largest syllable type value declared here.
					 * @stable ICU 2.6
					 */
					int COUNT = 6;
				}
				172
				/**
				 * [Sun] This interface moved from UCharacterEnums.java.
				 *
				 * 'Enum' for the CharacterCategory constants. These constants are
				 * compatible in name <b>but not in value</b> with those defined in
				 * <code>java.lang.Character</code>. Only the three categories used by
				 * this class are declared. Fields carry no modifiers because interface
				 * fields are implicitly {@code public static final}.
				 * @see UCharacterCategory
				 * @draft ICU 3.0
				 * @deprecated This is a draft API and might change in a future release of ICU.
				 */
				public interface ECharacterCategory
				{
					/**
					 * Character type Lu
					 * @stable ICU 2.1
					 */
					int UPPERCASE_LETTER = 1;

					/**
					 * Character type Lt
					 * @stable ICU 2.1
					 */
					int TITLECASE_LETTER = 3;

					/**
					 * Character type Lo
					 * @stable ICU 2.1
					 */
					int OTHER_LETTER = 5;
				}
				203
				204	// public data members -----------------------------------------------
				205
				206	/**
				207	* The lowest Unicode code point value.
				208	* @stable ICU 2.1
				209	*/
				210	public static final int MIN_VALUE = UTF16.CODEPOINT_MIN_VALUE;
				211
				212	/**
				213	* The highest Unicode code point value (scalar value) according to the
				214	* Unicode Standard.
				215	* This is a 21-bit value (21 bits, rounded up).<br>
				216	* Up-to-date Unicode implementation of java.lang.Character.MIN_VALUE
				217	* @stable ICU 2.1
				218	*/
				219	public static final int MAX_VALUE = UTF16.CODEPOINT_MAX_VALUE;
				220
				221	/**
				222	* The minimum value for Supplementary code points
				223	* @stable ICU 2.1
				224	*/
				225	public static final int SUPPLEMENTARY_MIN_VALUE =
				226	UTF16.SUPPLEMENTARY_MIN_VALUE;
				227
				228	/**
				229	* Special value that is returned by getUnicodeNumericValue(int) when no
				230	* numeric value is defined for a code point.
				231	* @stable ICU 2.4
				232	* @see #getUnicodeNumericValue
				233	*/
				234	public static final double NO_NUMERIC_VALUE = -123456789;
				235
				236	// public methods ----------------------------------------------------
				237
				238	/**
				239	* Retrieves the numeric value of a decimal digit code point.
				240	* <br>This method observes the semantics of
				241	* <code>java.lang.Character.digit()</code>. Note that this
				242	* will return positive values for code points for which isDigit
				243	* returns false, just like java.lang.Character.
				244	* <br><em>Semantic Change:</em> In release 1.3.1 and
				245	* prior, this did not treat the European letters as having a
				246	* digit value, and also treated numeric letters and other numbers as
				247	* digits.
				248	* This has been changed to conform to the java semantics.
				249	* <br>A code point is a valid digit if and only if:
				250	* <ul>
				251	* <li>ch is a decimal digit or one of the european letters, and
				252	* <li>the value of ch is less than the specified radix.
				253	* </ul>
				254	* @param ch the code point to query
				255	* @param radix the radix
				256	* @return the numeric value represented by the code point in the
				257	* specified radix, or -1 if the code point is not a decimal digit
				258	* or if its value is too large for the radix
				259	* @stable ICU 2.1
				260	*/
				261	public static int digit(int ch, int radix)
				262	{
				263	// when ch is out of bounds getProperty == 0
				264	int props = getProperty(ch);
				265	if (getNumericType(props) != NumericType.DECIMAL) {
				266	return (radix <= 10) ? -1 : getEuropeanDigit(ch);
				267	}
				268	// if props == 0, it will just fall through and return -1
				269	if (isNotExceptionIndicator(props)) {
				270	// not contained in exception data
				271	// getSignedValue is just shifting so we can check for the sign
				272	// first
				273	// Optimization
				274	// int result = UCharacterProperty.getSignedValue(props);
				275	// if (result >= 0) {
				276	// return result;
				277	// }
				278	if (props >= 0) {
				279	return UCharacterProperty.getSignedValue(props);
				280	}
				281	}
				282	else {
				283	int index = UCharacterProperty.getExceptionIndex(props);
				284	if (PROPERTY_.hasExceptionValue(index,
				285	UCharacterProperty.EXC_NUMERIC_VALUE_)) {
				286	int result = PROPERTY_.getException(index,
				287	UCharacterProperty.EXC_NUMERIC_VALUE_);
				288	if (result >= 0) {
				289	return result;
				290	}
				291	}
				292	}
				293
				294	if (radix > 10) {
				295	int result = getEuropeanDigit(ch);
				296	if (result >= 0 && result < radix) {
				297	return result;
				298	}
				299	}
				300	return -1;
				301	}
				302
				303	/**
				304	* <p>Get the numeric value for a Unicode code point as defined in the
				305	* Unicode Character Database.</p>
				306	* <p>A "double" return type is necessary because some numeric values are
				307	* fractions, negative, or too large for int.</p>
				308	* <p>For characters without any numeric values in the Unicode Character
				309	* Database, this function will return NO_NUMERIC_VALUE.</p>
				310	* <p><em>API Change:</em> In release 2.2 and prior, this API has a
				311	* return type int and returns -1 when the argument ch does not have a
				312	* corresponding numeric value. This has been changed to synch with ICU4C
				313	* </p>
				314	* This corresponds to the ICU4C function u_getNumericValue.
				315	* @param ch Code point to get the numeric value for.
				316	* @return numeric value of ch, or NO_NUMERIC_VALUE if none is defined.
				317	* @stable ICU 2.4
				318	*/
				319	public static double getUnicodeNumericValue(int ch)
				320	{
				321	// equivalent to c version double u_getNumericValue(UChar32 c)
				322	int props = PROPERTY_.getProperty(ch);
				323	int numericType = getNumericType(props);
				324	if (numericType > NumericType.NONE && numericType < NumericType.COUNT) {
				325	if (isNotExceptionIndicator(props)) {
				326	return UCharacterProperty.getSignedValue(props);
				327	}
				328	else {
				329	int index = UCharacterProperty.getExceptionIndex(props);
				330	boolean nex = false;
				331	boolean dex = false;
				332	double numerator = 0;
				333	if (PROPERTY_.hasExceptionValue(index,
				334	UCharacterProperty.EXC_NUMERIC_VALUE_)) {
				335	int num = PROPERTY_.getException(index,
				336	UCharacterProperty.EXC_NUMERIC_VALUE_);
				337	// There are special values for huge numbers that are
				338	// powers of ten. genprops/store.c documents:
				339	// if numericValue = 0x7fffff00 + x then
				340	// numericValue = 10 ^ x
				341	if (num >= NUMERATOR_POWER_LIMIT_) {
				342	num &= 0xff;
				343	// 10^x without math.h
				344	numerator = Math.pow(10, num);
				345	}
				346	else {
				347	numerator = num;
				348	}
				349	nex = true;
				350	}
				351	double denominator = 0;
				352	if (PROPERTY_.hasExceptionValue(index,
				353	UCharacterProperty.EXC_DENOMINATOR_VALUE_)) {
				354	denominator = PROPERTY_.getException(index,
				355	UCharacterProperty.EXC_DENOMINATOR_VALUE_);
				356	// faster path not in c
				357	if (numerator != 0) {
				358	return numerator / denominator;
				359	}
				360	dex = true;
				361	}
				362
				363	if (nex) {
				364	if (dex) {
				365	return numerator / denominator;
				366	}
				367	return numerator;
				368	}
				369	if (dex) {
				370	return 1 / denominator;
				371	}
				372	}
				373	}
				374	return NO_NUMERIC_VALUE;
				375	}
				376
				377	/**
				378	* Returns a value indicating a code point's Unicode category.
				379	* Up-to-date Unicode implementation of java.lang.Character.getType()
				380	* except for the above mentioned code points that had their category
				381	* changed.<br>
				382	* Return results are constants from the interface
				383	* <a href=UCharacterCategory.html>UCharacterCategory</a><br>
				384	* <em>NOTE:</em> the UCharacterCategory values are <em>not</em> compatible with
				385	* those returned by java.lang.Character.getType. UCharacterCategory values
				386	* match the ones used in ICU4C, while java.lang.Character type
				387	* values, though similar, skip the value 17.</p>
				388	* @param ch code point whose type is to be determined
				389	* @return category which is a value of UCharacterCategory
				390	* @stable ICU 2.1
				391	*/
				392	public static int getType(int ch)
				393	{
				394	return getProperty(ch) & UCharacterProperty.TYPE_MASK;
				395	}
				396
				397	//// for StringPrep
				398	/**
				399	* Returns a code point corresponding to the two UTF16 characters.
				400	* @param lead the lead char
				401	* @param trail the trail char
				402	* @return code point if surrogate characters are valid.
				403	* @exception IllegalArgumentException thrown when argument characters do
				404	* not form a valid codepoint
				405	* @stable ICU 2.1
				406	*/
				407	public static int getCodePoint(char lead, char trail)
				408	{
				409	if (lead >= UTF16.LEAD_SURROGATE_MIN_VALUE &&
				410	lead <= UTF16.LEAD_SURROGATE_MAX_VALUE &&
				411	trail >= UTF16.TRAIL_SURROGATE_MIN_VALUE &&
				412	trail <= UTF16.TRAIL_SURROGATE_MAX_VALUE) {
				413	return UCharacterProperty.getRawSupplementary(lead, trail);
				414	}
				415	throw new IllegalArgumentException("Illegal surrogate characters");
				416	}
				417
				418	//// for StringPrep
				419	/**
				420	* Returns the Bidirection property of a code point.
				421	* For example, 0x0041 (letter A) has the LEFT_TO_RIGHT directional
				422	* property.<br>
				423	* Result returned belongs to the interface
				424	* <a href=UCharacterDirection.html>UCharacterDirection</a>
				425	* @param ch the code point to be determined its direction
				426	* @return direction constant from UCharacterDirection.
				427	* @stable ICU 2.1
				428	*/
				429	public static int getDirection(int ch)
				430	{
				431	// when ch is out of bounds getProperty == 0
				432	return (getProperty(ch) >> BIDI_SHIFT_) & BIDI_MASK_AFTER_SHIFT_;
				433	}
				434
				435	/**
				436	* The given string is mapped to its case folding equivalent according to
				437	* UnicodeData.txt and CaseFolding.txt; if any character has no case
				438	* folding equivalent, the character itself is returned.
				439	* "Full", multiple-code point case folding mappings are returned here.
				440	* For "simple" single-code point mappings use the API
				441	* foldCase(int ch, boolean defaultmapping).
				442	* @param str the String to be converted
				443	* @param defaultmapping Indicates if all mappings defined in
				444	* CaseFolding.txt is to be used, otherwise the
				445	* mappings for dotted I and dotless i marked with
				446	* 'I' in CaseFolding.txt will be skipped.
				447	* @return the case folding equivalent of the character, if
				448	* any; otherwise the character itself.
				449	* @see #foldCase(int, boolean)
				450	* @stable ICU 2.1
				451	*/
				452	public static String foldCase(String str, boolean defaultmapping)
				453	{
				454	int size = str.length();
				455	StringBuffer result = new StringBuffer(size);
				456	int offset = 0;
				457	int ch;
				458
				459	// case mapping loop
				460	while (offset < size) {
				461	ch = UTF16.charAt(str, offset);
				462	offset += UTF16.getCharCount(ch);
				463	int props = PROPERTY_.getProperty(ch);
				464	if (isNotExceptionIndicator(props)) {
				465	int type = UCharacterProperty.TYPE_MASK & props;
				466	if (type == ECharacterCategory.UPPERCASE_LETTER \|\|
				467	type == ECharacterCategory.TITLECASE_LETTER) {
				468	ch += UCharacterProperty.getSignedValue(props);
				469	}
				470	}
				471	else {
				472	int index = UCharacterProperty.getExceptionIndex(props);
				473	if (PROPERTY_.hasExceptionValue(index,
				474	UCharacterProperty.EXC_CASE_FOLDING_)) {
				475	int exception = PROPERTY_.getException(index,
				476	UCharacterProperty.EXC_CASE_FOLDING_);
				477	if (exception != 0) {
				478	PROPERTY_.getFoldCase(exception & LAST_CHAR_MASK_,
				479	exception >> SHIFT_24_, result);
				480	}
				481	else {
				482	// special case folding mappings, hardcoded
				483	if (ch != 0x49 && ch != 0x130) {
				484	// return ch itself because there is no special
				485	// mapping for it
				486	UTF16.append(result, ch);
				487	continue;
				488	}
				489	if (defaultmapping) {
				490	// default mappings
				491	if (ch == 0x49) {
				492	// 0049; C; 0069; # LATIN CAPITAL LETTER I
				493	result.append(
				494	UCharacterProperty.LATIN_SMALL_LETTER_I_);
				495	}
				496	else if (ch == 0x130) {
				497	// 0130; F; 0069 0307;
				498	// # LATIN CAPITAL LETTER I WITH DOT ABOVE
				499	result.append(
				500	UCharacterProperty.LATIN_SMALL_LETTER_I_);
				501	result.append((char)0x307);
				502	}
				503	}
				504	else {
				505	// Turkic mappings
				506	if (ch == 0x49) {
				507	// 0049; T; 0131; # LATIN CAPITAL LETTER I
				508	result.append((char)0x131);
				509	}
				510	else if (ch == 0x130) {
				511	// 0130; T; 0069;
				512	// # LATIN CAPITAL LETTER I WITH DOT ABOVE
				513	result.append(
				514	UCharacterProperty.LATIN_SMALL_LETTER_I_);
				515	}
				516	}
				517	}
				518	// do not fall through to the output of c
				519	continue;
				520	}
				521	else {
				522	if (PROPERTY_.hasExceptionValue(index,
				523	UCharacterProperty.EXC_LOWERCASE_)) {
				524	ch = PROPERTY_.getException(index,
				525	UCharacterProperty.EXC_LOWERCASE_);
				526	}
				527	}
				528
				529	}
				530
				531	// handle 1:1 code point mappings from UnicodeData.txt
				532	UTF16.append(result, ch);
				533	}
				534
				535	return result.toString();
				536	}
				537
				538	/**
				539	* <p>Get the "age" of the code point.</p>
				540	* <p>The "age" is the Unicode version when the code point was first
				541	* designated (as a non-character or for Private Use) or assigned a
				542	* character.
				543	* <p>This can be useful to avoid emitting code points to receiving
				544	* processes that do not accept newer characters.</p>
				545	* <p>The data is from the UCD file DerivedAge.txt.</p>
				546	* @param ch The code point.
				547	* @return the Unicode version number
				548	* @stable ICU 2.6
				549	*/
				550	public static VersionInfo getAge(int ch)
				551	{
				552	if (ch < MIN_VALUE \|\| ch > MAX_VALUE) {
				553	throw new IllegalArgumentException("Codepoint out of bounds");
				554	}
				555	return PROPERTY_.getAge(ch);
				556	}
				557
				558	/**
				559	* <p>Gets the property value for an Unicode property type of a code point.
				560	* Also returns binary and mask property values.</p>
				561	* <p>Unicode, especially in version 3.2, defines many more properties than
				562	* the original set in UnicodeData.txt.</p>
				563	* <p>The properties APIs are intended to reflect Unicode properties as
				564	* defined in the Unicode Character Database (UCD) and Unicode Technical
				565	* Reports (UTR). For details about the properties see
				566	* http://www.unicode.org/.</p>
				567	* <p>For names of Unicode properties see the UCD file PropertyAliases.txt.
				568	* </p>
				569	* <pre>
				570	* Sample usage:
				571	* int ea = UCharacter.getIntPropertyValue(c, UProperty.EAST_ASIAN_WIDTH);
				572	* int ideo = UCharacter.getIntPropertyValue(c, UProperty.IDEOGRAPHIC);
				573	* boolean b = (ideo == 1) ? true : false;
				574	* </pre>
				575	* @param ch code point to test.
				576	* @param type UProperty selector constant, identifies which binary
				577	* property to check. Must be
				578	* UProperty.BINARY_START <= type < UProperty.BINARY_LIMIT or
				579	* UProperty.INT_START <= type < UProperty.INT_LIMIT or
				580	* UProperty.MASK_START <= type < UProperty.MASK_LIMIT.
				581	* @return numeric value that is directly the property value or,
				582	* for enumerated properties, corresponds to the numeric value of
				583	* the enumerated constant of the respective property value
				584	* enumeration type (cast to enum type if necessary).
				585	* Returns 0 or 1 (for false / true) for binary Unicode properties.
				586	* Returns a bit-mask for mask properties.
				587	* Returns 0 if 'type' is out of bounds or if the Unicode version
				588	* does not have data for the property at all, or not for this code
				589	* point.
				590	* @see UProperty
				591	* @see #hasBinaryProperty
				592	* @see #getIntPropertyMinValue
				593	* @see #getIntPropertyMaxValue
				594	* @see #getUnicodeVersion
				595	* @stable ICU 2.4
				596	*/
				597	public static int getIntPropertyValue(int ch, int type)
				598	{
				599	/*
				600	* For Normalizer with Unicode 3.2, this method is called only for
				601	* HANGUL_SYLLABLE_TYPE in UnicodeSet.addPropertyStarts().
				602	*/
				603	if (type == UProperty.HANGUL_SYLLABLE_TYPE) {
				604	/* purely algorithmic; hardcode known characters, check for assigned new ones */
				605	if(ch<NormalizerImpl.JAMO_L_BASE) {
				606	/* NA */
				607	} else if(ch<=0x11ff) {
				608	/* Jamo range */
				609	if(ch<=0x115f) {
				610	/* Jamo L range, HANGUL CHOSEONG ... */
				611	if(ch==0x115f \|\| ch<=0x1159 \|\| getType(ch)==ECharacterCategory.OTHER_LETTER) {
				612	return HangulSyllableType.LEADING_JAMO;
				613	}
				614	} else if(ch<=0x11a7) {
				615	/* Jamo V range, HANGUL JUNGSEONG ... */
				616	if(ch<=0x11a2 \|\| getType(ch)==ECharacterCategory.OTHER_LETTER) {
				617	return HangulSyllableType.VOWEL_JAMO;
				618	}
				619	} else {
				620	/* Jamo T range */
				621	if(ch<=0x11f9 \|\| getType(ch)==ECharacterCategory.OTHER_LETTER) {
				622	return HangulSyllableType.TRAILING_JAMO;
				623	}
				624	}
				625	} else if((ch-=NormalizerImpl.HANGUL_BASE)<0) {
				626	/* NA */
				627	} else if(ch<NormalizerImpl.HANGUL_COUNT) {
				628	/* Hangul syllable */
				629	return ch%NormalizerImpl.JAMO_T_COUNT==0 ? HangulSyllableType.LV_SYLLABLE : HangulSyllableType.LVT_SYLLABLE;
				630	}
				631	}
				632	return 0; /* NA */
				633	}
				634
				635	// private variables -------------------------------------------------
				636
				/**
				 * Database storing the sets of character property
				 */
				private static final UCharacterProperty PROPERTY_;
				/**
				 * Direct references into PROPERTY_'s trie index, trie data and property
				 * arrays, cached here for optimization (see getProperty).
				 */
				private static final char[] PROPERTY_TRIE_INDEX_;
				private static final char[] PROPERTY_TRIE_DATA_;
				private static final int[] PROPERTY_DATA_;
				/**
				 * Property word found at PROPERTY_'s trie initial-value offset; used as
				 * the default result of getProperty.
				 */
				private static final int PROPERTY_INITIAL_VALUE_;

				// block to initialise character property database
				static
				{
					try
					{
						PROPERTY_ = UCharacterProperty.getInstance();
						PROPERTY_TRIE_INDEX_ = PROPERTY_.m_trieIndex_;
						PROPERTY_TRIE_DATA_ = PROPERTY_.m_trieData_;
						PROPERTY_DATA_ = PROPERTY_.m_property_;
						PROPERTY_INITIAL_VALUE_
								= PROPERTY_DATA_[PROPERTY_.m_trieInitialValue_];
					}
					catch (Exception e)
					{
						// Same exception type and message as before, but keep the
						// original exception as the cause so its type and stack
						// trace are not lost when class initialisation fails.
						throw new RuntimeException(e.getMessage(), e);
					}
				}
				666
				667	/**
				668	* To get the last character out from a data type
				669	*/
				670	private static final int LAST_CHAR_MASK_ = 0xFFFF;
				671
				672	/**
				673	* To get the last byte out from a data type
				674	*/
				675	// private static final int LAST_BYTE_MASK_ = 0xFF;
				676
				677	/**
				678	* Shift 16 bits
				679	*/
				680	// private static final int SHIFT_16_ = 16;
				681
				682	/**
				683	* Shift 24 bits
				684	*/
				685	private static final int SHIFT_24_ = 24;
				686
				687	/**
				688	* Shift to get numeric type
				689	*/
				690	private static final int NUMERIC_TYPE_SHIFT_ = 12;
				691	/**
				692	* Mask to get numeric type
				693	*/
				694	private static final int NUMERIC_TYPE_MASK_ = 0x7 << NUMERIC_TYPE_SHIFT_;
				695	/**
				696	* Shift to get bidi bits
				697	*/
				698	private static final int BIDI_SHIFT_ = 6;
				699
				700	/**
				701	* Mask to be applied after shifting to get bidi bits
				702	*/
				703	private static final int BIDI_MASK_AFTER_SHIFT_ = 0x1F;
				704
				705	/**
				706	* <p>Numerator power limit.
				707	* There are special values for huge numbers that are powers of ten.</p>
				708	* <p>c version genprops/store.c documents:
				709	* if numericValue = 0x7fffff00 + x then numericValue = 10 ^ x</p>
				710	*/
				711	private static final int NUMERATOR_POWER_LIMIT_ = 0x7fffff00;
				712	/**
				713	* Integer properties mask and shift values for joining type.
				714	* Equivalent to icu4c UPROPS_JT_MASK.
				715	*/
				716	private static final int JOINING_TYPE_MASK_ = 0x00003800;
				717	/**
				718	* Integer properties mask and shift values for joining type.
				719	* Equivalent to icu4c UPROPS_JT_SHIFT.
				720	*/
				721	private static final int JOINING_TYPE_SHIFT_ = 11;
				722	/**
				723	* Integer properties mask and shift values for joining group.
				724	* Equivalent to icu4c UPROPS_JG_MASK.
				725	*/
				726	private static final int JOINING_GROUP_MASK_ = 0x000007e0;
				727	/**
				728	* Integer properties mask and shift values for joining group.
				729	* Equivalent to icu4c UPROPS_JG_SHIFT.
				730	*/
				731	private static final int JOINING_GROUP_SHIFT_ = 5;
				732	/**
				733	* Integer properties mask for decomposition type.
				734	* Equivalent to icu4c UPROPS_DT_MASK.
				735	*/
				736	private static final int DECOMPOSITION_TYPE_MASK_ = 0x0000001f;
				737	/**
				738	* Integer properties mask and shift values for East Asian cell width.
				739	* Equivalent to icu4c UPROPS_EA_MASK
				740	*/
				741	private static final int EAST_ASIAN_MASK_ = 0x00038000;
				742	/**
				743	* Integer properties mask and shift values for East Asian cell width.
				744	* Equivalent to icu4c UPROPS_EA_SHIFT
				745	*/
				746	private static final int EAST_ASIAN_SHIFT_ = 15;
				747
				748	/**
				749	* Integer properties mask and shift values for line breaks.
				750	* Equivalent to icu4c UPROPS_LB_MASK
				751	*/
				752	private static final int LINE_BREAK_MASK_ = 0x007C0000;
				753	/**
				754	* Integer properties mask and shift values for line breaks.
				755	* Equivalent to icu4c UPROPS_LB_SHIFT
				756	*/
				757	private static final int LINE_BREAK_SHIFT_ = 18;
				758	/**
				759	* Integer properties mask and shift values for blocks.
				760	* Equivalent to icu4c UPROPS_BLOCK_MASK
				761	*/
				762	private static final int BLOCK_MASK_ = 0x00007f80;
				763	/**
				764	* Integer properties mask and shift values for blocks.
				765	* Equivalent to icu4c UPROPS_BLOCK_SHIFT
				766	*/
				767	private static final int BLOCK_SHIFT_ = 7;
				768	/**
				769	* Integer properties mask and shift values for scripts.
				770	* Equivalent to icu4c UPROPS_SHIFT_MASK
				771	*/
				772	private static final int SCRIPT_MASK_ = 0x0000007f;
				773
				774	// private constructor -----------------------------------------------
				775	///CLOVER:OFF
				/**
				 * Not instantiable: this class only exposes static members, so the
				 * sole constructor is private and does nothing.
				 */
				private UCharacter() {
					// intentionally empty
				}
				782	///CLOVER:ON
				783	// private methods ---------------------------------------------------
				784
				785	/**
				786	* Getting the digit values of characters like 'A' - 'Z', normal,
				787	* half-width and full-width. This method assumes that the other digit
				788	* characters are checked by the calling method.
				789	* @param ch character to test
				790	* @return -1 if ch is not a character of the form 'A' - 'Z', otherwise
				791	* its corresponding digit will be returned.
				792	*/
				793	private static int getEuropeanDigit(int ch) {
				794	if ((ch > 0x7a && ch < 0xff21)
				795	\|\| ch < 0x41 \|\| (ch > 0x5a && ch < 0x61)
				796	\|\| ch > 0xff5a \|\| (ch > 0xff31 && ch < 0xff41)) {
				797	return -1;
				798	}
				799	if (ch <= 0x7a) {
				800	// ch >= 0x41 or ch < 0x61
				801	return ch + 10 - ((ch <= 0x5a) ? 0x41 : 0x61);
				802	}
				803	// ch >= 0xff21
				804	if (ch <= 0xff3a) {
				805	return ch + 10 - 0xff21;
				806	}
				807	// ch >= 0xff41 && ch <= 0xff5a
				808	return ch + 10 - 0xff41;
				809	}
				810
				811	/**
				812	* Gets the numeric type of the property argument
				813	* @param props 32 bit property
				814	* @return the numeric type
				815	*/
				816	private static int getNumericType(int props)
				817	{
				818	return (props & NUMERIC_TYPE_MASK_) >> NUMERIC_TYPE_SHIFT_;
				819	}
				820
				821	/**
				822	* Checks if the property value has a exception indicator
				823	* @param props 32 bit property value
				824	* @return true if property does not have a exception indicator, false
				825	* otherwise
				826	*/
				827	private static boolean isNotExceptionIndicator(int props)
				828	{
				829	return (props & UCharacterProperty.EXCEPTION_MASK) == 0;
				830	}
				831
				832	/**
				833	* Gets the property value at the index.
				834	* This is optimized.
				835	* Note this is alittle different from CharTrie the index m_trieData_
				836	* is never negative.
				837	* This is a duplicate of UCharacterProperty.getProperty. For optimization
				838	* purposes, this method calls the trie data directly instead of through
				839	* UCharacterProperty.getProperty.
				840	* @param ch code point whose property value is to be retrieved
				841	* @return property value of code point
				842	* @stable ICU 2.6
				843	*/
				844	private static int getProperty(int ch)
				845	{
				846	if (ch < UTF16.LEAD_SURROGATE_MIN_VALUE
				847	\|\| (ch > UTF16.LEAD_SURROGATE_MAX_VALUE
				848	&& ch < UTF16.SUPPLEMENTARY_MIN_VALUE)) {
				849	// BMP codepoint
				850	try { // using try for < 0 ch is faster than using an if statement
				851	return PROPERTY_DATA_[
				852	PROPERTY_TRIE_DATA_[
				853	(PROPERTY_TRIE_INDEX_[ch >> 5] << 2)
				854	+ (ch & 0x1f)]];
				855	} catch (ArrayIndexOutOfBoundsException e) {
				856	return PROPERTY_INITIAL_VALUE_;
				857	}
				858	}
				859	if (ch <= UTF16.LEAD_SURROGATE_MAX_VALUE) {
				860	// surrogate
				861	return PROPERTY_DATA_[
				862	PROPERTY_TRIE_DATA_[
				863	(PROPERTY_TRIE_INDEX_[(0x2800 >> 5) + (ch >> 5)] << 2)
				864	+ (ch & 0x1f)]];
				865	}
				866	// for optimization
				867	if (ch <= UTF16.CODEPOINT_MAX_VALUE) {
				868	// look at the construction of supplementary characters
				869	// trail forms the ends of it.
				870	return PROPERTY_DATA_[PROPERTY_.m_trie_.getSurrogateValue(
				871	UTF16.getLeadSurrogate(ch),
				872	(char)(ch & 0x3ff))];
				873	}
				874	// return m_dataOffset_ if there is an error, in this case we return
				875	// the default value: m_initialValue_
				876	// we cannot assume that m_initialValue_ is at offset 0
				877	// this is for optimization.
				878	return PROPERTY_INITIAL_VALUE_;
				879	}
				880	}