Blame - jdk/src/share/classes/sun/text/normalizer/NormalizerBase.java - platform/libcore

blob: c2fc1ab291dc40a523045e85e7139d7b7b9cd4c4 [file] [log] [blame]

J. Duke	319a3b9	2007-12-01 00:00:00 +0000	[diff] [blame^]	1	/*
				2	* Portions Copyright 2001-2006 Sun Microsystems, Inc. All Rights Reserved.
				3	* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
				4	*
				5	* This code is free software; you can redistribute it and/or modify it
				6	* under the terms of the GNU General Public License version 2 only, as
				7	* published by the Free Software Foundation. Sun designates this
				8	* particular file as subject to the "Classpath" exception as provided
				9	* by Sun in the LICENSE file that accompanied this code.
				10	*
				11	* This code is distributed in the hope that it will be useful, but WITHOUT
				12	* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
				13	* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
				14	* version 2 for more details (a copy is included in the LICENSE file that
				15	* accompanied this code).
				16	*
				17	* You should have received a copy of the GNU General Public License version
				18	* 2 along with this work; if not, write to the Free Software Foundation,
				19	* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
				20	*
				21	* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
				22	* CA 95054 USA or visit www.sun.com if you need additional information or
				23	* have any questions.
				24	*/
				25
				26	/*
				27	*******************************************************************************
				28	* (C) Copyright IBM Corp. 1996-2005 - All Rights Reserved *
				29	* *
				30	* The original version of this source code and documentation is copyrighted *
				31	* and owned by IBM, These materials are provided under terms of a License *
				32	* Agreement between IBM and Sun. This technology is protected by multiple *
				33	* US and International patents. This notice and attribution to IBM may not *
				34	* to removed. *
				35	*******************************************************************************
				36	*/
				37
				38	package sun.text.normalizer;
				39
				40	import java.text.CharacterIterator;
				41	import java.text.Normalizer;
				42
				43	/**
				44	* Unicode Normalization
				45	*
				46	* <h2>Unicode normalization API</h2>
				47	*
				48	* <code>normalize</code> transforms Unicode text into an equivalent composed or
				49	* decomposed form, allowing for easier sorting and searching of text.
				50	* <code>normalize</code> supports the standard normalization forms described in
				51	* <a href="http://www.unicode.org/unicode/reports/tr15/" target="unicode">
				52	* Unicode Standard Annex #15 — Unicode Normalization Forms</a>.
				53	*
				54	* Characters with accents or other adornments can be encoded in
				55	* several different ways in Unicode. For example, take the character A-acute.
				56	* In Unicode, this can be encoded as a single character (the
				57	* "composed" form):
				58	*
				59	* <p>
				60	* 00C1 LATIN CAPITAL LETTER A WITH ACUTE
				61	* </p>
				62	*
				63	* or as two separate characters (the "decomposed" form):
				64	*
				65	* <p>
				66	* 0041 LATIN CAPITAL LETTER A
				67	* 0301 COMBINING ACUTE ACCENT
				68	* </p>
				69	*
				70	* To a user of your program, however, both of these sequences should be
				71	* treated as the same "user-level" character "A with acute accent". When you
				72	* are searching or comparing text, you must ensure that these two sequences are
				73	* treated equivalently. In addition, you must handle characters with more than
				74	* one accent. Sometimes the order of a character's combining accents is
				75	* significant, while in other cases accent sequences in different orders are
				76	* really equivalent.
				77	*
				78	* Similarly, the string "ffi" can be encoded as three separate letters:
				79	*
				80	* <p>
				81	* 0066 LATIN SMALL LETTER F
				82	* 0066 LATIN SMALL LETTER F
				83	* 0069 LATIN SMALL LETTER I
				84	* </p>
				85	*
				86	* or as the single character
				87	*
				88	* <p>
				89	* FB03 LATIN SMALL LIGATURE FFI
				90	* </p>
				91	*
				92	* The ffi ligature is not a distinct semantic character, and strictly speaking
				93	* it shouldn't be in Unicode at all, but it was included for compatibility
				94	* with existing character sets that already provided it. The Unicode standard
				95	* identifies such characters by giving them "compatibility" decompositions
				96	* into the corresponding semantic characters. When sorting and searching, you
				97	* will often want to use these mappings.
				98	*
				99	* <code>normalize</code> helps solve these problems by transforming text into
				100	* the canonical composed and decomposed forms as shown in the first example
				101	* above. In addition, you can have it perform compatibility decompositions so
				102	* that you can treat compatibility characters the same as their equivalents.
				103	* Finally, <code>normalize</code> rearranges accents into the proper canonical
				104	* order, so that you do not have to worry about accent rearrangement on your
				105	* own.
				106	*
				107	* Form FCD, "Fast C or D", is also designed for collation.
				108	* It allows working on strings that are not necessarily normalized
				109	* with an algorithm (like in collation) that works under "canonical closure",
				110	* i.e., it treats precomposed characters and their decomposed equivalents the
				111	* same.
				112	*
				113	* It is not a normalization form because it does not provide for uniqueness of
				114	* representation. Multiple strings may be canonically equivalent (their NFDs
				115	* are identical) and may all conform to FCD without being identical themselves.
				116	*
				117	* The form is defined such that the "raw decomposition", the recursive
				118	* canonical decomposition of each character, results in a string that is
				119	* canonically ordered. This means that precomposed characters are allowed for
				120	* as long as their decompositions do not need canonical reordering.
				121	*
				122	* Its advantage for a process like collation is that all NFD and most NFC texts
				123	* - and many unnormalized texts - already conform to FCD and do not need to be
				124	* normalized (NFD) for such a process. The FCD quick check will return YES for
				125	* most strings in practice.
				126	*
				127	* normalize(FCD) may be implemented with NFD.
				128	*
				129	* For more details on FCD see the collation design document:
				130	* http://oss.software.ibm.com/cvs/icu/~checkout~/icuhtml/design/collation/ICU_collation_design.htm
				131	*
				132	* ICU collation performs either NFD or FCD normalization automatically if
				133	* normalization is turned on for the collator object. Beyond collation and
				134	* string search, normalized strings may be useful for string equivalence
				135	* comparisons, transliteration/transcription, unique representations, etc.
				136	*
				137	* The W3C generally recommends exchanging text in NFC.
				138	* Note also that most legacy character encodings use only precomposed forms and
				139	* often do not encode any combining marks by themselves. For conversion to such
				140	* character encodings the Unicode text needs to be normalized to NFC.
				141	* For more usage examples, see the Unicode Standard Annex.
				142	* @stable ICU 2.8
				143	*/
				144
				145	public final class NormalizerBase implements Cloneable {
				146
				147	//-------------------------------------------------------------------------
				148	// Private data
				149	//-------------------------------------------------------------------------
				150	private char[] buffer = new char[100]; // internal buffer; current()/next()/previous() read code points from it at bufferPos
				151	private int bufferStart = 0; // NOTE(review): not read in the visible part of the file; presumably first valid index in buffer -- confirm
				152	private int bufferPos = 0; // read position in buffer; advanced by next(), moved back by previous()
				153	private int bufferLimit = 0; // current()/next() call nextNormalize() once bufferPos is no longer below this
				154
				155	// The input text and our position in it
				156	private UCharacterIterator text;
				157	private Mode mode = NFC; // default mode; every visible constructor overwrites it
				158	private int options = 0; // option bits supplied to the constructors (0 = none)
				159	private int currentIndex; // set together with nextIndex by reset()/setIndexOnly()
				160	private int nextIndex;
				161
				162	/**
				163	* Options bit set value to select Unicode 3.2 normalization
				164	* (except NormalizationCorrections).
				165	* At most one Unicode version can be selected at a time.
				166	* @stable ICU 2.6
				167	*/
				168	public static final int UNICODE_3_2=0x20;
				169
				170	/**
				171	* Constant indicating that the end of the iteration has been reached.
				172	* This is guaranteed to have the same value as {@link UCharacterIterator#DONE}.
				173	* @stable ICU 2.8
				174	*/
				175	public static final int DONE = UCharacterIterator.DONE;
				176
				177	/**
				178	* Constants for normalization modes.
				179	* @stable ICU 2.8
				180	*/
				181	public static class Mode {
				182	private int modeValue;
				183	private Mode(int value) {
				184	modeValue = value;
				185	}
				186
				187	/**
				188	* This method is used for method dispatch
				189	* @stable ICU 2.6
				190	*/
				191	protected int normalize(char[] src, int srcStart, int srcLimit,
				192	char[] dest,int destStart,int destLimit,
				193	UnicodeSet nx) {
				194	int srcLen = (srcLimit - srcStart);
				195	int destLen = (destLimit - destStart);
				196	if( srcLen > destLen ) {
				197	return srcLen;
				198	}
				199	System.arraycopy(src,srcStart,dest,destStart,srcLen);
				200	return srcLen;
				201	}
				202
				203	/**
				204	* This method is used for method dispatch
				205	* @stable ICU 2.6
				206	*/
				207	protected int normalize(char[] src, int srcStart, int srcLimit,
				208	char[] dest,int destStart,int destLimit,
				209	int options) {
				210	return normalize( src, srcStart, srcLimit,
				211	dest,destStart,destLimit,
				212	NormalizerImpl.getNX(options)
				213	);
				214	}
				215
				216	/**
				217	* This method is used for method dispatch
				218	* @stable ICU 2.6
				219	*/
				220	protected String normalize(String src, int options) {
				221	return src;
				222	}
				223
				224	/**
				225	* This method is used for method dispatch
				226	* @stable ICU 2.8
				227	*/
				228	protected int getMinC() {
				229	return -1;
				230	}
				231
				232	/**
				233	* This method is used for method dispatch
				234	* @stable ICU 2.8
				235	*/
				236	protected int getMask() {
				237	return -1;
				238	}
				239
				240	/**
				241	* This method is used for method dispatch
				242	* @stable ICU 2.8
				243	*/
				244	protected IsPrevBoundary getPrevBoundary() {
				245	return null;
				246	}
				247
				248	/**
				249	* This method is used for method dispatch
				250	* @stable ICU 2.8
				251	*/
				252	protected IsNextBoundary getNextBoundary() {
				253	return null;
				254	}
				255
				256	/**
				257	* This method is used for method dispatch
				258	* @stable ICU 2.6
				259	*/
				260	protected QuickCheckResult quickCheck(char[] src,int start, int limit,
				261	boolean allowMaybe,UnicodeSet nx) {
				262	if(allowMaybe) {
				263	return MAYBE;
				264	}
				265	return NO;
				266	}
				267
				268	/**
				269	* This method is used for method dispatch
				270	* @stable ICU 2.8
				271	*/
				272	protected boolean isNFSkippable(int c) {
				273	return true;
				274	}
				275	}
				276
				277	/**
				278	* No decomposition/composition.
				279	* @stable ICU 2.8
				280	*/
				281	public static final Mode NONE = new Mode(1);
				282
				283	/**
				284	* Canonical decomposition.
				285	* @stable ICU 2.8
				286	*/
				287	public static final Mode NFD = new NFDMode(2);
				288
				289	private static final class NFDMode extends Mode {
				290	private NFDMode(int value) {
				291	super(value);
				292	}
				293
				294	protected int normalize(char[] src, int srcStart, int srcLimit,
				295	char[] dest,int destStart,int destLimit,
				296	UnicodeSet nx) {
				297	int[] trailCC = new int[1];
				298	return NormalizerImpl.decompose(src, srcStart,srcLimit,
				299	dest, destStart,destLimit,
				300	false, trailCC,nx);
				301	}
				302
				303	protected String normalize( String src, int options) {
				304	return decompose(src,false,options);
				305	}
				306
				307	protected int getMinC() {
				308	return NormalizerImpl.MIN_WITH_LEAD_CC;
				309	}
				310
				311	protected IsPrevBoundary getPrevBoundary() {
				312	return new IsPrevNFDSafe();
				313	}
				314
				315	protected IsNextBoundary getNextBoundary() {
				316	return new IsNextNFDSafe();
				317	}
				318
				319	protected int getMask() {
				320	return (NormalizerImpl.CC_MASK\|NormalizerImpl.QC_NFD);
				321	}
				322
				323	protected QuickCheckResult quickCheck(char[] src,int start,
				324	int limit,boolean allowMaybe,
				325	UnicodeSet nx) {
				326	return NormalizerImpl.quickCheck(
				327	src, start,limit,
				328	NormalizerImpl.getFromIndexesArr(
				329	NormalizerImpl.INDEX_MIN_NFD_NO_MAYBE
				330	),
				331	NormalizerImpl.QC_NFD,
				332	0,
				333	allowMaybe,
				334	nx
				335	);
				336	}
				337
				338	protected boolean isNFSkippable(int c) {
				339	return NormalizerImpl.isNFSkippable(c,this,
				340	(NormalizerImpl.CC_MASK\|NormalizerImpl.QC_NFD)
				341	);
				342	}
				343	}
				344
				345	/**
				346	* Compatibility decomposition.
				347	* @stable ICU 2.8
				348	*/
				349	public static final Mode NFKD = new NFKDMode(3);
				350
				351	private static final class NFKDMode extends Mode {
				352	private NFKDMode(int value) {
				353	super(value);
				354	}
				355
				356	protected int normalize(char[] src, int srcStart, int srcLimit,
				357	char[] dest,int destStart,int destLimit,
				358	UnicodeSet nx) {
				359	int[] trailCC = new int[1];
				360	return NormalizerImpl.decompose(src, srcStart,srcLimit,
				361	dest, destStart,destLimit,
				362	true, trailCC, nx);
				363	}
				364
				365	protected String normalize( String src, int options) {
				366	return decompose(src,true,options);
				367	}
				368
				369	protected int getMinC() {
				370	return NormalizerImpl.MIN_WITH_LEAD_CC;
				371	}
				372
				373	protected IsPrevBoundary getPrevBoundary() {
				374	return new IsPrevNFDSafe();
				375	}
				376
				377	protected IsNextBoundary getNextBoundary() {
				378	return new IsNextNFDSafe();
				379	}
				380
				381	protected int getMask() {
				382	return (NormalizerImpl.CC_MASK\|NormalizerImpl.QC_NFKD);
				383	}
				384
				385	protected QuickCheckResult quickCheck(char[] src,int start,
				386	int limit,boolean allowMaybe,
				387	UnicodeSet nx) {
				388	return NormalizerImpl.quickCheck(
				389	src,start,limit,
				390	NormalizerImpl.getFromIndexesArr(
				391	NormalizerImpl.INDEX_MIN_NFKD_NO_MAYBE
				392	),
				393	NormalizerImpl.QC_NFKD,
				394	NormalizerImpl.OPTIONS_COMPAT,
				395	allowMaybe,
				396	nx
				397	);
				398	}
				399
				400	protected boolean isNFSkippable(int c) {
				401	return NormalizerImpl.isNFSkippable(c, this,
				402	(NormalizerImpl.CC_MASK\|NormalizerImpl.QC_NFKD)
				403	);
				404	}
				405	}
				406
				407	/**
				408	* Canonical decomposition followed by canonical composition.
				409	* @stable ICU 2.8
				410	*/
				411	public static final Mode NFC = new NFCMode(4);
				412
				413	private static final class NFCMode extends Mode{
				414	private NFCMode(int value) {
				415	super(value);
				416	}
				417	protected int normalize(char[] src, int srcStart, int srcLimit,
				418	char[] dest,int destStart,int destLimit,
				419	UnicodeSet nx) {
				420	return NormalizerImpl.compose( src, srcStart, srcLimit,
				421	dest,destStart,destLimit,
				422	0, nx);
				423	}
				424
				425	protected String normalize( String src, int options) {
				426	return compose(src, false, options);
				427	}
				428
				429	protected int getMinC() {
				430	return NormalizerImpl.getFromIndexesArr(
				431	NormalizerImpl.INDEX_MIN_NFC_NO_MAYBE
				432	);
				433	}
				434	protected IsPrevBoundary getPrevBoundary() {
				435	return new IsPrevTrueStarter();
				436	}
				437	protected IsNextBoundary getNextBoundary() {
				438	return new IsNextTrueStarter();
				439	}
				440	protected int getMask() {
				441	return (NormalizerImpl.CC_MASK\|NormalizerImpl.QC_NFC);
				442	}
				443	protected QuickCheckResult quickCheck(char[] src,int start,
				444	int limit,boolean allowMaybe,
				445	UnicodeSet nx) {
				446	return NormalizerImpl.quickCheck(
				447	src,start,limit,
				448	NormalizerImpl.getFromIndexesArr(
				449	NormalizerImpl.INDEX_MIN_NFC_NO_MAYBE
				450	),
				451	NormalizerImpl.QC_NFC,
				452	0,
				453	allowMaybe,
				454	nx
				455	);
				456	}
				457	protected boolean isNFSkippable(int c) {
				458	return NormalizerImpl.isNFSkippable(c,this,
				459	( NormalizerImpl.CC_MASK\|NormalizerImpl.COMBINES_ANY\|
				460	(NormalizerImpl.QC_NFC & NormalizerImpl.QC_ANY_NO)
				461	)
				462	);
				463	}
				464	};
				465
				466	/**
				467	* Compatibility decomposition followed by canonical composition.
				468	* @stable ICU 2.8
				469	*/
				470	public static final Mode NFKC =new NFKCMode(5);
				471
				472	private static final class NFKCMode extends Mode{
				473	private NFKCMode(int value) {
				474	super(value);
				475	}
				476	protected int normalize(char[] src, int srcStart, int srcLimit,
				477	char[] dest,int destStart,int destLimit,
				478	UnicodeSet nx) {
				479	return NormalizerImpl.compose(src, srcStart,srcLimit,
				480	dest, destStart,destLimit,
				481	NormalizerImpl.OPTIONS_COMPAT, nx);
				482	}
				483
				484	protected String normalize( String src, int options) {
				485	return compose(src, true, options);
				486	}
				487	protected int getMinC() {
				488	return NormalizerImpl.getFromIndexesArr(
				489	NormalizerImpl.INDEX_MIN_NFKC_NO_MAYBE
				490	);
				491	}
				492	protected IsPrevBoundary getPrevBoundary() {
				493	return new IsPrevTrueStarter();
				494	}
				495	protected IsNextBoundary getNextBoundary() {
				496	return new IsNextTrueStarter();
				497	}
				498	protected int getMask() {
				499	return (NormalizerImpl.CC_MASK\|NormalizerImpl.QC_NFKC);
				500	}
				501	protected QuickCheckResult quickCheck(char[] src,int start,
				502	int limit,boolean allowMaybe,
				503	UnicodeSet nx) {
				504	return NormalizerImpl.quickCheck(
				505	src,start,limit,
				506	NormalizerImpl.getFromIndexesArr(
				507	NormalizerImpl.INDEX_MIN_NFKC_NO_MAYBE
				508	),
				509	NormalizerImpl.QC_NFKC,
				510	NormalizerImpl.OPTIONS_COMPAT,
				511	allowMaybe,
				512	nx
				513	);
				514	}
				515	protected boolean isNFSkippable(int c) {
				516	return NormalizerImpl.isNFSkippable(c, this,
				517	( NormalizerImpl.CC_MASK\|NormalizerImpl.COMBINES_ANY\|
				518	(NormalizerImpl.QC_NFKC & NormalizerImpl.QC_ANY_NO)
				519	)
				520	);
				521	}
				522	};
				523
				524	/**
				525	* Result values for quickCheck().
				526	* For details see Unicode Technical Report 15.
				527	* @stable ICU 2.8
				528	*/
				529	public static final class QuickCheckResult{
				530	private int resultValue;
				531	private QuickCheckResult(int value) {
				532	resultValue=value;
				533	}
				534	}
				535	/**
				536	* Indicates that string is not in the normalized format
				537	* @stable ICU 2.8
				538	*/
				539	public static final QuickCheckResult NO = new QuickCheckResult(0);
				540
				541	/**
				542	* Indicates that string is in the normalized format
				543	* @stable ICU 2.8
				544	*/
				545	public static final QuickCheckResult YES = new QuickCheckResult(1);
				546
				547	/**
				548	* Indicates it cannot be determined if string is in the normalized
				549	* format without further thorough checks.
				550	* @stable ICU 2.8
				551	*/
				552	public static final QuickCheckResult MAYBE = new QuickCheckResult(2);
				553
				554	//-------------------------------------------------------------------------
				555	// Constructors
				556	//-------------------------------------------------------------------------
				557
				558	/**
				559	* Creates a new <tt>Normalizer</tt> object for iterating over the
				560	* normalized form of a given string.
				561	* <p>
				562	* The <tt>options</tt> parameter specifies which optional
				563	* <tt>Normalizer</tt> features are to be enabled for this object.
				564	* <p>
				565	* @param str The string to be normalized. The normalization
				566	* will start at the beginning of the string.
				567	*
				568	* @param mode The normalization mode.
				569	*
				570	* @param opt Any optional features to be enabled.
				571	* Currently the only available option is {@link #UNICODE_3_2}.
				572	* If you want the default behavior corresponding to one of the
				573	* standard Unicode Normalization Forms, use 0 for this argument.
				574	* @stable ICU 2.6
				575	*/
				public NormalizerBase(String str, Mode mode, int opt) {
				    this.mode = mode;
				    this.options = opt;
				    // Wrap the string so it can be walked like any other input iterator.
				    this.text = UCharacterIterator.getInstance(str);
				}
				581
				582	/**
				583	* Creates a new <tt>Normalizer</tt> object for iterating over the
				584	* normalized form of the given text.
				585	* <p>
				586	* @param iter The input text to be normalized. The normalization
				587	* will start at the beginning of the string.
				588	*
				589	* @param mode The normalization mode.
				590	*/
				591	public NormalizerBase(CharacterIterator iter, Mode mode) {
					// Delegates to the three-argument constructor, passing UNICODE_LATEST
					// (declared outside this excerpt) as the options value.
				592	this(iter, mode, UNICODE_LATEST);
				593	}
				594
				595	/**
				596	* Creates a new <tt>Normalizer</tt> object for iterating over the
				597	* normalized form of the given text.
				598	* <p>
				599	* @param iter The input text to be normalized. The normalization
				600	* will start at the beginning of the string.
				601	*
				602	* @param mode The normalization mode.
				603	*
				604	* @param opt Any optional features to be enabled.
				605	* Currently the only available option is {@link #UNICODE_3_2}.
				606	* If you want the default behavior corresponding to one of the
				607	* standard Unicode Normalization Forms, use 0 for this argument.
				608	* @stable ICU 2.6
				609	*/
				public NormalizerBase(CharacterIterator iter, Mode mode, int opt) {
				    // Work on a clone so the caller's iterator is not the one being wrapped.
				    CharacterIterator snapshot = (CharacterIterator) iter.clone();
				    this.text = UCharacterIterator.getInstance(snapshot);
				    this.mode = mode;
				    this.options = opt;
				}
				617
				618	/**
				619	* Clones this <tt>Normalizer</tt> object. All properties of this
				620	* object are duplicated in the new object, including the cloning of any
				621	* {@link CharacterIterator} that was passed in to the constructor
				622	* or to {@link #setText(CharacterIterator) setText}.
				623	* However, the text storage underlying
				624	* the <tt>CharacterIterator</tt> is not duplicated unless the
				625	* iterator's <tt>clone</tt> method does so.
				626	* @stable ICU 2.8
				627	*/
				628	public Object clone() {
				629	try {
				630	NormalizerBase copy = (NormalizerBase) super.clone();
				631	copy.text = (UCharacterIterator) text.clone();
				632	//clone the internal buffer
				633	if (buffer != null) {
				634	copy.buffer = new char[buffer.length];
				635	System.arraycopy(buffer,0,copy.buffer,0,buffer.length);
				636	}
				637	return copy;
				638	}
				639	catch (CloneNotSupportedException e) {
				640	throw new InternalError(e.toString());
				641	}
				642	}
				643
				644	//--------------------------------------------------------------------------
				645	// Static Utility methods
				646	//--------------------------------------------------------------------------
				647
				648	/**
				649	* Compose a string.
				650	* The string will be composed according to the specified mode.
				651	* @param str The string to compose.
				652	* @param compat If true the string will be composed according to
				653	* NFKC rules and if false will be composed according to
				654	* NFC rules.
				655	* @param options The only recognized option is UNICODE_3_2
				656	* @return String The composed string
				657	* @stable ICU 2.6
				658	*/
				659	public static String compose(String str, boolean compat, int options) {
				660
				661	char[] dest, src;
				662	if (options == UNICODE_3_2_0_ORIGINAL) {
				663	String mappedStr = NormalizerImpl.convert(str);
				664	dest = new char[mappedStr.length()*MAX_BUF_SIZE_COMPOSE];
				665	src = mappedStr.toCharArray();
				666	} else {
				667	dest = new char[str.length()*MAX_BUF_SIZE_COMPOSE];
				668	src = str.toCharArray();
				669	}
				670	int destSize=0;
				671
				672	UnicodeSet nx = NormalizerImpl.getNX(options);
				673
				674	/* reset options bits that should only be set here or inside compose() */
				675	options&=~(NormalizerImpl.OPTIONS_SETS_MASK\|NormalizerImpl.OPTIONS_COMPAT\|NormalizerImpl.OPTIONS_COMPOSE_CONTIGUOUS);
				676
				677	if(compat) {
				678	options\|=NormalizerImpl.OPTIONS_COMPAT;
				679	}
				680
				681	for(;;) {
				682	destSize=NormalizerImpl.compose(src,0,src.length,
				683	dest,0,dest.length,options,
				684	nx);
				685	if(destSize<=dest.length) {
				686	return new String(dest,0,destSize);
				687	} else {
				688	dest = new char[destSize];
				689	}
				690	}
				691	}
				692
				693	private static final int MAX_BUF_SIZE_COMPOSE = 2; // compose(): initial dest capacity = input length * 2
				694	private static final int MAX_BUF_SIZE_DECOMPOSE = 3; // decompose(): initial dest capacity = input length * 3
				695
				696	/**
				697	* Decompose a string.
				698	* The string will be decomposed according to the specified mode.
				699	* @param str The string to decompose.
				700	* @param compat If true the string will be decomposed according to NFKD
				701	* rules and if false will be decomposed according to NFD
				702	* rules.
				703	* @return String The decomposed string
				704	* @stable ICU 2.8
				705	*/
				706	public static String decompose(String str, boolean compat) {
					// Same as the three-argument overload with UNICODE_LATEST
					// (declared outside this excerpt) as the options value.
				707	return decompose(str,compat,UNICODE_LATEST);
				708	}
				709
				710	/**
				711	* Decompose a string.
				712	* The string will be decomposed according to the specified mode.
				713	* @param str The string to decompose.
				714	* @param compat If true the string will be decomposed according to NFKD
				715	* rules and if false will be decomposed according to NFD
				716	* rules.
				717	* @param options The normalization options, ORed together (0 for no options).
				718	* @return String The decomposed string
				719	* @stable ICU 2.6
				720	*/
				721	public static String decompose(String str, boolean compat, int options) {
				722
				723	int[] trailCC = new int[1];
				724	int destSize=0;
				725	UnicodeSet nx = NormalizerImpl.getNX(options);
				726	char[] dest;
				727
				728	if (options == UNICODE_3_2_0_ORIGINAL) {
				729	String mappedStr = NormalizerImpl.convert(str);
				730	dest = new char[mappedStr.length()*MAX_BUF_SIZE_DECOMPOSE];
				731
				732	for(;;) {
				733	destSize=NormalizerImpl.decompose(mappedStr.toCharArray(),0,mappedStr.length(),
				734	dest,0,dest.length,
				735	compat,trailCC, nx);
				736	if(destSize<=dest.length) {
				737	return new String(dest,0,destSize);
				738	} else {
				739	dest = new char[destSize];
				740	}
				741	}
				742	} else {
				743	dest = new char[str.length()*MAX_BUF_SIZE_DECOMPOSE];
				744
				745	for(;;) {
				746	destSize=NormalizerImpl.decompose(str.toCharArray(),0,str.length(),
				747	dest,0,dest.length,
				748	compat,trailCC, nx);
				749	if(destSize<=dest.length) {
				750	return new String(dest,0,destSize);
				751	} else {
				752	dest = new char[destSize];
				753	}
				754	}
				755	}
				756	}
				757
				758	/**
				759	* Normalize a string.
				760	* The string will be normalized according to the specified normalization
				761	* mode and options.
				762	* @param src The char array to compose.
				763	* @param srcStart Start index of the source
				764	* @param srcLimit Limit index of the source
				765	* @param dest The char buffer to fill in
				766	* @param destStart Start index of the destination buffer
				767	* @param destLimit End index of the destination buffer
				768	* @param mode The normalization mode; one of Normalizer.NONE,
				769	* Normalizer.NFD, Normalizer.NFC, Normalizer.NFKC,
				770	* Normalizer.NFKD, Normalizer.DEFAULT
				771	* @param options The normalization options, ORed together (0 for no options).
				772	* @return int The total buffer size needed;if greater than length of
				773	* result, the output was truncated.
				774	* @exception IndexOutOfBoundsException if the target capacity is
				775	* less than the required length
				776	* @stable ICU 2.6
				777	*/
				778	public static int normalize(char[] src,int srcStart, int srcLimit,
				779	char[] dest,int destStart, int destLimit,
				780	Mode mode, int options) {
				781	int length = mode.normalize(src,srcStart,srcLimit,dest,destStart,destLimit, options);
				782
				783	if(length<=(destLimit-destStart)) {
				784	return length;
				785	} else {
				786	throw new IndexOutOfBoundsException(Integer.toString(length));
				787	}
				788	}
				789
				790	//-------------------------------------------------------------------------
				791	// Iteration API
				792	//-------------------------------------------------------------------------
				793
				794	/**
				795	* Return the current character in the normalized text.
				796	* @return The codepoint as an int
				797	* @stable ICU 2.8
				798	*/
				799	public int current() {
				800	if(bufferPos<bufferLimit \|\| nextNormalize()) {
				801	return getCodePointAt(bufferPos);
				802	} else {
				803	return DONE;
				804	}
				805	}
				806
				807	/**
				808	* Return the next character in the normalized text and advance
				809	* the iteration position by one. If the end
				810	* of the text has already been reached, {@link #DONE} is returned.
				811	* @return The codepoint as an int
				812	* @stable ICU 2.8
				813	*/
				814	public int next() {
				815	if(bufferPos<bufferLimit \|\| nextNormalize()) {
				816	int c=getCodePointAt(bufferPos);
				817	bufferPos+=(c>0xFFFF) ? 2 : 1;
				818	return c;
				819	} else {
				820	return DONE;
				821	}
				822	}
				823
				824
				825	/**
				826	* Return the previous character in the normalized text and decrement
				827	* the iteration position by one. If the beginning
				828	* of the text has already been reached, {@link #DONE} is returned.
				829	* @return The codepoint as an int
				830	* @stable ICU 2.8
				831	*/
				832	public int previous() {
				833	if(bufferPos>0 \|\| previousNormalize()) {
				834	int c=getCodePointAt(bufferPos-1);
				835	bufferPos-=(c>0xFFFF) ? 2 : 1;
				836	return c;
				837	} else {
				838	return DONE;
				839	}
				840	}
				841
				842	/**
				843	* Reset the index to the beginning of the text.
				844	* This is equivalent to setIndexOnly(0).
				845	* @stable ICU 2.8
				846	*/
				847	public void reset() {
				848	text.setIndex(0);
				849	currentIndex=nextIndex=0;
				850	clearBuffer();
				851	}
				852
				853	/**
				854	* Set the iteration position in the input text that is being normalized,
				855	* without any immediate normalization.
				856	* After setIndexOnly(), getIndex() will return the same index that is
				857	* specified here.
				858	*
				859	* @param index the desired index in the input text.
				860	* @stable ICU 2.8
				861	*/
				862	public void setIndexOnly(int index) {
				863	text.setIndex(index); // NOTE(review): any range validation of index presumably happens in this call -- confirm
				864	currentIndex=nextIndex=index; // plain assignment; performs no validation itself
				865	clearBuffer();
				866	}
				867
				868	/**
				869	* Set the iteration position in the input text that is being normalized
				870	* and return the first normalized character at that position.
				871	* <p>
				872	* <b>Note:</b> This method sets the position in the <em>input</em> text,
				873	* while {@link #next} and {@link #previous} iterate through characters
				874	* in the normalized <em>output</em>. This means that there is not
				875	* necessarily a one-to-one correspondence between characters returned
				876	* by <tt>next</tt> and <tt>previous</tt> and the indices passed to and
				877	* returned from <tt>setIndex</tt> and {@link #getIndex}.
				878	* <p>
				879	* @param index the desired index in the input text->
				880	*
				881	* @return the first normalized character that is the result of iterating
				882	* forward starting at the given index.
				883	*
				884	* @throws IllegalArgumentException if the given index is less than
				885	* {@link #getBeginIndex} or greater than {@link #getEndIndex}.
				886	* @return The codepoint as an int
				887	* @deprecated ICU 3.2
				888	* @obsolete ICU 3.2
				889	*/
				890	public int setIndex(int index) {
				891	setIndexOnly(index);
				892	return current();
				893	}
				894
				895	/**
				896	* Retrieve the index of the start of the input text. This is the begin
				897	* index of the <tt>CharacterIterator</tt> or the start (i.e. 0) of the
				898	* <tt>String</tt> over which this <tt>Normalizer</tt> is iterating
				899	* @deprecated ICU 2.2. Use startIndex() instead.
				900	* @return The codepoint as an int
				901	* @see #startIndex
				902	*/
				903	public int getBeginIndex() {
				904	return 0;
				905	}
				906
				907	/**
				908	* Retrieve the index of the end of the input text. This is the end index
				909	* of the <tt>CharacterIterator</tt> or the length of the <tt>String</tt>
				910	* over which this <tt>Normalizer</tt> is iterating
				911	* @deprecated ICU 2.2. Use endIndex() instead.
				912	* @return The codepoint as an int
				913	* @see #endIndex
				914	*/
				915	public int getEndIndex() {
				916	return endIndex();
				917	}
				918
				919	/**
				920	* Retrieve the current iteration position in the input text that is
				921	* being normalized. This method is useful in applications such as
				922	* searching, where you need to be able to determine the position in
				923	* the input text that corresponds to a given normalized output character.
				924	* <p>
				925	* <b>Note:</b> This method sets the position in the <em>input</em>, while
				926	* {@link #next} and {@link #previous} iterate through characters in the
				927	* <em>output</em>. This means that there is not necessarily a one-to-one
				928	* correspondence between characters returned by <tt>next</tt> and
				929	* <tt>previous</tt> and the indices passed to and returned from
				930	* <tt>setIndex</tt> and {@link #getIndex}.
				931	* @return The current iteration position
				932	* @stable ICU 2.8
				933	*/
				934	public int getIndex() {
				935	if(bufferPos<bufferLimit) {
				936	return currentIndex;
				937	} else {
				938	return nextIndex;
				939	}
				940	}
				941
				942	/**
				943	* Retrieve the index of the end of the input text-> This is the end index
				944	* of the <tt>CharacterIterator</tt> or the length of the <tt>String</tt>
				945	* over which this <tt>Normalizer</tt> is iterating
				946	* @return The current iteration position
				947	* @stable ICU 2.8
				948	*/
				949	public int endIndex() {
				950	return text.getLength();
				951	}
				952
				953	//-------------------------------------------------------------------------
				954	// Property access methods
				955	//-------------------------------------------------------------------------
				956	/**
				957	* Set the normalization mode for this object.
				958	* <p>
				959	* <b>Note:</b>If the normalization mode is changed while iterating
				960	* over a string, calls to {@link #next} and {@link #previous} may
				961	* return previously buffers characters in the old normalization mode
				962	* until the iteration is able to re-sync at the next base character.
				963	* It is safest to call {@link #setText setText()}, {@link #first},
				964	* {@link #last}, etc. after calling <tt>setMode</tt>.
				965	* <p>
				966	* @param newMode the new mode for this <tt>Normalizer</tt>.
				967	* The supported modes are:
				968	* <ul>
				969	* <li>{@link #COMPOSE} - Unicode canonical decompositiion
				970	* followed by canonical composition.
				971	* <li>{@link #COMPOSE_COMPAT} - Unicode compatibility decompositiion
				972	* follwed by canonical composition.
				973	* <li>{@link #DECOMP} - Unicode canonical decomposition
				974	* <li>{@link #DECOMP_COMPAT} - Unicode compatibility decomposition.
				975	* <li>{@link #NO_OP} - Do nothing but return characters
				976	* from the underlying input text.
				977	* </ul>
				978	*
				979	* @see #getMode
				980	* @stable ICU 2.8
				981	*/
				982	public void setMode(Mode newMode) {
				983	mode = newMode;
				984	}
				985	/**
				986	* Return the basic operation performed by this <tt>Normalizer</tt>
				987	*
				988	* @see #setMode
				989	* @stable ICU 2.8
				990	*/
				991	public Mode getMode() {
				992	return mode;
				993	}
				994
				995	/**
				996	* Set the input text over which this <tt>Normalizer</tt> will iterate.
				997	* The iteration position is set to the beginning of the input text->
				998	* @param newText The new string to be normalized.
				999	* @stable ICU 2.8
				1000	*/
				1001	public void setText(String newText) {
				1002
				1003	UCharacterIterator newIter = UCharacterIterator.getInstance(newText);
				1004	if (newIter == null) {
				1005	throw new InternalError("Could not create a new UCharacterIterator");
				1006	}
				1007	text = newIter;
				1008	reset();
				1009	}
				1010
				1011	/**
				1012	* Set the input text over which this <tt>Normalizer</tt> will iterate.
				1013	* The iteration position is set to the beginning of the input text->
				1014	* @param newText The new string to be normalized.
				1015	* @stable ICU 2.8
				1016	*/
				1017	public void setText(CharacterIterator newText) {
				1018
				1019	UCharacterIterator newIter = UCharacterIterator.getInstance(newText);
				1020	if (newIter == null) {
				1021	throw new InternalError("Could not create a new UCharacterIterator");
				1022	}
				1023	text = newIter;
				1024	currentIndex=nextIndex=0;
				1025	clearBuffer();
				1026	}
				1027
				1028	//-------------------------------------------------------------------------
				1029	// Private utility methods
				1030	//-------------------------------------------------------------------------
				1031
				1032
				1033	/* backward iteration --------------------------------------------------- */
				1034
				1035	/*
				1036	* read backwards and get norm32
				1037	* return 0 if the character is <minC
				1038	* if c2!=0 then (c2, c) is a surrogate pair (reversed - c2 is first
				1039	* surrogate but read second!)
				1040	*/
				1041
				1042	private static long getPrevNorm32(UCharacterIterator src,
				1043	int/unsigned/ minC,
				1044	int/unsigned/ mask,
				1045	char[] chars) {
				1046	long norm32;
				1047	int ch=0;
				1048	/* need src.hasPrevious() */
				1049	if((ch=src.previous()) == UCharacterIterator.DONE) {
				1050	return 0;
				1051	}
				1052	chars[0]=(char)ch;
				1053	chars[1]=0;
				1054
				1055	/* check for a surrogate before getting norm32 to see if we need to
				1056	* predecrement further */
				1057	if(chars[0]<minC) {
				1058	return 0;
				1059	} else if(!UTF16.isSurrogate(chars[0])) {
				1060	return NormalizerImpl.getNorm32(chars[0]);
				1061	} else if(UTF16.isLeadSurrogate(chars[0]) \|\| (src.getIndex()==0)) {
				1062	/* unpaired surrogate */
				1063	chars[1]=(char)src.current();
				1064	return 0;
				1065	} else if(UTF16.isLeadSurrogate(chars[1]=(char)src.previous())) {
				1066	norm32=NormalizerImpl.getNorm32(chars[1]);
				1067	if((norm32&mask)==0) {
				1068	/* all surrogate pairs with this lead surrogate have irrelevant
				1069	* data */
				1070	return 0;
				1071	} else {
				1072	/* norm32 must be a surrogate special */
				1073	return NormalizerImpl.getNorm32FromSurrogatePair(norm32,chars[0]);
				1074	}
				1075	} else {
				1076	/* unpaired second surrogate, undo the c2=src.previous() movement */
				1077	src.moveIndex( 1);
				1078	return 0;
				1079	}
				1080	}
				1081
				1082	private interface IsPrevBoundary{
				1083	public boolean isPrevBoundary(UCharacterIterator src,
				1084	int/unsigned/ minC,
				1085	int/unsigned/ mask,
				1086	char[] chars);
				1087	}
				1088	private static final class IsPrevNFDSafe implements IsPrevBoundary{
				1089	/*
				1090	* for NF*D:
				1091	* read backwards and check if the lead combining class is 0
				1092	* if c2!=0 then (c2, c) is a surrogate pair (reversed - c2 is first
				1093	* surrogate but read second!)
				1094	*/
				1095	public boolean isPrevBoundary(UCharacterIterator src,
				1096	int/unsigned/ minC,
				1097	int/unsigned/ ccOrQCMask,
				1098	char[] chars) {
				1099
				1100	return NormalizerImpl.isNFDSafe(getPrevNorm32(src, minC,
				1101	ccOrQCMask, chars),
				1102	ccOrQCMask,
				1103	ccOrQCMask& NormalizerImpl.QC_MASK);
				1104	}
				1105	}
				1106
				1107	private static final class IsPrevTrueStarter implements IsPrevBoundary{
				1108	/*
				1109	* read backwards and check if the character is (or its decomposition
				1110	* begins with) a "true starter" (cc==0 and NF*C_YES)
				1111	* if c2!=0 then (c2, c) is a surrogate pair (reversed - c2 is first
				1112	* surrogate but read second!)
				1113	*/
				1114	public boolean isPrevBoundary(UCharacterIterator src,
				1115	int/unsigned/ minC,
				1116	int/unsigned/ ccOrQCMask,
				1117	char[] chars) {
				1118	long norm32;
				1119	int/unsigned/ decompQCMask;
				1120
				1121	decompQCMask=(ccOrQCMask<<2)&0xf; /decomposition quick check mask/
				1122	norm32=getPrevNorm32(src, minC, ccOrQCMask\|decompQCMask, chars);
				1123	return NormalizerImpl.isTrueStarter(norm32,ccOrQCMask,decompQCMask);
				1124	}
				1125	}
				1126
				1127	private static int findPreviousIterationBoundary(UCharacterIterator src,
				1128	IsPrevBoundary obj,
				1129	int/unsigned/ minC,
				1130	int/mask/ mask,
				1131	char[] buffer,
				1132	int[] startIndex) {
				1133	char[] chars=new char[2];
				1134	boolean isBoundary;
				1135
				1136	/* fill the buffer from the end backwards */
				1137	startIndex[0] = buffer.length;
				1138	chars[0]=0;
				1139	while(src.getIndex()>0 && chars[0]!=UCharacterIterator.DONE) {
				1140	isBoundary=obj.isPrevBoundary(src, minC, mask, chars);
				1141
				1142	/* always write this character to the front of the buffer */
				1143	/* make sure there is enough space in the buffer */
				1144	if(startIndex[0] < (chars[1]==0 ? 1 : 2)) {
				1145
				1146	// grow the buffer
				1147	char[] newBuf = new char[buffer.length*2];
				1148	/* move the current buffer contents up */
				1149	System.arraycopy(buffer,startIndex[0],newBuf,
				1150	newBuf.length-(buffer.length-startIndex[0]),
				1151	buffer.length-startIndex[0]);
				1152	//adjust the startIndex
				1153	startIndex[0]+=newBuf.length-buffer.length;
				1154
				1155	buffer=newBuf;
				1156	newBuf=null;
				1157
				1158	}
				1159
				1160	buffer[--startIndex[0]]=chars[0];
				1161	if(chars[1]!=0) {
				1162	buffer[--startIndex[0]]=chars[1];
				1163	}
				1164
				1165	/* stop if this just-copied character is a boundary */
				1166	if(isBoundary) {
				1167	break;
				1168	}
				1169	}
				1170
				1171	/* return the length of the buffer contents */
				1172	return buffer.length-startIndex[0];
				1173	}
				1174
				1175	private static int previous(UCharacterIterator src,
				1176	char[] dest, int destStart, int destLimit,
				1177	Mode mode,
				1178	boolean doNormalize,
				1179	boolean[] pNeededToNormalize,
				1180	int options) {
				1181
				1182	IsPrevBoundary isPreviousBoundary;
				1183	int destLength, bufferLength;
				1184	int/unsigned/ mask;
				1185	int c,c2;
				1186
				1187	char minC;
				1188	int destCapacity = destLimit-destStart;
				1189	destLength=0;
				1190
				1191	if(pNeededToNormalize!=null) {
				1192	pNeededToNormalize[0]=false;
				1193	}
				1194	minC = (char)mode.getMinC();
				1195	mask = mode.getMask();
				1196	isPreviousBoundary = mode.getPrevBoundary();
				1197
				1198	if(isPreviousBoundary==null) {
				1199	destLength=0;
				1200	if((c=src.previous())>=0) {
				1201	destLength=1;
				1202	if(UTF16.isTrailSurrogate((char)c)) {
				1203	c2= src.previous();
				1204	if(c2!= UCharacterIterator.DONE) {
				1205	if(UTF16.isLeadSurrogate((char)c2)) {
				1206	if(destCapacity>=2) {
				1207	dest[1]=(char)c; // trail surrogate
				1208	destLength=2;
				1209	}
				1210	// lead surrogate to be written below
				1211	c=c2;
				1212	} else {
				1213	src.moveIndex(1);
				1214	}
				1215	}
				1216	}
				1217
				1218	if(destCapacity>0) {
				1219	dest[0]=(char)c;
				1220	}
				1221	}
				1222	return destLength;
				1223	}
				1224
				1225	char[] buffer = new char[100];
				1226	int[] startIndex= new int[1];
				1227	bufferLength=findPreviousIterationBoundary(src,
				1228	isPreviousBoundary,
				1229	minC, mask,buffer,
				1230	startIndex);
				1231	if(bufferLength>0) {
				1232	if(doNormalize) {
				1233	destLength=NormalizerBase.normalize(buffer,startIndex[0],
				1234	startIndex[0]+bufferLength,
				1235	dest, destStart,destLimit,
				1236	mode, options);
				1237
				1238	if(pNeededToNormalize!=null) {
				1239	pNeededToNormalize[0]=(boolean)(destLength!=bufferLength \|\|
				1240	Utility.arrayRegionMatches(
				1241	buffer,0,dest,
				1242	destStart,destLimit
				1243	));
				1244	}
				1245	} else {
				1246	/* just copy the source characters */
				1247	if(destCapacity>0) {
				1248	System.arraycopy(buffer,startIndex[0],dest,0,
				1249	(bufferLength<destCapacity) ?
				1250	bufferLength : destCapacity
				1251	);
				1252	}
				1253	}
				1254	}
				1255
				1256
				1257	return destLength;
				1258	}
				1259
				1260
				1261
				1262	/* forward iteration ---------------------------------------------------- */
				1263	/*
				1264	* read forward and check if the character is a next-iteration boundary
				1265	* if c2!=0 then (c, c2) is a surrogate pair
				1266	*/
				1267	private interface IsNextBoundary{
				1268	boolean isNextBoundary(UCharacterIterator src,
				1269	int/unsigned/ minC,
				1270	int/unsigned/ mask,
				1271	int[] chars);
				1272	}
				1273	/*
				1274	* read forward and get norm32
				1275	* return 0 if the character is <minC
				1276	* if c2!=0 then (c2, c) is a surrogate pair
				1277	* always reads complete characters
				1278	*/
				1279	private static long /unsigned/ getNextNorm32(UCharacterIterator src,
				1280	int/unsigned/ minC,
				1281	int/unsigned/ mask,
				1282	int[] chars) {
				1283	long norm32;
				1284
				1285	/* need src.hasNext() to be true */
				1286	chars[0]=src.next();
				1287	chars[1]=0;
				1288
				1289	if(chars[0]<minC) {
				1290	return 0;
				1291	}
				1292
				1293	norm32=NormalizerImpl.getNorm32((char)chars[0]);
				1294	if(UTF16.isLeadSurrogate((char)chars[0])) {
				1295	if(src.current()!=UCharacterIterator.DONE &&
				1296	UTF16.isTrailSurrogate((char)(chars[1]=src.current()))) {
				1297	src.moveIndex(1); /* skip the c2 surrogate */
				1298	if((norm32&mask)==0) {
				1299	/* irrelevant data */
				1300	return 0;
				1301	} else {
				1302	/* norm32 must be a surrogate special */
				1303	return NormalizerImpl.getNorm32FromSurrogatePair(norm32,(char)chars[1]);
				1304	}
				1305	} else {
				1306	/* unmatched surrogate */
				1307	return 0;
				1308	}
				1309	}
				1310	return norm32;
				1311	}
				1312
				1313
				1314	/*
				1315	* for NF*D:
				1316	* read forward and check if the lead combining class is 0
				1317	* if c2!=0 then (c, c2) is a surrogate pair
				1318	*/
				1319	private static final class IsNextNFDSafe implements IsNextBoundary{
				1320	public boolean isNextBoundary(UCharacterIterator src,
				1321	int/unsigned/ minC,
				1322	int/unsigned/ ccOrQCMask,
				1323	int[] chars) {
				1324	return NormalizerImpl.isNFDSafe(getNextNorm32(src,minC,ccOrQCMask,chars),
				1325	ccOrQCMask, ccOrQCMask&NormalizerImpl.QC_MASK);
				1326	}
				1327	}
				1328
				1329	/*
				1330	* for NF*C:
				1331	* read forward and check if the character is (or its decomposition begins
				1332	* with) a "true starter" (cc==0 and NF*C_YES)
				1333	* if c2!=0 then (c, c2) is a surrogate pair
				1334	*/
				1335	private static final class IsNextTrueStarter implements IsNextBoundary{
				1336	public boolean isNextBoundary(UCharacterIterator src,
				1337	int/unsigned/ minC,
				1338	int/unsigned/ ccOrQCMask,
				1339	int[] chars) {
				1340	long norm32;
				1341	int/unsigned/ decompQCMask;
				1342
				1343	decompQCMask=(ccOrQCMask<<2)&0xf; /decomposition quick check mask/
				1344	norm32=getNextNorm32(src, minC, ccOrQCMask\|decompQCMask, chars);
				1345	return NormalizerImpl.isTrueStarter(norm32, ccOrQCMask, decompQCMask);
				1346	}
				1347	}
				1348
				1349	private static int findNextIterationBoundary(UCharacterIterator src,
				1350	IsNextBoundary obj,
				1351	int/unsigned/ minC,
				1352	int/unsigned/ mask,
				1353	char[] buffer) {
				1354	if(src.current()==UCharacterIterator.DONE) {
				1355	return 0;
				1356	}
				1357
				1358	/* get one character and ignore its properties */
				1359	int[] chars = new int[2];
				1360	chars[0]=src.next();
				1361	buffer[0]=(char)chars[0];
				1362	int bufferIndex = 1;
				1363
				1364	if(UTF16.isLeadSurrogate((char)chars[0])&&
				1365	src.current()!=UCharacterIterator.DONE) {
				1366	if(UTF16.isTrailSurrogate((char)(chars[1]=src.next()))) {
				1367	buffer[bufferIndex++]=(char)chars[1];
				1368	} else {
				1369	src.moveIndex(-1); /* back out the non-trail-surrogate */
				1370	}
				1371	}
				1372
				1373	/* get all following characters until we see a boundary */
				1374	/* checking hasNext() instead of c!=DONE on the off-chance that U+ffff
				1375	* is part of the string */
				1376	while( src.current()!=UCharacterIterator.DONE) {
				1377	if(obj.isNextBoundary(src, minC, mask, chars)) {
				1378	/* back out the latest movement to stop at the boundary */
				1379	src.moveIndex(chars[1]==0 ? -1 : -2);
				1380	break;
				1381	} else {
				1382	if(bufferIndex+(chars[1]==0 ? 1 : 2)<=buffer.length) {
				1383	buffer[bufferIndex++]=(char)chars[0];
				1384	if(chars[1]!=0) {
				1385	buffer[bufferIndex++]=(char)chars[1];
				1386	}
				1387	} else {
				1388	char[] newBuf = new char[buffer.length*2];
				1389	System.arraycopy(buffer,0,newBuf,0,bufferIndex);
				1390	buffer = newBuf;
				1391	buffer[bufferIndex++]=(char)chars[0];
				1392	if(chars[1]!=0) {
				1393	buffer[bufferIndex++]=(char)chars[1];
				1394	}
				1395	}
				1396	}
				1397	}
				1398
				1399	/* return the length of the buffer contents */
				1400	return bufferIndex;
				1401	}
				1402
				1403	private static int next(UCharacterIterator src,
				1404	char[] dest, int destStart, int destLimit,
				1405	NormalizerBase.Mode mode,
				1406	boolean doNormalize,
				1407	boolean[] pNeededToNormalize,
				1408	int options) {
				1409
				1410	IsNextBoundary isNextBoundary;
				1411	int /unsigned/ mask;
				1412	int /unsigned/ bufferLength;
				1413	int c,c2;
				1414	char minC;
				1415	int destCapacity = destLimit - destStart;
				1416	int destLength = 0;
				1417	if(pNeededToNormalize!=null) {
				1418	pNeededToNormalize[0]=false;
				1419	}
				1420
				1421	minC = (char)mode.getMinC();
				1422	mask = mode.getMask();
				1423	isNextBoundary = mode.getNextBoundary();
				1424
				1425	if(isNextBoundary==null) {
				1426	destLength=0;
				1427	c=src.next();
				1428	if(c!=UCharacterIterator.DONE) {
				1429	destLength=1;
				1430	if(UTF16.isLeadSurrogate((char)c)) {
				1431	c2= src.next();
				1432	if(c2!= UCharacterIterator.DONE) {
				1433	if(UTF16.isTrailSurrogate((char)c2)) {
				1434	if(destCapacity>=2) {
				1435	dest[1]=(char)c2; // trail surrogate
				1436	destLength=2;
				1437	}
				1438	// lead surrogate to be written below
				1439	} else {
				1440	src.moveIndex(-1);
				1441	}
				1442	}
				1443	}
				1444
				1445	if(destCapacity>0) {
				1446	dest[0]=(char)c;
				1447	}
				1448	}
				1449	return destLength;
				1450	}
				1451
				1452	char[] buffer=new char[100];
				1453	int[] startIndex = new int[1];
				1454	bufferLength=findNextIterationBoundary(src,isNextBoundary, minC, mask,
				1455	buffer);
				1456	if(bufferLength>0) {
				1457	if(doNormalize) {
				1458	destLength=mode.normalize(buffer,startIndex[0],bufferLength,
				1459	dest,destStart,destLimit, options);
				1460
				1461	if(pNeededToNormalize!=null) {
				1462	pNeededToNormalize[0]=(boolean)(destLength!=bufferLength \|\|
				1463	Utility.arrayRegionMatches(buffer,startIndex[0],
				1464	dest,destStart,
				1465	destLength));
				1466	}
				1467	} else {
				1468	/* just copy the source characters */
				1469	if(destCapacity>0) {
				1470	System.arraycopy(buffer,0,dest,destStart,
				1471	Math.min(bufferLength,destCapacity)
				1472	);
				1473	}
				1474
				1475
				1476	}
				1477	}
				1478	return destLength;
				1479	}
				1480
				1481	private void clearBuffer() {
				1482	bufferLimit=bufferStart=bufferPos=0;
				1483	}
				1484
				1485	private boolean nextNormalize() {
				1486
				1487	clearBuffer();
				1488	currentIndex=nextIndex;
				1489	text.setIndex(nextIndex);
				1490
				1491	bufferLimit=next(text,buffer,bufferStart,buffer.length,mode,true,null,options);
				1492
				1493	nextIndex=text.getIndex();
				1494	return (bufferLimit>0);
				1495	}
				1496
				1497	private boolean previousNormalize() {
				1498
				1499	clearBuffer();
				1500	nextIndex=currentIndex;
				1501	text.setIndex(currentIndex);
				1502	bufferLimit=previous(text,buffer,bufferStart,buffer.length,mode,true,null,options);
				1503
				1504	currentIndex=text.getIndex();
				1505	bufferPos = bufferLimit;
				1506	return bufferLimit>0;
				1507	}
				1508
				1509	private int getCodePointAt(int index) {
				1510	if( UTF16.isSurrogate(buffer[index])) {
				1511	if(UTF16.isLeadSurrogate(buffer[index])) {
				1512	if((index+1)<bufferLimit &&
				1513	UTF16.isTrailSurrogate(buffer[index+1])) {
				1514	return UCharacterProperty.getRawSupplementary(
				1515	buffer[index],
				1516	buffer[index+1]
				1517	);
				1518	}
				1519	}else if(UTF16.isTrailSurrogate(buffer[index])) {
				1520	if(index>0 && UTF16.isLeadSurrogate(buffer[index-1])) {
				1521	return UCharacterProperty.getRawSupplementary(
				1522	buffer[index-1],
				1523	buffer[index]
				1524	);
				1525	}
				1526	}
				1527	}
				1528	return buffer[index];
				1529
				1530	}
				1531
				1532	/**
				1533	* Internal API
				1534	* @internal
				1535	*/
				1536	public static boolean isNFSkippable(int c, Mode mode) {
				1537	return mode.isNFSkippable(c);
				1538	}
				1539
				1540	//
				1541	// Options
				1542	//
				1543
				1544	/*
				1545	* Default option for Unicode 3.2.0 normalization.
				1546	* Corrigendum 4 was fixed in Unicode 3.2.0 but isn't supported in
				1547	* IDNA/StringPrep.
				1548	* The public review issue #29 was fixed in Unicode 4.1.0. Corrigendum 5
				1549	* allowed Unicode 3.2 to 4.0.1 to apply the fix for PRI #29, but it isn't
				1550	* supported by IDNA/StringPrep as well as Corrigendum 4.
				1551	*/
				1552	public static final int UNICODE_3_2_0_ORIGINAL =
				1553	UNICODE_3_2 \|
				1554	NormalizerImpl.WITHOUT_CORRIGENDUM4_CORRECTIONS \|
				1555	NormalizerImpl.BEFORE_PRI_29;
				1556
				1557	/*
				1558	* Default option for the latest Unicode normalization. This option is
				1559	* provided mainly for testing.
				1560	* The value zero means that normalization is done with the fixes for
				1561	* - Corrigendum 4 (Five CJK Canonical Mapping Errors)
				1562	* - Corrigendum 5 (Normalization Idempotency)
				1563	*/
				1564	public static final int UNICODE_LATEST = 0x00;
				1565
				1566	//
				1567	// public constructor and methods for java.text.Normalizer and
				1568	// sun.text.Normalizer
				1569	//
				1570
				/**
				 * Creates a new <tt>Normalizer</tt> object for iterating over the
				 * normalized form of a given string, using the
				 * {@link #UNICODE_LATEST} options.
				 *
				 * @param str  The string to be normalized. The normalization
				 *             will start at the beginning of the string.
				 *
				 * @param mode The normalization mode.
				 */
				public NormalizerBase(String str, Mode mode) {
					this(str, mode, UNICODE_LATEST);
				}
				1583
				1584	/**
				1585	* Normalizes a <code>String</code> using the given normalization form.
				1586	*
				1587	* @param str the input string to be normalized.
				1588	* @param form the normalization form
				1589	*/
				1590	public static String normalize(String str, Normalizer.Form form) {
				1591	return normalize(str, form, UNICODE_LATEST);
				1592	}
				1593
				1594	/**
				1595	* Normalizes a <code>String</code> using the given normalization form.
				1596	*
				1597	* @param str the input string to be normalized.
				1598	* @param form the normalization form
				1599	* @param options the optional features to be enabled.
				1600	*/
				1601	public static String normalize(String str, Normalizer.Form form, int options) {
				1602	switch (form) {
				1603	case NFC :
				1604	return NFC.normalize(str, options);
				1605	case NFD :
				1606	return NFD.normalize(str, options);
				1607	case NFKC :
				1608	return NFKC.normalize(str, options);
				1609	case NFKD :
				1610	return NFKD.normalize(str, options);
				1611	}
				1612
				1613	throw new IllegalArgumentException("Unexpected normalization form: " +
				1614	form);
				1615	}
				1616
				1617	/**
				1618	* Test if a string is in a given normalization form.
				1619	* This is semantically equivalent to source.equals(normalize(source, mode)).
				1620	*
				1621	* Unlike quickCheck(), this function returns a definitive result,
				1622	* never a "maybe".
				1623	* For NFD, NFKD, and FCD, both functions work exactly the same.
				1624	* For NFC and NFKC where quickCheck may return "maybe", this function will
				1625	* perform further tests to arrive at a true/false result.
				1626	* @param str the input string to be checked to see if it is normalized
				1627	* @param form the normalization form
				1628	* @param options the optional features to be enabled.
				1629	*/
				1630	public static boolean isNormalized(String str, Normalizer.Form form) {
				1631	return isNormalized(str, form, UNICODE_LATEST);
				1632	}
				1633
				1634	/**
				1635	* Test if a string is in a given normalization form.
				1636	* This is semantically equivalent to source.equals(normalize(source, mode)).
				1637	*
				1638	* Unlike quickCheck(), this function returns a definitive result,
				1639	* never a "maybe".
				1640	* For NFD, NFKD, and FCD, both functions work exactly the same.
				1641	* For NFC and NFKC where quickCheck may return "maybe", this function will
				1642	* perform further tests to arrive at a true/false result.
				1643	* @param str the input string to be checked to see if it is normalized
				1644	* @param form the normalization form
				1645	* @param options the optional features to be enabled.
				1646	*/
				1647	public static boolean isNormalized(String str, Normalizer.Form form, int options) {
				1648	switch (form) {
				1649	case NFC:
				1650	return (NFC.quickCheck(str.toCharArray(),0,str.length(),false,NormalizerImpl.getNX(options))==YES);
				1651	case NFD:
				1652	return (NFD.quickCheck(str.toCharArray(),0,str.length(),false,NormalizerImpl.getNX(options))==YES);
				1653	case NFKC:
				1654	return (NFKC.quickCheck(str.toCharArray(),0,str.length(),false,NormalizerImpl.getNX(options))==YES);
				1655	case NFKD:
				1656	return (NFKD.quickCheck(str.toCharArray(),0,str.length(),false,NormalizerImpl.getNX(options))==YES);
				1657	}
				1658
				1659	throw new IllegalArgumentException("Unexpected normalization form: " +
				1660	form);
				1661	}
				1662	}