/*
* Copyright (c) 2009, 2015, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation. Oracle designates this
* particular file as subject to the "Classpath" exception as provided
* by Oracle in the LICENSE file that accompanied this code.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
/*
*******************************************************************************
* Copyright (C) 2009-2014, International Business Machines
* Corporation and others. All Rights Reserved.
*******************************************************************************
*/
package sun.text.normalizer;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.text.Normalizer;
// Original filename in ICU4J: Normalizer2Impl.java
public final class NormalizerImpl {
public static final class Hangul {
/* Korean Hangul and Jamo constants */
public static final int JAMO_L_BASE=0x1100; /* "lead" jamo */
public static final int JAMO_V_BASE=0x1161; /* "vowel" jamo */
public static final int JAMO_T_BASE=0x11a7; /* "trail" jamo */
public static final int HANGUL_BASE=0xac00;
public static final int HANGUL_END=0xd7a3;
public static final int JAMO_L_COUNT=19;
public static final int JAMO_V_COUNT=21;
public static final int JAMO_T_COUNT=28;
public static final int HANGUL_COUNT=JAMO_L_COUNT*JAMO_V_COUNT*JAMO_T_COUNT;
public static final int HANGUL_LIMIT=HANGUL_BASE+HANGUL_COUNT;
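// Illustrative arithmetic (standard Unicode Hangul algorithm):
//   syllable = HANGUL_BASE + (L*JAMO_V_COUNT + V)*JAMO_T_COUNT + T
// e.g. U+AC01 = 0xAC00 + (0*21 + 0)*28 + 1 decomposes to U+1100 U+1161 U+11A8.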
public static boolean isHangul(int c) {
return HANGUL_BASE<=c && c<HANGUL_LIMIT;
}
public static boolean isHangulWithoutJamoT(char c) {
c-=HANGUL_BASE;
return c<HANGUL_COUNT && c%JAMO_T_COUNT==0;
}
/**
* Decomposes c, which must be a Hangul syllable, into buffer
* and returns the length of the decomposition (2 or 3).
*/
public static int decompose(int c, Appendable buffer) {
try {
c-=HANGUL_BASE;
int c2=c%JAMO_T_COUNT;
c/=JAMO_T_COUNT;
buffer.append((char)(JAMO_L_BASE+c/JAMO_V_COUNT));
buffer.append((char)(JAMO_V_BASE+c%JAMO_V_COUNT));
if(c2==0) {
return 2;
} else {
buffer.append((char)(JAMO_T_BASE+c2));
return 3;
}
} catch(IOException e) {
throw new InternalError(e);
}
}
}
/**
* Writable buffer that takes care of canonical ordering.
* Its Appendable methods behave like the C++ implementation's
* appendZeroCC() methods.
* <p>
* If dest is a StringBuilder, then the buffer writes directly to it.
* Otherwise, the buffer maintains a StringBuilder for intermediate text segments
* until no further changes are necessary and whole segments are appended.
* append() methods that take combining-class values always write to the StringBuilder.
* Other append() methods flush and append to the Appendable.
*/
public static final class ReorderingBuffer implements Appendable {
public ReorderingBuffer(NormalizerImpl ni, Appendable dest, int destCapacity) {
impl=ni;
app=dest;
if (app instanceof StringBuilder) {
appIsStringBuilder=true;
str=(StringBuilder)dest;
// In Java, the constructor subsumes public void init(int destCapacity)
str.ensureCapacity(destCapacity);
reorderStart=0;
if(str.length()==0) {
lastCC=0;
} else {
setIterator();
lastCC=previousCC();
// Set reorderStart after the last code point with cc<=1 if there is one.
if(lastCC>1) {
while(previousCC()>1) {}
}
reorderStart=codePointLimit;
}
} else {
appIsStringBuilder=false;
str=new StringBuilder();
reorderStart=0;
lastCC=0;
}
}
public boolean isEmpty() { return str.length()==0; }
public int length() { return str.length(); }
public int getLastCC() { return lastCC; }
public StringBuilder getStringBuilder() { return str; }
public boolean equals(CharSequence s, int start, int limit) {
return UTF16Plus.equal(str, 0, str.length(), s, start, limit);
}
// For Hangul composition, replacing the Leading consonant Jamo with the syllable.
public void setLastChar(char c) {
str.setCharAt(str.length()-1, c);
}
public void append(int c, int cc) {
if(lastCC<=cc || cc==0) {
str.appendCodePoint(c);
lastCC=cc;
if(cc<=1) {
reorderStart=str.length();
}
} else {
insert(c, cc);
}
}
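// Illustrative reordering by append(int, int) above (standard ccc values assumed):
// appending U+0301 (ccc=230) and then U+0323 (ccc=220) inserts the U+0323 before
// the U+0301, so that non-zero combining classes end up in ascending (canonical) order.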
// s must be in NFD, otherwise change the implementation.
public void append(CharSequence s, int start, int limit,
int leadCC, int trailCC) {
if(start==limit) {
return;
}
if(lastCC<=leadCC || leadCC==0) {
if(trailCC<=1) {
reorderStart=str.length()+(limit-start);
} else if(leadCC<=1) {
reorderStart=str.length()+1; // Ok if not a code point boundary.
}
str.append(s, start, limit);
lastCC=trailCC;
} else {
int c=Character.codePointAt(s, start);
start+=Character.charCount(c);
insert(c, leadCC); // insert first code point
while(start<limit) {
c=Character.codePointAt(s, start);
start+=Character.charCount(c);
if(start<limit) {
// s must be in NFD, otherwise we need to use getCC().
leadCC=getCCFromYesOrMaybe(impl.getNorm16(c));
} else {
leadCC=trailCC;
}
append(c, leadCC);
}
}
}
// The following append() methods work like C++ appendZeroCC().
// They assume that the cc or trailCC of their input is 0.
// Most of them implement Appendable interface methods.
// @Override when we switch to Java 6
public ReorderingBuffer append(char c) {
str.append(c);
lastCC=0;
reorderStart=str.length();
return this;
}
public void appendZeroCC(int c) {
str.appendCodePoint(c);
lastCC=0;
reorderStart=str.length();
}
// @Override when we switch to Java 6
public ReorderingBuffer append(CharSequence s) {
if(s.length()!=0) {
str.append(s);
lastCC=0;
reorderStart=str.length();
}
return this;
}
// @Override when we switch to Java 6
public ReorderingBuffer append(CharSequence s, int start, int limit) {
if(start!=limit) {
str.append(s, start, limit);
lastCC=0;
reorderStart=str.length();
}
return this;
}
/**
* Flushes from the intermediate StringBuilder to the Appendable,
* if they are different objects.
* Used after recomposition.
* Must be called at the end when writing to a non-StringBuilder Appendable.
*/
public void flush() {
if(appIsStringBuilder) {
reorderStart=str.length();
} else {
try {
app.append(str);
str.setLength(0);
reorderStart=0;
} catch(IOException e) {
throw new InternalError(e); // Avoid declaring "throws IOException".
}
}
lastCC=0;
}
/**
* Flushes from the intermediate StringBuilder to the Appendable,
* if they are different objects.
* Then appends the new text to the Appendable or StringBuilder.
* Normally used after quick check loops find a non-empty sequence.
*/
public ReorderingBuffer flushAndAppendZeroCC(CharSequence s, int start, int limit) {
if(appIsStringBuilder) {
str.append(s, start, limit);
reorderStart=str.length();
} else {
try {
app.append(str).append(s, start, limit);
str.setLength(0);
reorderStart=0;
} catch(IOException e) {
throw new InternalError(e); // Avoid declaring "throws IOException".
}
}
lastCC=0;
return this;
}
public void remove() {
str.setLength(0);
lastCC=0;
reorderStart=0;
}
public void removeSuffix(int suffixLength) {
int oldLength=str.length();
str.delete(oldLength-suffixLength, oldLength);
lastCC=0;
reorderStart=str.length();
}
// Inserts c somewhere before the last character.
// Requires 0<cc<lastCC which implies reorderStart<limit.
private void insert(int c, int cc) {
for(setIterator(), skipPrevious(); previousCC()>cc;) {}
// insert c at codePointLimit, after the character with prevCC<=cc
if(c<=0xffff) {
str.insert(codePointLimit, (char)c);
if(cc<=1) {
reorderStart=codePointLimit+1;
}
} else {
str.insert(codePointLimit, Character.toChars(c));
if(cc<=1) {
reorderStart=codePointLimit+2;
}
}
}
private final NormalizerImpl impl;
private final Appendable app;
private final StringBuilder str;
private final boolean appIsStringBuilder;
private int reorderStart;
private int lastCC;
// private backward iterator
private void setIterator() { codePointStart=str.length(); }
private void skipPrevious() { // Requires 0<codePointStart.
codePointLimit=codePointStart;
codePointStart=str.offsetByCodePoints(codePointStart, -1);
}
private int previousCC() { // Returns 0 if there is no previous character.
codePointLimit=codePointStart;
if(reorderStart>=codePointStart) {
return 0;
}
int c=str.codePointBefore(codePointStart);
codePointStart-=Character.charCount(c);
if(c<MIN_CCC_LCCC_CP) {
return 0;
}
return getCCFromYesOrMaybe(impl.getNorm16(c));
}
private int codePointStart, codePointLimit;
}
// TODO: Propose as public API on the UTF16 class.
// TODO: Propose widening UTF16 methods that take char to take int.
// TODO: Propose widening UTF16 methods that take String to take CharSequence.
public static final class UTF16Plus {
/**
* Assuming c is a surrogate code point (UTF16.isSurrogate(c)),
* is it a lead surrogate?
* @param c code unit or code point
* @return true or false
*/
public static boolean isSurrogateLead(int c) { return (c&0x400)==0; }
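// Lead surrogates are U+D800..U+DBFF (bit 0x0400 clear); trail surrogates are
// U+DC00..U+DFFF (bit 0x0400 set).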
/**
* Compares two CharSequence subsequences for binary equality.
* @param s1 first sequence
* @param start1 start offset in first sequence
* @param limit1 limit offset in first sequence
* @param s2 second sequence
* @param start2 start offset in second sequence
* @param limit2 limit offset in second sequence
* @return true if s1.subSequence(start1, limit1) contains the same text
* as s2.subSequence(start2, limit2)
*/
public static boolean equal(CharSequence s1, int start1, int limit1,
CharSequence s2, int start2, int limit2) {
if((limit1-start1)!=(limit2-start2)) {
return false;
}
if(s1==s2 && start1==start2) {
return true;
}
while(start1<limit1) {
if(s1.charAt(start1++)!=s2.charAt(start2++)) {
return false;
}
}
return true;
}
}
public NormalizerImpl() {}
private static final class IsAcceptable implements ICUBinary.Authenticate {
// @Override when we switch to Java 6
public boolean isDataVersionAcceptable(byte version[]) {
return version[0]==2;
}
}
private static final IsAcceptable IS_ACCEPTABLE = new IsAcceptable();
private static final int DATA_FORMAT = 0x4e726d32; // "Nrm2"
public NormalizerImpl load(ByteBuffer bytes) {
try {
dataVersion=ICUBinary.readHeaderAndDataVersion(bytes, DATA_FORMAT, IS_ACCEPTABLE);
int indexesLength=bytes.getInt()/4; // inIndexes[IX_NORM_TRIE_OFFSET]/4
if(indexesLength<=IX_MIN_MAYBE_YES) {
throw new IOException("Normalizer2 data: not enough indexes");
}
int[] inIndexes=new int[indexesLength];
inIndexes[0]=indexesLength*4;
for(int i=1; i<indexesLength; ++i) {
inIndexes[i]=bytes.getInt();
}
minDecompNoCP=inIndexes[IX_MIN_DECOMP_NO_CP];
minCompNoMaybeCP=inIndexes[IX_MIN_COMP_NO_MAYBE_CP];
minYesNo=inIndexes[IX_MIN_YES_NO];
minYesNoMappingsOnly=inIndexes[IX_MIN_YES_NO_MAPPINGS_ONLY];
minNoNo=inIndexes[IX_MIN_NO_NO];
limitNoNo=inIndexes[IX_LIMIT_NO_NO];
minMaybeYes=inIndexes[IX_MIN_MAYBE_YES];
// Read the normTrie.
int offset=inIndexes[IX_NORM_TRIE_OFFSET];
int nextOffset=inIndexes[IX_EXTRA_DATA_OFFSET];
normTrie=Trie2_16.createFromSerialized(bytes);
int trieLength=normTrie.getSerializedLength();
if(trieLength>(nextOffset-offset)) {
throw new IOException("Normalizer2 data: not enough bytes for normTrie");
}
ICUBinary.skipBytes(bytes, (nextOffset-offset)-trieLength); // skip padding after trie bytes
// Read the composition and mapping data.
offset=nextOffset;
nextOffset=inIndexes[IX_SMALL_FCD_OFFSET];
int numChars=(nextOffset-offset)/2;
char[] chars;
if(numChars!=0) {
chars=new char[numChars];
for(int i=0; i<numChars; ++i) {
chars[i]=bytes.getChar();
}
maybeYesCompositions=new String(chars);
extraData=maybeYesCompositions.substring(MIN_NORMAL_MAYBE_YES-minMaybeYes);
}
// smallFCD: new in formatVersion 2
offset=nextOffset;
smallFCD=new byte[0x100];
for(int i=0; i<0x100; ++i) {
smallFCD[i]=bytes.get();
}
// Build tccc180[].
// gennorm2 enforces lccc=0 for c<MIN_CCC_LCCC_CP=U+0300.
tccc180=new int[0x180];
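// Walk the smallFCD bitmap: each bit covers a 0x20-code-point block, so only blocks
// whose bit is set can contain characters with non-zero FCD values.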
int bits=0;
for(int c=0; c<0x180; bits>>=1) {
if((c&0xff)==0) {
bits=smallFCD[c>>8]; // one byte per 0x100 code points
}
if((bits&1)!=0) {
for(int i=0; i<0x20; ++i, ++c) {
tccc180[c]=getFCD16FromNormData(c)&0xff;
}
} else {
c+=0x20;
}
}
return this;
} catch(IOException e) {
throw new InternalError(e);
}
}
public NormalizerImpl load(String name) {
return load(ICUBinary.getRequiredData(name));
}
public int getNorm16(int c) {
return normTrie.get(c);
}
public boolean isDecompYes(int norm16) { return norm16<minYesNo || minMaybeYes<=norm16; }
public int getCC(int norm16) {
if(norm16>=MIN_NORMAL_MAYBE_YES) {
return norm16&0xff;
}
if(norm16<minNoNo || limitNoNo<=norm16) {
return 0;
}
return getCCFromNoNo(norm16);
}
public static int getCCFromYesOrMaybe(int norm16) {
return norm16>=MIN_NORMAL_MAYBE_YES ? norm16&0xff : 0;
}
/**
* Returns the FCD data for code point c.
* @param c A Unicode code point.
* @return The lccc(c) in bits 15..8 and tccc(c) in bits 7..0.
*/
public int getFCD16(int c) {
if(c<0) {
return 0;
} else if(c<0x180) {
return tccc180[c];
} else if(c<=0xffff) {
if(!singleLeadMightHaveNonZeroFCD16(c)) { return 0; }
}
return getFCD16FromNormData(c);
}
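// Illustrative getFCD16() values (assuming standard Unicode data):
// U+0301 COMBINING ACUTE ACCENT has lccc=tccc=230, so getFCD16(0x301)==0xE6E6;
// U+00E8 (NFD: e + U+0300) has lccc=0 and tccc=230, so getFCD16(0xE8)==0x00E6;
// starters without decompositions return 0.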
/** Returns the FCD data for U+0000<=c<U+0180. */
public int getFCD16FromBelow180(int c) { return tccc180[c]; }
/** Returns true if the single-or-lead code unit c might have non-zero FCD data. */
public boolean singleLeadMightHaveNonZeroFCD16(int lead) {
// 0<=lead<=0xffff
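// Each smallFCD byte covers 0x100 code points; bit (lead>>5)&7 covers the
// 0x20-code-point block that contains lead.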
byte bits=smallFCD[lead>>8];
if(bits==0) { return false; }
return ((bits>>((lead>>5)&7))&1)!=0;
}
/** Gets the FCD value from the regular normalization data. */
public int getFCD16FromNormData(int c) {
// Only loops for 1:1 algorithmic mappings.
for(;;) {
int norm16=getNorm16(c);
if(norm16<=minYesNo) {
// no decomposition or Hangul syllable, all zeros
return 0;
} else if(norm16>=MIN_NORMAL_MAYBE_YES) {
// combining mark
norm16&=0xff;
return norm16|(norm16<<8);
} else if(norm16>=minMaybeYes) {
return 0;
} else if(isDecompNoAlgorithmic(norm16)) {
c=mapAlgorithmic(c, norm16);
} else {
// c decomposes, get everything from the variable-length extra data
int firstUnit=extraData.charAt(norm16);
if((firstUnit&MAPPING_LENGTH_MASK)==0) {
// A character that is deleted (maps to an empty string) must
// get the worst-case lccc and tccc values because arbitrary
// characters on both sides will become adjacent.
return 0x1ff;
} else {
int fcd16=firstUnit>>8; // tccc
if((firstUnit&MAPPING_HAS_CCC_LCCC_WORD)!=0) {
fcd16|=extraData.charAt(norm16-1)&0xff00; // lccc
}
return fcd16;
}
}
}
}
/**
* Gets the decomposition for one code point.
* @param c code point
* @return c's decomposition, if it has one; returns null if it does not have a decomposition
*/
public String getDecomposition(int c) {
int decomp=-1;
int norm16;
for(;;) {
if(c<minDecompNoCP || isDecompYes(norm16=getNorm16(c))) {
// c does not decompose
} else if(isHangul(norm16)) {
// Hangul syllable: decompose algorithmically
StringBuilder buffer=new StringBuilder();
Hangul.decompose(c, buffer);
return buffer.toString();
} else if(isDecompNoAlgorithmic(norm16)) {
decomp=c=mapAlgorithmic(c, norm16);
continue;
} else {
// c decomposes, get everything from the variable-length extra data
int length=extraData.charAt(norm16++)&MAPPING_LENGTH_MASK;
return extraData.substring(norm16, norm16+length);
}
if(decomp<0) {
return null;
} else {
return UTF16.valueOf(decomp);
}
}
}
public static final int MIN_CCC_LCCC_CP=0x300;
public static final int MIN_YES_YES_WITH_CC=0xff01;
public static final int JAMO_VT=0xff00;
public static final int MIN_NORMAL_MAYBE_YES=0xfe00;
public static final int MAX_DELTA=0x40;
// Byte offsets from the start of the data, after the generic header.
public static final int IX_NORM_TRIE_OFFSET=0;
public static final int IX_EXTRA_DATA_OFFSET=1;
public static final int IX_SMALL_FCD_OFFSET=2;
// Code point thresholds for quick check codes.
public static final int IX_MIN_DECOMP_NO_CP=8;
public static final int IX_MIN_COMP_NO_MAYBE_CP=9;
// Norm16 value thresholds for quick check combinations and types of extra data.
// Mappings & compositions in [minYesNo..minYesNoMappingsOnly[.
public static final int IX_MIN_YES_NO=10;
public static final int IX_MIN_NO_NO=11;
public static final int IX_LIMIT_NO_NO=12;
public static final int IX_MIN_MAYBE_YES=13;
// Mappings only in [minYesNoMappingsOnly..minNoNo[.
public static final int IX_MIN_YES_NO_MAPPINGS_ONLY=14;
public static final int MAPPING_HAS_CCC_LCCC_WORD=0x80;
public static final int MAPPING_LENGTH_MASK=0x1f;
public static final int COMP_1_LAST_TUPLE=0x8000;
public static final int COMP_1_TRIPLE=1;
public static final int COMP_1_TRAIL_LIMIT=0x3400;
public static final int COMP_1_TRAIL_MASK=0x7ffe;
public static final int COMP_1_TRAIL_SHIFT=9; // 10-1 for the "triple" bit
public static final int COMP_2_TRAIL_SHIFT=6;
public static final int COMP_2_TRAIL_MASK=0xffc0;
// higher-level functionality ------------------------------------------ ***
/**
* Decomposes s[src, limit[ and writes the result to dest.
* src and limit delimit the substring of s to be decomposed.
* destLengthEstimate is the initial dest buffer capacity and can be -1.
*/
public void decompose(CharSequence s, int src, int limit, StringBuilder dest,
int destLengthEstimate) {
if(destLengthEstimate<0) {
destLengthEstimate=limit-src;
}
dest.setLength(0);
ReorderingBuffer buffer=new ReorderingBuffer(this, dest, destLengthEstimate);
decompose(s, src, limit, buffer);
}
// Dual functionality:
// buffer!=NULL: normalize
// buffer==NULL: isNormalized/quickCheck/spanQuickCheckYes
public int decompose(CharSequence s, int src, int limit,
ReorderingBuffer buffer) {
int minNoCP=minDecompNoCP;
int prevSrc;
int c=0;
int norm16=0;
// only for quick check
int prevBoundary=src;
int prevCC=0;
for(;;) {
// count code units below the minimum or with irrelevant data for the quick check
for(prevSrc=src; src!=limit;) {
if( (c=s.charAt(src))<minNoCP ||
isMostDecompYesAndZeroCC(norm16=normTrie.getFromU16SingleLead((char)c))
) {
++src;
} else if(!UTF16.isSurrogate((char)c)) {
break;
} else {
char c2;
if(UTF16Plus.isSurrogateLead(c)) {
if((src+1)!=limit && Character.isLowSurrogate(c2=s.charAt(src+1))) {
c=Character.toCodePoint((char)c, c2);
}
} else /* trail surrogate */ {
if(prevSrc<src && Character.isHighSurrogate(c2=s.charAt(src-1))) {
--src;
c=Character.toCodePoint(c2, (char)c);
}
}
if(isMostDecompYesAndZeroCC(norm16=getNorm16(c))) {
src+=Character.charCount(c);
} else {
break;
}
}
}
// copy these code units all at once
if(src!=prevSrc) {
if(buffer!=null) {
buffer.flushAndAppendZeroCC(s, prevSrc, src);
} else {
prevCC=0;
prevBoundary=src;
}
}
if(src==limit) {
break;
}
// Check one above-minimum, relevant code point.
src+=Character.charCount(c);
if(buffer!=null) {
decompose(c, norm16, buffer);
} else {
if(isDecompYes(norm16)) {
int cc=getCCFromYesOrMaybe(norm16);
if(prevCC<=cc || cc==0) {
prevCC=cc;
if(cc<=1) {
prevBoundary=src;
}
continue;
}
}
return prevBoundary; // "no" or cc out of order
}
}
return src;
}
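// Illustrative usage of the dual-purpose decompose() above (a sketch, not the API contract):
//   decompose(s, 0, s.length(), buffer);          // normalize: write NFD to buffer
//   int span = decompose(s, 0, s.length(), null); // quick check: span==s.length() means "yes"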
public void decomposeAndAppend(CharSequence s, boolean doDecompose, ReorderingBuffer buffer) {
int limit=s.length();
if(limit==0) {
return;
}
if(doDecompose) {
decompose(s, 0, limit, buffer);
return;
}
// Just merge the strings at the boundary.
int c=Character.codePointAt(s, 0);
int src=0;
int firstCC, prevCC, cc;
firstCC=prevCC=cc=getCC(getNorm16(c));
while(cc!=0) {
prevCC=cc;
src+=Character.charCount(c);
if(src>=limit) {
break;
}
c=Character.codePointAt(s, src);
cc=getCC(getNorm16(c));
};
buffer.append(s, 0, src, firstCC, prevCC);
buffer.append(s, src, limit);
}
// Very similar to composeQuickCheck(): Make the same changes in both places if relevant.
// doCompose: normalize
// !doCompose: isNormalized (buffer must be empty and initialized)
public boolean compose(CharSequence s, int src, int limit,
boolean onlyContiguous,
boolean doCompose,
ReorderingBuffer buffer) {
int minNoMaybeCP=minCompNoMaybeCP;
/*
* prevBoundary points to the last character before the current one
* that has a composition boundary before it with ccc==0 and quick check "yes".
* Keeping track of prevBoundary saves us looking for a composition boundary
* when we find a "no" or "maybe".
*
* When we back out from prevSrc back to prevBoundary,
* then we also remove those same characters (which had been simply copied
* or canonically-order-inserted) from the ReorderingBuffer.
* Therefore, at all times, the [prevBoundary..prevSrc[ source units
* must correspond 1:1 to destination units at the end of the destination buffer.
*/
int prevBoundary=src;
int prevSrc;
int c=0;
int norm16=0;
// only for isNormalized
int prevCC=0;
for(;;) {
// count code units below the minimum or with irrelevant data for the quick check
for(prevSrc=src; src!=limit;) {
if( (c=s.charAt(src))<minNoMaybeCP ||
isCompYesAndZeroCC(norm16=normTrie.getFromU16SingleLead((char)c))
) {
++src;
} else if(!UTF16.isSurrogate((char)c)) {
break;
} else {
char c2;
if(UTF16Plus.isSurrogateLead(c)) {
if((src+1)!=limit && Character.isLowSurrogate(c2=s.charAt(src+1))) {
c=Character.toCodePoint((char)c, c2);
}
} else /* trail surrogate */ {
if(prevSrc<src && Character.isHighSurrogate(c2=s.charAt(src-1))) {
--src;
c=Character.toCodePoint(c2, (char)c);
}
}
if(isCompYesAndZeroCC(norm16=getNorm16(c))) {
src+=Character.charCount(c);
} else {
break;
}
}
}
// copy these code units all at once
if(src!=prevSrc) {
if(src==limit) {
if(doCompose) {
buffer.flushAndAppendZeroCC(s, prevSrc, src);
}
break;
}
// Set prevBoundary to the last character in the quick check loop.
prevBoundary=src-1;
if( Character.isLowSurrogate(s.charAt(prevBoundary)) && prevSrc<prevBoundary &&
Character.isHighSurrogate(s.charAt(prevBoundary-1))
) {
--prevBoundary;
}
if(doCompose) {
// The last "quick check yes" character is excluded from the
// flush-and-append call in case it needs to be modified.
buffer.flushAndAppendZeroCC(s, prevSrc, prevBoundary);
buffer.append(s, prevBoundary, src);
} else {
prevCC=0;
}
// The start of the current character (c).
prevSrc=src;
} else if(src==limit) {
break;
}
src+=Character.charCount(c);
/*
* isCompYesAndZeroCC(norm16) is false, that is, norm16>=minNoNo.
* c is either a "noNo" (has a mapping) or a "maybeYes" (combines backward)
* or has ccc!=0.
* Check for Jamo V/T, then for regular characters.
* c is not a Hangul syllable or Jamo L because those have "yes" properties.
*/
if(isJamoVT(norm16) && prevBoundary!=prevSrc) {
char prev=s.charAt(prevSrc-1);
boolean needToDecompose=false;
if(c<Hangul.JAMO_T_BASE) {
// c is a Jamo Vowel, compose with previous Jamo L and following Jamo T.
prev-=Hangul.JAMO_L_BASE;
if(prev<Hangul.JAMO_L_COUNT) {
if(!doCompose) {
return false;
}
char syllable=(char)
(Hangul.HANGUL_BASE+
(prev*Hangul.JAMO_V_COUNT+(c-Hangul.JAMO_V_BASE))*
Hangul.JAMO_T_COUNT);
char t;
if(src!=limit && (t=(char)(s.charAt(src)-Hangul.JAMO_T_BASE))<Hangul.JAMO_T_COUNT) {
++src;
syllable+=t; // The next character was a Jamo T.
prevBoundary=src;
buffer.setLastChar(syllable);
continue;
}
// If we see L+V+x where x!=T then we drop to the slow path,
// decompose and recompose.
// This is to deal with NFKC finding normal L and V but a
// compatibility variant of a T. We need to either fully compose that
// combination here (which would complicate the code and may not work
// with strange custom data) or use the slow path -- or else our replacing
// two input characters (L+V) with one output character (LV syllable)
// would violate the invariant that [prevBoundary..prevSrc[ has the same
// length as what we appended to the buffer since prevBoundary.
needToDecompose=true;
}
} else if(Hangul.isHangulWithoutJamoT(prev)) {
// c is a Jamo Trailing consonant,
// compose with previous Hangul LV that does not contain a Jamo T.
if(!doCompose) {
return false;
}
buffer.setLastChar((char)(prev+c-Hangul.JAMO_T_BASE));
prevBoundary=src;
continue;
}
if(!needToDecompose) {
// The Jamo V/T did not compose into a Hangul syllable.
if(doCompose) {
buffer.append((char)c);
} else {
prevCC=0;
}
continue;
}
}
/*
* Source buffer pointers:
*
* all done quick check current char not yet
* "yes" but (c) processed
* may combine
* forward
* [-------------[-------------[-------------[-------------[
* | | | | |
* orig. src prevBoundary prevSrc src limit
*
*
* Destination buffer pointers inside the ReorderingBuffer:
*
* all done might take not filled yet
* characters for
* reordering
* [-------------[-------------[-------------[
* | | | |
* start reorderStart limit |
* +remainingCap.+
*/
if(norm16>=MIN_YES_YES_WITH_CC) {
int cc=norm16&0xff; // cc!=0
if( onlyContiguous && // FCC
(doCompose ? buffer.getLastCC() : prevCC)==0 &&
prevBoundary<prevSrc &&
// buffer.getLastCC()==0 && prevBoundary<prevSrc tell us that
// [prevBoundary..prevSrc[ (which is exactly one character under these conditions)
// passed the quick check "yes && ccc==0" test.
// Check whether the last character was a "yesYes" or a "yesNo".
// If a "yesNo", then we get its trailing ccc from its
// mapping and check for canonical order.
// All other cases are ok.
getTrailCCFromCompYesAndZeroCC(s, prevBoundary, prevSrc)>cc
) {
// Fails FCD test, need to decompose and contiguously recompose.
if(!doCompose) {
return false;
}
} else if(doCompose) {
buffer.append(c, cc);
continue;
} else if(prevCC<=cc) {
prevCC=cc;
continue;
} else {
return false;
}
} else if(!doCompose && !isMaybeOrNonZeroCC(norm16)) {
return false;
}
/*
* Find appropriate boundaries around this character,
* decompose the source text from between the boundaries,
* and recompose it.
*
* We may need to remove the last few characters from the ReorderingBuffer
* to account for source text that was copied or appended
* but needs to take part in the recomposition.
*/
/*
* Find the last composition boundary in [prevBoundary..src[.
* It is either the decomposition of the current character (at prevSrc),
* or prevBoundary.
*/
if(hasCompBoundaryBefore(c, norm16)) {
prevBoundary=prevSrc;
} else if(doCompose) {
buffer.removeSuffix(prevSrc-prevBoundary);
}
// Find the next composition boundary in [src..limit[ -
// modifies src to point to the next starter.
src=findNextCompBoundary(s, src, limit);
// Decompose [prevBoundary..src[ into the buffer and then recompose that part of it.
int recomposeStartIndex=buffer.length();
decomposeShort(s, prevBoundary, src, buffer);
recompose(buffer, recomposeStartIndex, onlyContiguous);
if(!doCompose) {
if(!buffer.equals(s, prevBoundary, src)) {
return false;
}
buffer.remove();
prevCC=0;
}
// Move to the next starter. We never need to look back before this point again.
prevBoundary=src;
}
return true;
}
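// Illustrative usage of compose() above (a sketch, not the API contract):
//   compose(s, 0, s.length(), false, true, buffer);   // write NFC to buffer
//   boolean isNFC = compose(s, 0, s.length(), false, false,
//       new ReorderingBuffer(this, new StringBuilder(), 5));  // empty scratch buffer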
/**
* Very similar to compose(): Make the same changes in both places if relevant.
* doSpan: spanQuickCheckYes (ignore bit 0 of the return value)
* !doSpan: quickCheck
* @return bits 31..1: spanQuickCheckYes (==s.length() if "yes") and
* bit 0: set if "maybe"; otherwise, if the span length&lt;s.length()
* then the quick check result is "no"
*/
public int composeQuickCheck(CharSequence s, int src, int limit,
boolean onlyContiguous, boolean doSpan) {
int qcResult=0;
int minNoMaybeCP=minCompNoMaybeCP;
/*
* prevBoundary points to the last character before the current one
* that has a composition boundary before it with ccc==0 and quick check "yes".
*/
int prevBoundary=src;
int prevSrc;
int c=0;
int norm16=0;
int prevCC=0;
for(;;) {
// count code units below the minimum or with irrelevant data for the quick check
for(prevSrc=src;;) {
if(src==limit) {
return (src<<1)|qcResult; // "yes" or "maybe"
}
if( (c=s.charAt(src))<minNoMaybeCP ||
isCompYesAndZeroCC(norm16=normTrie.getFromU16SingleLead((char)c))
) {
++src;
} else if(!UTF16.isSurrogate((char)c)) {
break;
} else {
char c2;
if(UTF16Plus.isSurrogateLead(c)) {
if((src+1)!=limit && Character.isLowSurrogate(c2=s.charAt(src+1))) {
c=Character.toCodePoint((char)c, c2);
}
} else /* trail surrogate */ {
if(prevSrc<src && Character.isHighSurrogate(c2=s.charAt(src-1))) {
--src;
c=Character.toCodePoint(c2, (char)c);
}
}
if(isCompYesAndZeroCC(norm16=getNorm16(c))) {
src+=Character.charCount(c);
} else {
break;
}
}
}
if(src!=prevSrc) {
// Set prevBoundary to the last character in the quick check loop.
prevBoundary=src-1;
if( Character.isLowSurrogate(s.charAt(prevBoundary)) && prevSrc<prevBoundary &&
Character.isHighSurrogate(s.charAt(prevBoundary-1))
) {
--prevBoundary;
}
prevCC=0;
// The start of the current character (c).
prevSrc=src;
}
src+=Character.charCount(c);
/*
* isCompYesAndZeroCC(norm16) is false, that is, norm16>=minNoNo.
* c is either a "noNo" (has a mapping) or a "maybeYes" (combines backward)
* or has ccc!=0.
*/
if(isMaybeOrNonZeroCC(norm16)) {
int cc=getCCFromYesOrMaybe(norm16);
if( onlyContiguous && // FCC
cc!=0 &&
prevCC==0 &&
prevBoundary<prevSrc &&
// prevCC==0 && prevBoundary<prevSrc tell us that
// [prevBoundary..prevSrc[ (which is exactly one character under these conditions)
// passed the quick check "yes && ccc==0" test.
// Check whether the last character was a "yesYes" or a "yesNo".
// If a "yesNo", then we get its trailing ccc from its
// mapping and check for canonical order.
// All other cases are ok.
getTrailCCFromCompYesAndZeroCC(s, prevBoundary, prevSrc)>cc
) {
// Fails FCD test.
} else if(prevCC<=cc || cc==0) {
prevCC=cc;
if(norm16<MIN_YES_YES_WITH_CC) {
if(!doSpan) {
qcResult=1;
} else {
return prevBoundary<<1; // spanYes does not care to know it's "maybe"
}
}
continue;
}
}
return prevBoundary<<1; // "no"
}
}
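// Illustrative decoding of composeQuickCheck()'s return value r (per its javadoc above):
//   int spanLength = r >>> 1;      // == s.length() when the quick check result is "yes"
//   boolean maybe = (r & 1) != 0;  // quickCheck only; spanQuickCheckYes ignores bit 0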
public void composeAndAppend(CharSequence s,
boolean doCompose,
boolean onlyContiguous,
ReorderingBuffer buffer) {
int src=0, limit=s.length();
if(!buffer.isEmpty()) {
int firstStarterInSrc=findNextCompBoundary(s, 0, limit);
if(0!=firstStarterInSrc) {
int lastStarterInDest=findPreviousCompBoundary(buffer.getStringBuilder(),
buffer.length());
StringBuilder middle=new StringBuilder((buffer.length()-lastStarterInDest)+
firstStarterInSrc+16);
middle.append(buffer.getStringBuilder(), lastStarterInDest, buffer.length());
buffer.removeSuffix(buffer.length()-lastStarterInDest);
middle.append(s, 0, firstStarterInSrc);
compose(middle, 0, middle.length(), onlyContiguous, true, buffer);
src=firstStarterInSrc;
}
}
if(doCompose) {
compose(s, src, limit, onlyContiguous, true, buffer);
} else {
buffer.append(s, src, limit);
}
}
// Dual functionality:
// buffer!=NULL: normalize
// buffer==NULL: isNormalized/quickCheck/spanQuickCheckYes
public int makeFCD(CharSequence s, int src, int limit, ReorderingBuffer buffer) {
// Note: In this function we use buffer->appendZeroCC() because we track
// the lead and trail combining classes here, rather than leaving it to
// the ReorderingBuffer.
// The exception is the call to decomposeShort() which uses the buffer
// in the normal way.
// Tracks the last FCD-safe boundary, before lccc=0 or after properly-ordered tccc<=1.
// Similar to the prevBoundary in the compose() implementation.
int prevBoundary=src;
int prevSrc;
int c=0;
int prevFCD16=0;
int fcd16=0;
for(;;) {
// count code units with lccc==0
for(prevSrc=src; src!=limit;) {
if((c=s.charAt(src))<MIN_CCC_LCCC_CP) {
prevFCD16=~c;
++src;
} else if(!singleLeadMightHaveNonZeroFCD16(c)) {
prevFCD16=0;
++src;
} else {
if(UTF16.isSurrogate((char)c)) {
char c2;
if(UTF16Plus.isSurrogateLead(c)) {
if((src+1)!=limit && Character.isLowSurrogate(c2=s.charAt(src+1))) {
c=Character.toCodePoint((char)c, c2);
}
} else /* trail surrogate */ {
if(prevSrc<src && Character.isHighSurrogate(c2=s.charAt(src-1))) {
--src;
c=Character.toCodePoint(c2, (char)c);
}
}
}
if((fcd16=getFCD16FromNormData(c))<=0xff) {
prevFCD16=fcd16;
src+=Character.charCount(c);
} else {
break;
}
}
}
// copy these code units all at once
if(src!=prevSrc) {
if(src==limit) {
if(buffer!=null) {
buffer.flushAndAppendZeroCC(s, prevSrc, src);
}
break;
}
prevBoundary=src;
// We know that the previous character's lccc==0.
if(prevFCD16<0) {
// Fetching the fcd16 value was deferred for this below-U+0300 code point.
int prev=~prevFCD16;
prevFCD16= prev<0x180 ? tccc180[prev] : getFCD16FromNormData(prev);
if(prevFCD16>1) {
--prevBoundary;
}
} else {
int p=src-1;
if( Character.isLowSurrogate(s.charAt(p)) && prevSrc<p &&
Character.isHighSurrogate(s.charAt(p-1))
) {
--p;
// Need to fetch the previous character's FCD value because
// prevFCD16 was just for the trail surrogate code point.
prevFCD16=getFCD16FromNormData(Character.toCodePoint(s.charAt(p), s.charAt(p+1)));
// Still known to have lccc==0 because its lead surrogate unit had lccc==0.
}
if(prevFCD16>1) {
prevBoundary=p;
}
}
if(buffer!=null) {
// The last lccc==0 character is excluded from the
// flush-and-append call in case it needs to be modified.
buffer.flushAndAppendZeroCC(s, prevSrc, prevBoundary);
buffer.append(s, prevBoundary, src);
}
// The start of the current character (c).
prevSrc=src;
} else if(src==limit) {
break;
}
src+=Character.charCount(c);
// The current character (c) at [prevSrc..src[ has a non-zero lead combining class.
// Check for proper order, and decompose locally if necessary.
if((prevFCD16&0xff)<=(fcd16>>8)) {
// proper order: prev tccc <= current lccc
if((fcd16&0xff)<=1) {
prevBoundary=src;
}
if(buffer!=null) {
buffer.appendZeroCC(c);
}
prevFCD16=fcd16;
continue;
} else if(buffer==null) {
return prevBoundary; // quick check "no"
} else {
/*
* Back out the part of the source that we copied or appended
* already but is now going to be decomposed.
* prevSrc is set to after what was copied/appended.
*/
buffer.removeSuffix(prevSrc-prevBoundary);
/*
* Find the part of the source that needs to be decomposed,
* up to the next safe boundary.
*/
src=findNextFCDBoundary(s, src, limit);
/*
* The source text does not fulfill the conditions for FCD.
* Decompose and reorder a limited piece of the text.
*/
decomposeShort(s, prevBoundary, src, buffer);
prevBoundary=src;
prevFCD16=0;
}
}
return src;
}
// Note: hasDecompBoundary() could be implemented as aliases to
// hasFCDBoundaryBefore() and hasFCDBoundaryAfter()
// at the cost of building the FCD trie for a decomposition normalizer.
public boolean hasDecompBoundary(int c, boolean before) {
for(;;) {
if(c<minDecompNoCP) {
return true;
}
int norm16=getNorm16(c);
if(isHangul(norm16) || isDecompYesAndZeroCC(norm16)) {
return true;
} else if(norm16>MIN_NORMAL_MAYBE_YES) {
return false; // ccc!=0
} else if(isDecompNoAlgorithmic(norm16)) {
c=mapAlgorithmic(c, norm16);
} else {
// c decomposes, get everything from the variable-length extra data
int firstUnit=extraData.charAt(norm16);
if((firstUnit&MAPPING_LENGTH_MASK)==0) {
return false;
}
if(!before) {
// decomp after-boundary: same as hasFCDBoundaryAfter(),
// fcd16<=1 || trailCC==0
if(firstUnit>0x1ff) {
return false; // trailCC>1
}
if(firstUnit<=0xff) {
return true; // trailCC==0
}
// if(trailCC==1) test leadCC==0, same as checking for before-boundary
}
// true if leadCC==0 (hasFCDBoundaryBefore())
return (firstUnit&MAPPING_HAS_CCC_LCCC_WORD)==0 || (extraData.charAt(norm16-1)&0xff00)==0;
}
}
}
public boolean hasCompBoundaryBefore(int c) {
return c<minCompNoMaybeCP || hasCompBoundaryBefore(c, getNorm16(c));
}
private boolean isMaybe(int norm16) { return minMaybeYes<=norm16 && norm16<=JAMO_VT; }
private boolean isMaybeOrNonZeroCC(int norm16) { return norm16>=minMaybeYes; }
private static boolean isJamoVT(int norm16) { return norm16==JAMO_VT; }
private boolean isHangul(int norm16) { return norm16==minYesNo; }
private boolean isCompYesAndZeroCC(int norm16) { return norm16<minNoNo; }
// UBool isCompYes(uint16_t norm16) const {
// return norm16>=MIN_YES_YES_WITH_CC || norm16<minNoNo;
// }
// UBool isCompYesOrMaybe(uint16_t norm16) const {
// return norm16<minNoNo || minMaybeYes<=norm16;
// }
// private boolean hasZeroCCFromDecompYes(int norm16) {
// return norm16<=MIN_NORMAL_MAYBE_YES || norm16==JAMO_VT;
// }
private boolean isDecompYesAndZeroCC(int norm16) {
return norm16<minYesNo ||
norm16==JAMO_VT ||
(minMaybeYes<=norm16 && norm16<=MIN_NORMAL_MAYBE_YES);
}
/**
* A little faster and simpler than isDecompYesAndZeroCC() but does not include
* the MaybeYes which combine-forward and have ccc=0.
* (Standard Unicode 5.2 normalization does not have such characters.)
*/
private boolean isMostDecompYesAndZeroCC(int norm16) {
return norm16<minYesNo || norm16==MIN_NORMAL_MAYBE_YES || norm16==JAMO_VT;
}
private boolean isDecompNoAlgorithmic(int norm16) { return norm16>=limitNoNo; }
// For use with isCompYes().
// Perhaps the compiler can combine the two tests for MIN_YES_YES_WITH_CC.
// static uint8_t getCCFromYes(uint16_t norm16) {
// return norm16>=MIN_YES_YES_WITH_CC ? (uint8_t)norm16 : 0;
// }
private int getCCFromNoNo(int norm16) {
if((extraData.charAt(norm16)&MAPPING_HAS_CCC_LCCC_WORD)!=0) {
return extraData.charAt(norm16-1)&0xff;
} else {
return 0;
}
}
// requires that the [cpStart..cpLimit[ character passes isCompYesAndZeroCC()
int getTrailCCFromCompYesAndZeroCC(CharSequence s, int cpStart, int cpLimit) {
int c;
if(cpStart==(cpLimit-1)) {
c=s.charAt(cpStart);
} else {
c=Character.codePointAt(s, cpStart);
}
int prevNorm16=getNorm16(c);
if(prevNorm16<=minYesNo) {
return 0; // yesYes and Hangul LV/LVT have ccc=tccc=0
} else {
return extraData.charAt(prevNorm16)>>8; // tccc from yesNo
}
}
// Requires algorithmic-NoNo.
private int mapAlgorithmic(int c, int norm16) {
return c+norm16-(minMaybeYes-MAX_DELTA-1);
}
// Requires minYesNo<norm16<limitNoNo.
// private int getMapping(int norm16) { return /*extraData+*/norm16; }
/**
* @return index into maybeYesCompositions, or -1
*/
private int getCompositionsListForDecompYes(int norm16) {
if(norm16==0 || MIN_NORMAL_MAYBE_YES<=norm16) {
return -1;
} else {
if((norm16-=minMaybeYes)<0) {
// norm16<minMaybeYes: index into extraData which is a substring at
// maybeYesCompositions[MIN_NORMAL_MAYBE_YES-minMaybeYes]
// same as (MIN_NORMAL_MAYBE_YES-minMaybeYes)+norm16
norm16+=MIN_NORMAL_MAYBE_YES; // for yesYes; if Jamo L: harmless empty list
}
return norm16;
}
}
/**
* @return index into maybeYesCompositions
*/
private int getCompositionsListForComposite(int norm16) {
// composite has both mapping & compositions list
int firstUnit=extraData.charAt(norm16);
return (MIN_NORMAL_MAYBE_YES-minMaybeYes)+norm16+ // mapping in maybeYesCompositions
1+ // +1 to skip the first unit with the mapping length
(firstUnit&MAPPING_LENGTH_MASK); // + mapping length
}
// Decompose a short piece of text which is likely to contain characters that
// fail the quick check loop and/or where the quick check loop's overhead
// is unlikely to be amortized.
// Called by the compose() and makeFCD() implementations.
// Public in Java for collation implementation code.
public void decomposeShort(CharSequence s, int src, int limit,
ReorderingBuffer buffer) {
while(src<limit) {
int c=Character.codePointAt(s, src);
src+=Character.charCount(c);
decompose(c, getNorm16(c), buffer);
}
}
private void decompose(int c, int norm16,
ReorderingBuffer buffer) {
// Only loops for 1:1 algorithmic mappings.
for(;;) {
// get the decomposition and the lead and trail cc's
if(isDecompYes(norm16)) {
// c does not decompose
buffer.append(c, getCCFromYesOrMaybe(norm16));
} else if(isHangul(norm16)) {
// Hangul syllable: decompose algorithmically
Hangul.decompose(c, buffer);
} else if(isDecompNoAlgorithmic(norm16)) {
c=mapAlgorithmic(c, norm16);
norm16=getNorm16(c);
continue;
} else {
// c decomposes, get everything from the variable-length extra data
int firstUnit=extraData.charAt(norm16);
int length=firstUnit&MAPPING_LENGTH_MASK;
int leadCC, trailCC;
trailCC=firstUnit>>8;
if((firstUnit&MAPPING_HAS_CCC_LCCC_WORD)!=0) {
leadCC=extraData.charAt(norm16-1)>>8;
} else {
leadCC=0;
}
++norm16; // skip over the firstUnit
buffer.append(extraData, norm16, norm16+length, leadCC, trailCC);
}
return;
}
}
/**
* Finds the recomposition result for
* a forward-combining "lead" character,
* specified with a pointer to its compositions list,
* and a backward-combining "trail" character.
*
* <p>If the lead and trail characters combine, then this function returns
* the following "compositeAndFwd" value:
* <pre>
* Bits 21..1 composite character
* Bit 0 set if the composite is a forward-combining starter
* </pre>
* otherwise it returns -1.
*
* <p>The compositions list has (trail, compositeAndFwd) pair entries,
* encoded as either pairs or triples of 16-bit units.
* The last entry has the high bit of its first unit set.
*
* <p>The list is sorted by ascending trail characters (there are no duplicates).
* A linear search is used.
*
* <p>See normalizer2impl.h for a more detailed description
* of the compositions list format.
*/
private static int combine(String compositions, int list, int trail) {
int key1, firstUnit;
if(trail<COMP_1_TRAIL_LIMIT) {
// trail character is 0..33FF
// result entry may have 2 or 3 units
key1=(trail<<1);
while(key1>(firstUnit=compositions.charAt(list))) {
list+=2+(firstUnit&COMP_1_TRIPLE);
}
if(key1==(firstUnit&COMP_1_TRAIL_MASK)) {
if((firstUnit&COMP_1_TRIPLE)!=0) {
return ((int)compositions.charAt(list+1)<<16)|compositions.charAt(list+2);
} else {
return compositions.charAt(list+1);
}
}
} else {
// trail character is 3400..10FFFF
// result entry has 3 units
key1=COMP_1_TRAIL_LIMIT+(((trail>>COMP_1_TRAIL_SHIFT))&~COMP_1_TRIPLE);
int key2=(trail<<COMP_2_TRAIL_SHIFT)&0xffff;
int secondUnit;
for(;;) {
if(key1>(firstUnit=compositions.charAt(list))) {
list+=2+(firstUnit&COMP_1_TRIPLE);
} else if(key1==(firstUnit&COMP_1_TRAIL_MASK)) {
if(key2>(secondUnit=compositions.charAt(list+1))) {
if((firstUnit&COMP_1_LAST_TUPLE)!=0) {
break;
} else {
list+=3;
}
} else if(key2==(secondUnit&COMP_2_TRAIL_MASK)) {
return ((secondUnit&~COMP_2_TRAIL_MASK)<<16)|compositions.charAt(list+2);
} else {
break;
}
} else {
break;
}
}
}
return -1;
}
/*
* Recomposes the buffer text starting at recomposeStartIndex
* (which is in NFD - decomposed and canonically ordered),
* and truncates the buffer contents.
*
* Note that recomposition never lengthens the text:
* Any character consists of either one or two code units;
* a composition may contain at most one more code unit than the original starter,
* while the combining mark that is removed has at least one code unit.
*/
private void recompose(ReorderingBuffer buffer, int recomposeStartIndex,
boolean onlyContiguous) {
StringBuilder sb=buffer.getStringBuilder();
int p=recomposeStartIndex;
if(p==sb.length()) {
return;
}
int starter, pRemove;
int compositionsList;
int c, compositeAndFwd;
int norm16;
int cc, prevCC;
boolean starterIsSupplementary;
// Some of the following variables are not used until we have a forward-combining starter
// and are only initialized now to avoid compiler warnings.
compositionsList=-1; // used as indicator for whether we have a forward-combining starter
starter=-1;
starterIsSupplementary=false;
prevCC=0;
for(;;) {
c=sb.codePointAt(p);
p+=Character.charCount(c);
norm16=getNorm16(c);
cc=getCCFromYesOrMaybe(norm16);
if( // this character combines backward and
isMaybe(norm16) &&
// we have seen a starter that combines forward and
compositionsList>=0 &&
// the backward-combining character is not blocked
(prevCC<cc || prevCC==0)) {
if(isJamoVT(norm16)) {
// c is a Jamo V/T, see if we can compose it with the previous character.
if(c<Hangul.JAMO_T_BASE) {
// c is a Jamo Vowel, compose with previous Jamo L and following Jamo T.
char prev=(char)(sb.charAt(starter)-Hangul.JAMO_L_BASE);
if(prev<Hangul.JAMO_L_COUNT) {
pRemove=p-1;
char syllable=(char)
(Hangul.HANGUL_BASE+
(prev*Hangul.JAMO_V_COUNT+(c-Hangul.JAMO_V_BASE))*
Hangul.JAMO_T_COUNT);
char t;
if(p!=sb.length() && (t=(char)(sb.charAt(p)-Hangul.JAMO_T_BASE))<Hangul.JAMO_T_COUNT) {
++p;
syllable+=t; // The next character was a Jamo T.
}
sb.setCharAt(starter, syllable);
// remove the Jamo V/T
sb.delete(pRemove, p);
p=pRemove;
}
}
/*
* No "else" for Jamo T:
* Since the input is in NFD, there are no Hangul LV syllables that
* a Jamo T could combine with.
* All Jamo Ts are combined above when handling Jamo Vs.
*/
if(p==sb.length()) {
break;
}
compositionsList=-1;
continue;
} else if((compositeAndFwd=combine(maybeYesCompositions, compositionsList, c))>=0) {
// The starter and the combining mark (c) do combine.
int composite=compositeAndFwd>>1;
// Remove the combining mark.
pRemove=p-Character.charCount(c); // pRemove & p: start & limit of the combining mark
sb.delete(pRemove, p);
p=pRemove;
// Replace the starter with the composite.
if(starterIsSupplementary) {
if(composite>0xffff) {
// both are supplementary
sb.setCharAt(starter, UTF16.getLeadSurrogate(composite));
sb.setCharAt(starter+1, UTF16.getTrailSurrogate(composite));
} else {
sb.setCharAt(starter, (char)composite);
sb.deleteCharAt(starter+1);
// The composite is shorter than the starter,
// move the intermediate characters forward one.
starterIsSupplementary=false;
--p;
}
} else if(composite>0xffff) {
// The composite is longer than the starter,
// move the intermediate characters back one.
starterIsSupplementary=true;
sb.setCharAt(starter, UTF16.getLeadSurrogate(composite));
sb.insert(starter+1, UTF16.getTrailSurrogate(composite));
++p;
} else {
// both are on the BMP
sb.setCharAt(starter, (char)composite);
}
// Keep prevCC because we removed the combining mark.
if(p==sb.length()) {
break;
}
// Is the composite a starter that combines forward?
if((compositeAndFwd&1)!=0) {
compositionsList=
getCompositionsListForComposite(getNorm16(composite));
} else {
compositionsList=-1;
}
// We combined; continue with looking for compositions.
continue;
}
}
// no combination this time
prevCC=cc;
if(p==sb.length()) {
break;
}
// If c did not combine, then check if it is a starter.
if(cc==0) {
// Found a new starter.
if((compositionsList=getCompositionsListForDecompYes(norm16))>=0) {
// It may combine with something, prepare for it.
if(c<=0xffff) {
starterIsSupplementary=false;
starter=p-1;
} else {
starterIsSupplementary=true;
starter=p-2;
}
}
} else if(onlyContiguous) {
// FCC: no discontiguous compositions; any intervening character blocks.
compositionsList=-1;
}
}
buffer.flush();
}
/**
* Does c have a composition boundary before it?
* True if its decomposition begins with a character that has
* ccc=0 && NFC_QC=Yes (isCompYesAndZeroCC()).
* As a shortcut, this is true if c itself has ccc=0 && NFC_QC=Yes
* (isCompYesAndZeroCC()) so we need not decompose.
*/
private boolean hasCompBoundaryBefore(int c, int norm16) {
for(;;) {
if(isCompYesAndZeroCC(norm16)) {
return true;
} else if(isMaybeOrNonZeroCC(norm16)) {
return false;
} else if(isDecompNoAlgorithmic(norm16)) {
c=mapAlgorithmic(c, norm16);
norm16=getNorm16(c);
} else {
// c decomposes, get everything from the variable-length extra data
int firstUnit=extraData.charAt(norm16);
if((firstUnit&MAPPING_LENGTH_MASK)==0) {
return false;
}
if((firstUnit&MAPPING_HAS_CCC_LCCC_WORD)!=0 && (extraData.charAt(norm16-1)&0xff00)!=0) {
return false; // non-zero leadCC
}
return isCompYesAndZeroCC(getNorm16(Character.codePointAt(extraData, norm16+1)));
}
}
}
private int findPreviousCompBoundary(CharSequence s, int p) {
while(p>0) {
int c=Character.codePointBefore(s, p);
p-=Character.charCount(c);
if(hasCompBoundaryBefore(c)) {
break;
}
// We could also test hasCompBoundaryAfter() and return iter.codePointLimit,
// but that's probably not worth the extra cost.
}
return p;
}
private int findNextCompBoundary(CharSequence s, int p, int limit) {
while(p<limit) {
int c=Character.codePointAt(s, p);
int norm16=normTrie.get(c);
if(hasCompBoundaryBefore(c, norm16)) {
break;
}
p+=Character.charCount(c);
}
return p;
}
private int findNextFCDBoundary(CharSequence s, int p, int limit) {
while(p<limit) {
int c=Character.codePointAt(s, p);
if(c<MIN_CCC_LCCC_CP || getFCD16(c)<=0xff) {
break;
}
p+=Character.charCount(c);
}
return p;
}
/**
* Gets the canonical decompositions: fills chars[] with decomposable code points
* and decomps[] with their decompositions, and returns the count.
* Used by ComposedCharIter.
*/
public static int getDecompose(int chars[], String decomps[]) {
Normalizer2 impl = Normalizer2.getNFDInstance();
int length=0;
int norm16 = 0;
int ch = -1;
int i = 0;
while (++ch < 0x2fa1e) { //no canonical decomposition above U+2FA1D
//TBD !!!! the hack code here saves us about 50ms for startup
//need a better solution/lookup
if (ch == 0x30ff)
ch = 0xf900;
else if (ch == 0x115bc)
ch = 0x1d15e;
else if (ch == 0x1d1c1)
ch = 0x2f800;
String s = impl.getDecomposition(ch);
if(s != null && i < chars.length) {
chars[i] = ch;
decomps[i++] = s;
}
}
return i;
}
//------------------------------------------------------
// special method for Collation (RBTableBuilder.build())
//------------------------------------------------------
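// The quoted characters are the ASCII ranges TAB..CR, space..'/', ':'..'@', '['..'`'
// and '{'..'~', i.e. whitespace and punctuation that must be quoted in collation rules.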
private static boolean needSingleQuotation(char c) {
return (c >= 0x0009 && c <= 0x000D) ||
(c >= 0x0020 && c <= 0x002F) ||
(c >= 0x003A && c <= 0x0040) ||
(c >= 0x005B && c <= 0x0060) ||
(c >= 0x007B && c <= 0x007E);
}
public static String canonicalDecomposeWithSingleQuotation(String string) {
Normalizer2 impl = Normalizer2.getNFDInstance();
char[] src = string.toCharArray();
int srcIndex = 0;
int srcLimit = src.length;
char[] dest = new char[src.length * 3]; //MAX_BUF_SIZE_DECOMPOSE = 3
int destIndex = 0;
int destLimit = dest.length;
int prevSrc;
String norm;
int reorderStartIndex, length;
char c1, c2;
int cp;
int minNoMaybe = 0x00c0;
int cc, prevCC, trailCC;
char[] p;
int pStart;
// initialize
reorderStartIndex = 0;
prevCC = 0;
norm = null;
cp = 0;
pStart = 0;
cc = trailCC = -1; // initialize to bogus value
c1 = 0;
for (;;) {
prevSrc=srcIndex;
//quick check: (1) less than minNoMaybe, (2) no decomposition, (3) Hangul syllable
while (srcIndex != srcLimit &&
((c1 = src[srcIndex]) < minNoMaybe ||
(norm = impl.getDecomposition(cp = string.codePointAt(srcIndex))) == null ||
(c1 >= '\uac00' && c1 <= '\ud7a3'))) { // Hangul Syllables
prevCC = 0;
// When c1 is below minNoMaybe, cp was not re-evaluated (short-circuit) and may be stale.
srcIndex += (c1 < minNoMaybe || cp < 0x10000) ? 1 : 2;
}
// copy these code units all at once
if (srcIndex != prevSrc) {
length = srcIndex - prevSrc;
if ((destIndex + length) <= destLimit) {
System.arraycopy(src,prevSrc,dest,destIndex,length);
}
destIndex += length;
reorderStartIndex = destIndex;
}
// end of source reached?
if (srcIndex == srcLimit) {
break;
}
// cp already contains the code point at srcIndex and norm is set for it, increment srcIndex
srcIndex += (cp < 0x10000) ? 1 : 2;
if (cp < Character.MIN_SUPPLEMENTARY_CODE_POINT) {
c2 = 0;
length = 1;
if (Character.isHighSurrogate(c1)
|| Character.isLowSurrogate(c1)) {
norm = null;
}
} else {
length = 2;
c2 = src[srcIndex-1];
}
// get the decomposition and the lead and trail cc's
if (norm == null) {
// cp does not decompose
cc = trailCC = UCharacter.getCombiningClass(cp);
p = null;
pStart = -1;
} else {
pStart = 0;
p = norm.toCharArray();
length = p.length;
int cpNum = norm.codePointCount(0, length);
cc= UCharacter.getCombiningClass(norm.codePointAt(0));
trailCC= UCharacter.getCombiningClass(norm.codePointAt(cpNum-1));
if (length == 1) {
// fastpath a single code unit from decomposition
c1 = p[pStart];
c2 = 0;
p = null;
pStart = -1;
}
}
if((destIndex + length * 3) >= destLimit) { // 2 SingleQuotations
// buffer overflow
char[] tmpBuf = new char[destLimit * 2];
System.arraycopy(dest, 0, tmpBuf, 0, destIndex);
dest = tmpBuf;
destLimit = dest.length;
}
// append the decomposition to the destination buffer, assume length>0
{
int reorderSplit = destIndex;
if (p == null) {
// fastpath: single code point
if (needSingleQuotation(c1)) {
//if we need single quotation, no need to consider "prevCC"
//and it must NOT be a supplementary pair
dest[destIndex++] = '\'';
dest[destIndex++] = c1;
dest[destIndex++] = '\'';
trailCC = 0;
} else if(cc != 0 && cc < prevCC) {
// (c1, c2) is out of order with respect to the preceding
// text
destIndex += length;
trailCC = insertOrdered(dest, reorderStartIndex,
reorderSplit, destIndex, c1, c2, cc);
} else {
// just append (c1, c2)
dest[destIndex++] = c1;
if(c2 != 0) {
dest[destIndex++] = c2;
}
}
} else {
// general: multiple code points (ordered by themselves)
// from decomposition
if (needSingleQuotation(p[pStart])) {
dest[destIndex++] = '\'';
dest[destIndex++] = p[pStart++];
dest[destIndex++] = '\'';
length--;
do {
dest[destIndex++] = p[pStart++];
} while(--length > 0);
} else if (cc != 0 && cc < prevCC) {
destIndex += length;
trailCC = mergeOrdered(dest, reorderStartIndex,
reorderSplit, p, pStart,
pStart+length);
} else {
// just append the decomposition
do {
dest[destIndex++] = p[pStart++];
} while (--length > 0);
}
}
}
prevCC = trailCC;
if(prevCC == 0) {
reorderStartIndex = destIndex;
}
}
return new String(dest, 0, destIndex);
}
/**
* simpler, single-character version of mergeOrdered() -
* bubble-insert one single code point into the preceding string,
* which is already canonically ordered
* (c1, c2) may or may not yet have been inserted at source[current]..source[p]
*
* it must be that p=current+lengthof(c1, c2), i.e. p=current+(c2==0 ? 1 : 2)
*
* before: source[start]..source[current] is already ordered, and
* source[current]..source[p] may or may not hold (c1, c2) but
* must be exactly the same length as (c1, c2)
* after: source[start]..source[p] is ordered
*
* @return the trailing combining class
*/
private static int/*unsigned byte*/ insertOrdered(char[] source,
int start,
int current, int p,
char c1, char c2,
int/*unsigned byte*/ cc) {
int back, preBack;
int r;
int prevCC, trailCC=cc;
if (start<current && cc!=0) {
// search for the insertion point where cc>=prevCC
preBack=back=current;
PrevArgs prevArgs = new PrevArgs();
prevArgs.current = current;
prevArgs.start = start;
prevArgs.src = source;
prevArgs.c1 = c1;
prevArgs.c2 = c2;
// get the prevCC
prevCC=getPrevCC(prevArgs);
preBack = prevArgs.current;
if(cc<prevCC) {
// this will be the last code point, so keep its cc
trailCC=prevCC;
back=preBack;
while(start<preBack) {
prevCC=getPrevCC(prevArgs);
preBack=prevArgs.current;
if(cc>=prevCC) {
break;
}
back=preBack;
}
// this is where we are right now with all these indices:
// [start]..[preBack] 0..? code points that we can ignore
// [preBack]..[back] 0..1 code points with prevCC<=cc
// [back]..[current] 0..n code points with >cc, move up to insert (c1, c2)
// [current]..[p] 1 code point (c1, c2) with cc
// move the code units in between up
r=p;
do {
source[--r]=source[--current];
} while (back!=current);
}
}
// insert (c1, c2)
source[current] = c1;
if (c2!=0) {
source[(current+1)] = c2;
}
// we know the cc of the last code point
return trailCC;
}
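// Illustrative insertOrdered() behavior (standard ccc values assumed): given
// source = { 'a', '\u0301' } already ordered (ccc 0, 230), inserting c1='\u0316'
// (ccc=220) with current=2, p=3 yields { 'a', '\u0316', '\u0301' } and returns trailCC=230.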
/**
* merge two UTF-16 string parts together
* to canonically order (order by combining classes) their concatenation
*
* the two strings may already be adjacent, in which case the merging is done
* in-place; if the two strings are not adjacent, then the buffer holding the
* first one must be large enough
* the second string may or may not be ordered in itself
*
* before: [start]..[current] is already ordered, and
* [next]..[limit] may be ordered in itself, but
* is not in relation to [start..current[
* after: [start..current+(limit-next)[ is ordered
*
* the algorithm is a simple bubble-sort that takes the characters from
* data[next++] and inserts them in correct combining class order into the
* preceding part of the string
*
* since this function is called much less often than the single-code point
* insertOrdered(), it just uses that for easier maintenance
*
* @return the trailing combining class
*/
private static int /*unsigned byte*/ mergeOrdered(char[] source,
int start,
int current,
char[] data,
int next,
int limit) {
int r;
int /*unsigned byte*/ cc, trailCC=0;
boolean adjacent;
adjacent= current==next;
NextCCArgs ncArgs = new NextCCArgs();
ncArgs.source = data;
ncArgs.next = next;
ncArgs.limit = limit;
if(start!=current) {
while(ncArgs.next<ncArgs.limit) {
cc=getNextCC(ncArgs);
if(cc==0) {
// does not bubble back
trailCC=0;
if(adjacent) {
current=ncArgs.next;
} else {
data[current++]=ncArgs.c1;
if(ncArgs.c2!=0) {
data[current++]=ncArgs.c2;
}
}
break;
} else {
r=current+(ncArgs.c2==0 ? 1 : 2);
trailCC=insertOrdered(source,start, current, r,
ncArgs.c1, ncArgs.c2, cc);
current=r;
}
}
}
if(ncArgs.next==ncArgs.limit) {
// we know the cc of the last code point
return trailCC;
} else {
if(!adjacent) {
// copy the second string part
do {
source[current++]=data[ncArgs.next++];
} while(ncArgs.next!=ncArgs.limit);
ncArgs.limit=current;
}
PrevArgs prevArgs = new PrevArgs();
prevArgs.src = data;
prevArgs.start = start;
prevArgs.current = ncArgs.limit;
return getPrevCC(prevArgs);
}
}
private static final class PrevArgs{
char[] src;
int start;
int current;
char c1;
char c2;
}
private static final class NextCCArgs{
char[] source;
int next;
int limit;
char c1;
char c2;
}
private static int /*unsigned*/ getPrevCC(PrevArgs args) {
args.c1=args.src[--args.current];
args.c2=0;
if (args.c1 < MIN_CCC_LCCC_CP) {
return 0;
} else if (UTF16.isLeadSurrogate(args.c1)) {
/* unpaired first surrogate */
return 0;
} else if (!UTF16.isTrailSurrogate(args.c1)) {
return UCharacter.getCombiningClass(args.c1);
} else if (args.current!=args.start &&
UTF16.isLeadSurrogate(args.c2=args.src[args.current-1])) {
--args.current;
return UCharacter.getCombiningClass(Character.toCodePoint(args.c2, args.c1));
} else {
/* unpaired second surrogate */
args.c2=0;
return 0;
}
}
private static int /*unsigned byte*/ getNextCC(NextCCArgs args) {
args.c1=args.source[args.next++];
args.c2=0;
if (UTF16.isTrailSurrogate(args.c1)) {
/* unpaired second surrogate */
return 0;
} else if (!UTF16.isLeadSurrogate(args.c1)) {
return UCharacter.getCombiningClass(args.c1);
} else if (args.next!=args.limit &&
UTF16.isTrailSurrogate(args.c2=args.source[args.next])){
++args.next;
return UCharacter.getCombiningClass(Character.toCodePoint(args.c1, args.c2));
} else {
/* unpaired first surrogate */
args.c2=0;
return 0;
}
}
private VersionInfo dataVersion;
// Code point thresholds for quick check codes.
private int minDecompNoCP;
private int minCompNoMaybeCP;
// Norm16 value thresholds for quick check combinations and types of extra data.
private int minYesNo;
private int minYesNoMappingsOnly;
private int minNoNo;
private int limitNoNo;
private int minMaybeYes;
private Trie2_16 normTrie;
private String maybeYesCompositions;
private String extraData; // mappings and/or compositions for yesYes, yesNo & noNo characters
private byte[] smallFCD; // [0x100] one bit per 32 BMP code points, set if any FCD!=0
private int[] tccc180; // [0x180] tccc values for U+0000..U+017F
}