| /* |
| * Copyright (c) 1999, 2012, Oracle and/or its affiliates. All rights reserved. |
| * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
| * |
| * This code is free software; you can redistribute it and/or modify it |
| * under the terms of the GNU General Public License version 2 only, as |
| * published by the Free Software Foundation. Oracle designates this |
| * particular file as subject to the "Classpath" exception as provided |
| * by Oracle in the LICENSE file that accompanied this code. |
| * |
| * This code is distributed in the hope that it will be useful, but WITHOUT |
| * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
| * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
| * version 2 for more details (a copy is included in the LICENSE file that |
| * accompanied this code). |
| * |
| * You should have received a copy of the GNU General Public License version |
| * 2 along with this work; if not, write to the Free Software Foundation, |
| * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
| * |
| * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
| * or visit www.oracle.com if you need additional information or have any |
| * questions. |
| */ |
| |
| /* |
| * (C) Copyright Taligent, Inc. 1996, 1997 - All Rights Reserved |
| * (C) Copyright IBM Corp. 1996-1998 - All Rights Reserved |
| * |
| * The original version of this source code and documentation is copyrighted |
| * and owned by Taligent, Inc., a wholly-owned subsidiary of IBM. These |
| * materials are provided under terms of a License Agreement between Taligent |
| * and Sun. This technology is protected by multiple US and International |
| * patents. This notice and attribution to Taligent may not be removed. |
| * Taligent is a registered trademark of Taligent, Inc. |
| * |
| */ |
| |
| package java.text; |
| |
| import java.util.Vector; |
| import sun.text.UCompactIntArray; |
| import sun.text.IntHashtable; |
| |
| /** |
| * This class contains the static state of a RuleBasedCollator: The various |
| * tables that are used by the collation routines. Several RuleBasedCollators |
| * can share a single RBCollationTables object, easing memory requirements and |
| * improving performance. |
| */ |
| final class RBCollationTables { |
//===========================================================================================
//  The following diagram shows the data structure of the RBCollationTables object.
//  Suppose we have the rule, where 'o-umlaut' is the unicode char 0x00F6.
//  "a, A < b, B < c, C, ch, cH, Ch, CH < d, D ... < o, O; 'o-umlaut'/E, 'O-umlaut'/E ...".
//  What the rule says is: sort the 'ch' ligatures and 'c' with only a tertiary
//  difference, and sort 'o-umlaut' as if it is always expanded with 'e'.
//
// mapping table                 contracting list           expanding list
// (contains all unicode char
//  entries)                   ___    ____________      _________________________
//  ________                +>|_*_|->|'c' |v('c') |  +>|v('o')|v('umlaut')|v('e')|
// |_\u0001_|-> v('\u0001') | |_:_|  |------------|  | |-------------------------|
// |_\u0002_|-> v('\u0002') | |_:_|  |'ch'|v('ch')|  | |             :           |
// |____:___|               | |_:_|  |------------|  | |-------------------------|
// |____:___|               |        |'cH'|v('cH')|  | |             :           |
// |__'a'___|-> v('a')      |        |------------|  | |-------------------------|
// |__'b'___|-> v('b')      |        |'Ch'|v('Ch')|  | |             :           |
// |____:___|               |        |------------|  | |-------------------------|
// |____:___|               |        |'CH'|v('CH')|  | |             :           |
// |___'c'__|----------------         ------------   | |-------------------------|
// |____:___|                                        | |             :           |
// |o-umlaut|----------------------------------------- |_________________________|
// |____:___|
//
// Noted by Helena Shih on 6/23/97
//============================================================================================
| |
| public RBCollationTables(String rules, int decmp) throws ParseException { |
| this.rules = rules; |
| |
| RBTableBuilder builder = new RBTableBuilder(new BuildAPI()); |
| builder.build(rules, decmp); // this object is filled in through |
| // the BuildAPI object |
| } |
| |
| final class BuildAPI { |
| /** |
| * Private constructor. Prevents anyone else besides RBTableBuilder |
| * from gaining direct access to the internals of this class. |
| */ |
| private BuildAPI() { |
| } |
| |
| /** |
| * This function is used by RBTableBuilder to fill in all the members of this |
| * object. (Effectively, the builder class functions as a "friend" of this |
| * class, but to avoid changing too much of the logic, it carries around "shadow" |
| * copies of all these variables until the end of the build process and then |
| * copies them en masse into the actual tables object once all the construction |
| * logic is complete. This function does that "copying en masse". |
| * @param f2ary The value for frenchSec (the French-secondary flag) |
| * @param swap The value for SE Asian swapping rule |
| * @param map The collator's character-mapping table (the value for mapping) |
| * @param cTbl The collator's contracting-character table (the value for contractTable) |
| * @param eTbl The collator's expanding-character table (the value for expandTable) |
| * @param cFlgs The hash table of characters that participate in contracting- |
| * character sequences (the value for contractFlags) |
| * @param mso The value for maxSecOrder |
| * @param mto The value for maxTerOrder |
| */ |
| void fillInTables(boolean f2ary, |
| boolean swap, |
| UCompactIntArray map, |
| Vector<Vector<EntryPair>> cTbl, |
| Vector<int[]> eTbl, |
| IntHashtable cFlgs, |
| short mso, |
| short mto) { |
| frenchSec = f2ary; |
| seAsianSwapping = swap; |
| mapping = map; |
| contractTable = cTbl; |
| expandTable = eTbl; |
| contractFlags = cFlgs; |
| maxSecOrder = mso; |
| maxTerOrder = mto; |
| } |
| } |
| |
| /** |
| * Gets the table-based rules for the collation object. |
| * @return returns the collation rules that the table collation object |
| * was created from. |
| */ |
| public String getRules() |
| { |
| return rules; |
| } |
| |
| public boolean isFrenchSec() { |
| return frenchSec; |
| } |
| |
| public boolean isSEAsianSwapping() { |
| return seAsianSwapping; |
| } |
| |
| // ============================================================== |
| // internal (for use by CollationElementIterator) |
| // ============================================================== |
| |
| /** |
| * Get the entry of hash table of the contracting string in the collation |
| * table. |
| * @param ch the starting character of the contracting string |
| */ |
| Vector<EntryPair> getContractValues(int ch) |
| { |
| int index = mapping.elementAt(ch); |
| return getContractValuesImpl(index - CONTRACTCHARINDEX); |
| } |
| |
| //get contract values from contractTable by index |
| private Vector<EntryPair> getContractValuesImpl(int index) |
| { |
| if (index >= 0) |
| { |
| return contractTable.elementAt(index); |
| } |
| else // not found |
| { |
| return null; |
| } |
| } |
| |
| /** |
| * Returns true if this character appears anywhere in a contracting |
| * character sequence. (Used by CollationElementIterator.setOffset().) |
| */ |
| boolean usedInContractSeq(int c) { |
| return contractFlags.get(c) == 1; |
| } |
| |
| /** |
| * Return the maximum length of any expansion sequences that end |
| * with the specified comparison order. |
| * |
| * @param order a collation order returned by previous or next. |
| * @return the maximum length of any expansion seuences ending |
| * with the specified order. |
| * |
| * @see CollationElementIterator#getMaxExpansion |
| */ |
| int getMaxExpansion(int order) { |
| int result = 1; |
| |
| if (expandTable != null) { |
| // Right now this does a linear search through the entire |
| // expansion table. If a collator had a large number of expansions, |
| // this could cause a performance problem, but in practise that |
| // rarely happens |
| for (int i = 0; i < expandTable.size(); i++) { |
| int[] valueList = expandTable.elementAt(i); |
| int length = valueList.length; |
| |
| if (length > result && valueList[length-1] == order) { |
| result = length; |
| } |
| } |
| } |
| |
| return result; |
| } |
| |
| /** |
| * Get the entry of hash table of the expanding string in the collation |
| * table. |
| * @param idx the index of the expanding string value list |
| */ |
| final int[] getExpandValueList(int idx) { |
| return expandTable.elementAt(idx - EXPANDCHARINDEX); |
| } |
| |
| /** |
| * Get the comarison order of a character from the collation table. |
| * @return the comparison order of a character. |
| */ |
| int getUnicodeOrder(int ch) { |
| return mapping.elementAt(ch); |
| } |
| |
| short getMaxSecOrder() { |
| return maxSecOrder; |
| } |
| |
| short getMaxTerOrder() { |
| return maxTerOrder; |
| } |
| |
| /** |
| * Reverse a string. |
| */ |
| //shemran/Note: this is used for secondary order value reverse, no |
| // need to consider supplementary pair. |
| static void reverse (StringBuffer result, int from, int to) |
| { |
| int i = from; |
| char swap; |
| |
| int j = to - 1; |
| while (i < j) { |
| swap = result.charAt(i); |
| result.setCharAt(i, result.charAt(j)); |
| result.setCharAt(j, swap); |
| i++; |
| j--; |
| } |
| } |
| |
| static final int getEntry(Vector<EntryPair> list, String name, boolean fwd) { |
| for (int i = 0; i < list.size(); i++) { |
| EntryPair pair = list.elementAt(i); |
| if (pair.fwd == fwd && pair.entryName.equals(name)) { |
| return i; |
| } |
| } |
| return UNMAPPED; |
| } |
| |
| // ============================================================== |
| // constants |
| // ============================================================== |
| //sherman/Todo: is the value big enough????? |
| static final int EXPANDCHARINDEX = 0x7E000000; // Expand index follows |
| static final int CONTRACTCHARINDEX = 0x7F000000; // contract indexes follow |
| static final int UNMAPPED = 0xFFFFFFFF; |
| |
| static final int PRIMARYORDERMASK = 0xffff0000; |
| static final int SECONDARYORDERMASK = 0x0000ff00; |
| static final int TERTIARYORDERMASK = 0x000000ff; |
| static final int PRIMARYDIFFERENCEONLY = 0xffff0000; |
| static final int SECONDARYDIFFERENCEONLY = 0xffffff00; |
| static final int PRIMARYORDERSHIFT = 16; |
| static final int SECONDARYORDERSHIFT = 8; |
| |
| // ============================================================== |
| // instance variables |
| // ============================================================== |
| private String rules = null; |
| private boolean frenchSec = false; |
| private boolean seAsianSwapping = false; |
| |
| private UCompactIntArray mapping = null; |
| private Vector<Vector<EntryPair>> contractTable = null; |
| private Vector<int[]> expandTable = null; |
| private IntHashtable contractFlags = null; |
| |
| private short maxSecOrder = 0; |
| private short maxTerOrder = 0; |
| } |