blob: d8d85b5a00841fc8e78d67b53d6ce13c004332d0 [file] [log] [blame]
J. Duke319a3b92007-12-01 00:00:00 +00001/*
2 * Copyright 1999-2003 Sun Microsystems, Inc. All Rights Reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation. Sun designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Sun in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
22 * CA 95054 USA or visit www.sun.com if you need additional information or
23 * have any questions.
24 */
25
26/*
27 * (C) Copyright Taligent, Inc. 1996, 1997 - All Rights Reserved
28 * (C) Copyright IBM Corp. 1996-1998 - All Rights Reserved
29 *
30 * The original version of this source code and documentation is copyrighted
31 * and owned by Taligent, Inc., a wholly-owned subsidiary of IBM. These
32 * materials are provided under terms of a License Agreement between Taligent
33 * and Sun. This technology is protected by multiple US and International
34 * patents. This notice and attribution to Taligent may not be removed.
35 * Taligent is a registered trademark of Taligent, Inc.
36 *
37 */
38
39package java.text;
40
41import java.util.Vector;
42import sun.text.UCompactIntArray;
43import sun.text.IntHashtable;
44
45/**
46 * This class contains the static state of a RuleBasedCollator: The various
47 * tables that are used by the collation routines. Several RuleBasedCollators
48 * can share a single RBCollationTables object, easing memory requirements and
49 * improving performance.
50 */
51final class RBCollationTables {
52 //===========================================================================================
53 // The following diagram shows the data structure of the RBCollationTables object.
54 // Suppose we have the rule, where 'o-umlaut' is the unicode char 0x00F6.
55 // "a, A < b, B < c, C, ch, cH, Ch, CH < d, D ... < o, O; 'o-umlaut'/E, 'O-umlaut'/E ...".
56 // The rule says: sort the 'ch' ligatures and 'c' with only a tertiary difference, and
57 // sort 'o-umlaut' as if it were always expanded with 'e'.
58 //
59 // mapping table contracting list expanding list
60 // (contains all unicode char
61 // entries) ___ ____________ _________________________
62 // ________ +>|_*_|->|'c' |v('c') | +>|v('o')|v('umlaut')|v('e')|
63 // |_\u0001_|-> v('\u0001') | |_:_| |------------| | |-------------------------|
64 // |_\u0002_|-> v('\u0002') | |_:_| |'ch'|v('ch')| | | : |
65 // |____:___| | |_:_| |------------| | |-------------------------|
66 // |____:___| | |'cH'|v('cH')| | | : |
67 // |__'a'___|-> v('a') | |------------| | |-------------------------|
68 // |__'b'___|-> v('b') | |'Ch'|v('Ch')| | | : |
69 // |____:___| | |------------| | |-------------------------|
70 // |____:___| | |'CH'|v('CH')| | | : |
71 // |___'c'__|---------------- ------------ | |-------------------------|
72 // |____:___| | | : |
73 // |o-umlaut|---------------------------------------- |_________________________|
74 // |____:___|
75 //
76 // Noted by Helena Shih on 6/23/97
77 //============================================================================================
78
/**
 * Creates the tables for the given collation rules.
 * The rule string is remembered so that getRules() can return it, and an
 * RBTableBuilder is then asked to build the tables; the builder delivers
 * its results to this object through a BuildAPI instance.
 *
 * @param rules the collation rules to build the tables from
 * @param decmp passed through unchanged to the builder (presumably the
 *              decomposition mode -- confirm against RBTableBuilder.build)
 * @exception ParseException if building the tables fails with that exception
 */
public RBCollationTables(String rules, int decmp) throws ParseException {
    this.rules = rules;

    // This object is filled in through the BuildAPI object handed to the builder.
    new RBTableBuilder(new BuildAPI()).build(rules, decmp);
}
86
87 final class BuildAPI {
88 /**
89 * Private constructor. Prevents anyone else besides RBTableBuilder
90 * from gaining direct access to the internals of this class.
91 */
92 private BuildAPI() {
93 }
94
95 /**
96 * This function is used by RBTableBuilder to fill in all the members of this
97 * object. (Effectively, the builder class functions as a "friend" of this
98 * class, but to avoid changing too much of the logic, it carries around "shadow"
99 * copies of all these variables until the end of the build process and then
100 * copies them en masse into the actual tables object once all the construction
101 * logic is complete. This function does that "copying en masse".
102 * @param f2ary The value for frenchSec (the French-secondary flag)
103 * @param swap The value for SE Asian swapping rule
104 * @param map The collator's character-mapping table (the value for mapping)
105 * @param cTbl The collator's contracting-character table (the value for contractTable)
106 * @param eTbl The collator's expanding-character table (the value for expandTable)
107 * @param cFlgs The hash table of characters that participate in contracting-
108 * character sequences (the value for contractFlags)
109 * @param mso The value for maxSecOrder
110 * @param mto The value for maxTerOrder
111 */
112 void fillInTables(boolean f2ary,
113 boolean swap,
114 UCompactIntArray map,
115 Vector cTbl,
116 Vector eTbl,
117 IntHashtable cFlgs,
118 short mso,
119 short mto) {
120 frenchSec = f2ary;
121 seAsianSwapping = swap;
122 mapping = map;
123 contractTable = cTbl;
124 expandTable = eTbl;
125 contractFlags = cFlgs;
126 maxSecOrder = mso;
127 maxTerOrder = mto;
128 }
129 }
130
131 /**
132 * Gets the table-based rules for the collation object.
133 * @return returns the collation rules that the table collation object
134 * was created from.
135 */
136 public String getRules()
137 {
138 return rules;
139 }
140
141 public boolean isFrenchSec() {
142 return frenchSec;
143 }
144
145 public boolean isSEAsianSwapping() {
146 return seAsianSwapping;
147 }
148
149 // ==============================================================
150 // internal (for use by CollationElementIterator)
151 // ==============================================================
152
153 /**
154 * Get the entry of hash table of the contracting string in the collation
155 * table.
156 * @param ch the starting character of the contracting string
157 */
158 Vector getContractValues(int ch)
159 {
160 int index = mapping.elementAt(ch);
161 return getContractValuesImpl(index - CONTRACTCHARINDEX);
162 }
163
164 //get contract values from contractTable by index
165 private Vector getContractValuesImpl(int index)
166 {
167 if (index >= 0)
168 {
169 return (Vector)contractTable.elementAt(index);
170 }
171 else // not found
172 {
173 return null;
174 }
175 }
176
177 /**
178 * Returns true if this character appears anywhere in a contracting
179 * character sequence. (Used by CollationElementIterator.setOffset().)
180 */
181 boolean usedInContractSeq(int c) {
182 return contractFlags.get(c) == 1;
183 }
184
185 /**
186 * Return the maximum length of any expansion sequences that end
187 * with the specified comparison order.
188 *
189 * @param order a collation order returned by previous or next.
190 * @return the maximum length of any expansion seuences ending
191 * with the specified order.
192 *
193 * @see CollationElementIterator#getMaxExpansion
194 */
195 int getMaxExpansion(int order)
196 {
197 int result = 1;
198
199 if (expandTable != null) {
200 // Right now this does a linear search through the entire
201 // expandsion table. If a collator had a large number of expansions,
202 // this could cause a performance problem, but in practise that
203 // rarely happens
204 for (int i = 0; i < expandTable.size(); i++) {
205 int[] valueList = (int [])expandTable.elementAt(i);
206 int length = valueList.length;
207
208 if (length > result && valueList[length-1] == order) {
209 result = length;
210 }
211 }
212 }
213
214 return result;
215 }
216
217 /**
218 * Get the entry of hash table of the expanding string in the collation
219 * table.
220 * @param idx the index of the expanding string value list
221 */
222 final int[] getExpandValueList(int order) {
223 return (int[])expandTable.elementAt(order - EXPANDCHARINDEX);
224 }
225
226 /**
227 * Get the comarison order of a character from the collation table.
228 * @return the comparison order of a character.
229 */
230 int getUnicodeOrder(int ch)
231 {
232 return mapping.elementAt(ch);
233 }
234
235 short getMaxSecOrder() {
236 return maxSecOrder;
237 }
238
239 short getMaxTerOrder() {
240 return maxTerOrder;
241 }
242
243 /**
244 * Reverse a string.
245 */
246 //shemran/Note: this is used for secondary order value reverse, no
247 // need to consider supplementary pair.
248 static void reverse (StringBuffer result, int from, int to)
249 {
250 int i = from;
251 char swap;
252
253 int j = to - 1;
254 while (i < j) {
255 swap = result.charAt(i);
256 result.setCharAt(i, result.charAt(j));
257 result.setCharAt(j, swap);
258 i++;
259 j--;
260 }
261 }
262
263 final static int getEntry(Vector list, String name, boolean fwd) {
264 for (int i = 0; i < list.size(); i++) {
265 EntryPair pair = (EntryPair)list.elementAt(i);
266 if (pair.fwd == fwd && pair.entryName.equals(name)) {
267 return i;
268 }
269 }
270 return UNMAPPED;
271 }
272
273 // ==============================================================
274 // constants
275 // ==============================================================
276 //sherman/Todo: is the value big enough?????
277 final static int EXPANDCHARINDEX = 0x7E000000; // Expand index follows
278 final static int CONTRACTCHARINDEX = 0x7F000000; // contract indexes follow
279 final static int UNMAPPED = 0xFFFFFFFF;
280
281 final static int PRIMARYORDERMASK = 0xffff0000;
282 final static int SECONDARYORDERMASK = 0x0000ff00;
283 final static int TERTIARYORDERMASK = 0x000000ff;
284 final static int PRIMARYDIFFERENCEONLY = 0xffff0000;
285 final static int SECONDARYDIFFERENCEONLY = 0xffffff00;
286 final static int PRIMARYORDERSHIFT = 16;
287 final static int SECONDARYORDERSHIFT = 8;
288
289 // ==============================================================
290 // instance variables
291 // ==============================================================
292 private String rules = null;
293 private boolean frenchSec = false;
294 private boolean seAsianSwapping = false;
295
296 private UCompactIntArray mapping = null;
297 private Vector contractTable = null;
298 private Vector expandTable = null;
299 private IntHashtable contractFlags = null;
300
301 private short maxSecOrder = 0;
302 private short maxTerOrder = 0;
303}