/*
 * Copyright 1997-2006 Sun Microsystems, Inc. All Rights Reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation. Sun designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Sun in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
 * CA 95054 USA or visit www.sun.com if you need additional information or
 * have any questions.
 */
25
/*
 * (C) Copyright Taligent, Inc. 1996-1998 - All Rights Reserved
 * (C) Copyright IBM Corp. 1996-1998 - All Rights Reserved
 *
 * The original version of this source code and documentation is copyrighted
 * and owned by Taligent, Inc., a wholly-owned subsidiary of IBM. These
 * materials are provided under terms of a License Agreement between Taligent
 * and Sun. This technology is protected by multiple US and International
 * patents. This notice and attribution to Taligent may not be removed.
 * Taligent is a registered trademark of Taligent, Inc.
 *
 */
38
39package java.text;
40
41import java.text.spi.CollatorProvider;
42import java.util.Locale;
43import java.util.MissingResourceException;
44import java.util.ResourceBundle;
45import java.util.spi.LocaleServiceProvider;
46import sun.misc.SoftCache;
47import sun.util.resources.LocaleData;
48import sun.util.LocaleServiceProviderPool;
49
50
51/**
52 * The <code>Collator</code> class performs locale-sensitive
53 * <code>String</code> comparison. You use this class to build
54 * searching and sorting routines for natural language text.
55 *
56 * <p>
57 * <code>Collator</code> is an abstract base class. Subclasses
58 * implement specific collation strategies. One subclass,
59 * <code>RuleBasedCollator</code>, is currently provided with
60 * the Java Platform and is applicable to a wide set of languages. Other
61 * subclasses may be created to handle more specialized needs.
62 *
63 * <p>
64 * Like other locale-sensitive classes, you can use the static
65 * factory method, <code>getInstance</code>, to obtain the appropriate
66 * <code>Collator</code> object for a given locale. You will only need
67 * to look at the subclasses of <code>Collator</code> if you need
68 * to understand the details of a particular collation strategy or
69 * if you need to modify that strategy.
70 *
71 * <p>
72 * The following example shows how to compare two strings using
73 * the <code>Collator</code> for the default locale.
74 * <blockquote>
75 * <pre>
76 * // Compare two strings in the default locale
77 * Collator myCollator = Collator.getInstance();
78 * if( myCollator.compare("abc", "ABC") < 0 )
79 * System.out.println("abc is less than ABC");
80 * else
81 * System.out.println("abc is greater than or equal to ABC");
82 * </pre>
83 * </blockquote>
84 *
85 * <p>
86 * You can set a <code>Collator</code>'s <em>strength</em> property
87 * to determine the level of difference considered significant in
88 * comparisons. Four strengths are provided: <code>PRIMARY</code>,
89 * <code>SECONDARY</code>, <code>TERTIARY</code>, and <code>IDENTICAL</code>.
90 * The exact assignment of strengths to language features is
91 * locale dependant. For example, in Czech, "e" and "f" are considered
92 * primary differences, while "e" and "&#283;" are secondary differences,
93 * "e" and "E" are tertiary differences and "e" and "e" are identical.
94 * The following shows how both case and accents could be ignored for
95 * US English.
96 * <blockquote>
97 * <pre>
98 * //Get the Collator for US English and set its strength to PRIMARY
99 * Collator usCollator = Collator.getInstance(Locale.US);
100 * usCollator.setStrength(Collator.PRIMARY);
101 * if( usCollator.compare("abc", "ABC") == 0 ) {
102 * System.out.println("Strings are equivalent");
103 * }
104 * </pre>
105 * </blockquote>
106 * <p>
107 * For comparing <code>String</code>s exactly once, the <code>compare</code>
108 * method provides the best performance. When sorting a list of
109 * <code>String</code>s however, it is generally necessary to compare each
110 * <code>String</code> multiple times. In this case, <code>CollationKey</code>s
111 * provide better performance. The <code>CollationKey</code> class converts
112 * a <code>String</code> to a series of bits that can be compared bitwise
113 * against other <code>CollationKey</code>s. A <code>CollationKey</code> is
114 * created by a <code>Collator</code> object for a given <code>String</code>.
115 * <br>
116 * <strong>Note:</strong> <code>CollationKey</code>s from different
117 * <code>Collator</code>s can not be compared. See the class description
118 * for {@link CollationKey}
119 * for an example using <code>CollationKey</code>s.
120 *
121 * @see RuleBasedCollator
122 * @see CollationKey
123 * @see CollationElementIterator
124 * @see Locale
125 * @author Helena Shih, Laura Werner, Richard Gillam
126 */
127
128public abstract class Collator
129 implements java.util.Comparator<Object>, Cloneable
130{
131 /**
132 * Collator strength value. When set, only PRIMARY differences are
133 * considered significant during comparison. The assignment of strengths
134 * to language features is locale dependant. A common example is for
135 * different base letters ("a" vs "b") to be considered a PRIMARY difference.
136 * @see java.text.Collator#setStrength
137 * @see java.text.Collator#getStrength
138 */
139 public final static int PRIMARY = 0;
140 /**
141 * Collator strength value. When set, only SECONDARY and above differences are
142 * considered significant during comparison. The assignment of strengths
143 * to language features is locale dependant. A common example is for
144 * different accented forms of the same base letter ("a" vs "\u00E4") to be
145 * considered a SECONDARY difference.
146 * @see java.text.Collator#setStrength
147 * @see java.text.Collator#getStrength
148 */
149 public final static int SECONDARY = 1;
150 /**
151 * Collator strength value. When set, only TERTIARY and above differences are
152 * considered significant during comparison. The assignment of strengths
153 * to language features is locale dependant. A common example is for
154 * case differences ("a" vs "A") to be considered a TERTIARY difference.
155 * @see java.text.Collator#setStrength
156 * @see java.text.Collator#getStrength
157 */
158 public final static int TERTIARY = 2;
159
160 /**
161 * Collator strength value. When set, all differences are
162 * considered significant during comparison. The assignment of strengths
163 * to language features is locale dependant. A common example is for control
164 * characters ("&#092;u0001" vs "&#092;u0002") to be considered equal at the
165 * PRIMARY, SECONDARY, and TERTIARY levels but different at the IDENTICAL
166 * level. Additionally, differences between pre-composed accents such as
167 * "&#092;u00C0" (A-grave) and combining accents such as "A&#092;u0300"
168 * (A, combining-grave) will be considered significant at the IDENTICAL
169 * level if decomposition is set to NO_DECOMPOSITION.
170 */
171 public final static int IDENTICAL = 3;
172
173 /**
174 * Decomposition mode value. With NO_DECOMPOSITION
175 * set, accented characters will not be decomposed for collation. This
176 * is the default setting and provides the fastest collation but
177 * will only produce correct results for languages that do not use accents.
178 * @see java.text.Collator#getDecomposition
179 * @see java.text.Collator#setDecomposition
180 */
181 public final static int NO_DECOMPOSITION = 0;
182
183 /**
184 * Decomposition mode value. With CANONICAL_DECOMPOSITION
185 * set, characters that are canonical variants according to Unicode
186 * standard will be decomposed for collation. This should be used to get
187 * correct collation of accented characters.
188 * <p>
189 * CANONICAL_DECOMPOSITION corresponds to Normalization Form D as
190 * described in
191 * <a href="http://www.unicode.org/unicode/reports/tr15/tr15-23.html">Unicode
192 * Technical Report #15</a>.
193 * @see java.text.Collator#getDecomposition
194 * @see java.text.Collator#setDecomposition
195 */
196 public final static int CANONICAL_DECOMPOSITION = 1;
197
198 /**
199 * Decomposition mode value. With FULL_DECOMPOSITION
200 * set, both Unicode canonical variants and Unicode compatibility variants
201 * will be decomposed for collation. This causes not only accented
202 * characters to be collated, but also characters that have special formats
203 * to be collated with their norminal form. For example, the half-width and
204 * full-width ASCII and Katakana characters are then collated together.
205 * FULL_DECOMPOSITION is the most complete and therefore the slowest
206 * decomposition mode.
207 * <p>
208 * FULL_DECOMPOSITION corresponds to Normalization Form KD as
209 * described in
210 * <a href="http://www.unicode.org/unicode/reports/tr15/tr15-23.html">Unicode
211 * Technical Report #15</a>.
212 * @see java.text.Collator#getDecomposition
213 * @see java.text.Collator#setDecomposition
214 */
215 public final static int FULL_DECOMPOSITION = 2;
216
217 /**
218 * Gets the Collator for the current default locale.
219 * The default locale is determined by java.util.Locale.getDefault.
220 * @return the Collator for the default locale.(for example, en_US)
221 * @see java.util.Locale#getDefault
222 */
223 public static synchronized Collator getInstance() {
224 return getInstance(Locale.getDefault());
225 }
226
227 /**
228 * Gets the Collator for the desired locale.
229 * @param desiredLocale the desired locale.
230 * @return the Collator for the desired locale.
231 * @see java.util.Locale
232 * @see java.util.ResourceBundle
233 */
234 public static synchronized
235 Collator getInstance(Locale desiredLocale)
236 {
237 Collator result = (Collator) cache.get(desiredLocale);
238 if (result != null) {
239 return (Collator)result.clone(); // make the world safe
240 }
241
242 // Check whether a provider can provide an implementation that's closer
243 // to the requested locale than what the Java runtime itself can provide.
244 LocaleServiceProviderPool pool =
245 LocaleServiceProviderPool.getPool(CollatorProvider.class);
246 if (pool.hasProviders()) {
247 Collator providersInstance = pool.getLocalizedObject(
248 CollatorGetter.INSTANCE,
249 desiredLocale,
250 desiredLocale);
251 if (providersInstance != null) {
252 return providersInstance;
253 }
254 }
255
256 // Load the resource of the desired locale from resource
257 // manager.
258 String colString = "";
259 try {
260 ResourceBundle resource = LocaleData.getCollationData(desiredLocale);
261
262 colString = resource.getString("Rule");
263 } catch (MissingResourceException e) {
264 // Use default values
265 }
266 try
267 {
268 result = new RuleBasedCollator( CollationRules.DEFAULTRULES +
269 colString,
270 CANONICAL_DECOMPOSITION );
271 }
272 catch(ParseException foo)
273 {
274 // predefined tables should contain correct grammar
275 try {
276 result = new RuleBasedCollator( CollationRules.DEFAULTRULES );
277 } catch (ParseException bar) {
278 // do nothing
279 }
280 }
281 // Now that RuleBasedCollator adds expansions for pre-composed characters
282 // into their decomposed equivalents, the default collators don't need
283 // to have decomposition turned on. Laura, 5/5/98, bug 4114077
284 result.setDecomposition(NO_DECOMPOSITION);
285
286 cache.put(desiredLocale,result);
287 return (Collator)result.clone();
288 }
289
290 /**
291 * Compares the source string to the target string according to the
292 * collation rules for this Collator. Returns an integer less than,
293 * equal to or greater than zero depending on whether the source String is
294 * less than, equal to or greater than the target string. See the Collator
295 * class description for an example of use.
296 * <p>
297 * For a one time comparison, this method has the best performance. If a
298 * given String will be involved in multiple comparisons, CollationKey.compareTo
299 * has the best performance. See the Collator class description for an example
300 * using CollationKeys.
301 * @param source the source string.
302 * @param target the target string.
303 * @return Returns an integer value. Value is less than zero if source is less than
304 * target, value is zero if source and target are equal, value is greater than zero
305 * if source is greater than target.
306 * @see java.text.CollationKey
307 * @see java.text.Collator#getCollationKey
308 */
309 public abstract int compare(String source, String target);
310
311 /**
312 * Compares its two arguments for order. Returns a negative integer,
313 * zero, or a positive integer as the first argument is less than, equal
314 * to, or greater than the second.
315 * <p>
316 * This implementation merely returns
317 * <code> compare((String)o1, (String)o2) </code>.
318 *
319 * @return a negative integer, zero, or a positive integer as the
320 * first argument is less than, equal to, or greater than the
321 * second.
322 * @exception ClassCastException the arguments cannot be cast to Strings.
323 * @see java.util.Comparator
324 * @since 1.2
325 */
326 public int compare(Object o1, Object o2) {
327 return compare((String)o1, (String)o2);
328 }
329
330 /**
331 * Transforms the String into a series of bits that can be compared bitwise
332 * to other CollationKeys. CollationKeys provide better performance than
333 * Collator.compare when Strings are involved in multiple comparisons.
334 * See the Collator class description for an example using CollationKeys.
335 * @param source the string to be transformed into a collation key.
336 * @return the CollationKey for the given String based on this Collator's collation
337 * rules. If the source String is null, a null CollationKey is returned.
338 * @see java.text.CollationKey
339 * @see java.text.Collator#compare
340 */
341 public abstract CollationKey getCollationKey(String source);
342
343 /**
344 * Convenience method for comparing the equality of two strings based on
345 * this Collator's collation rules.
346 * @param source the source string to be compared with.
347 * @param target the target string to be compared with.
348 * @return true if the strings are equal according to the collation
349 * rules. false, otherwise.
350 * @see java.text.Collator#compare
351 */
352 public boolean equals(String source, String target)
353 {
354 return (compare(source, target) == Collator.EQUAL);
355 }
356
357 /**
358 * Returns this Collator's strength property. The strength property determines
359 * the minimum level of difference considered significant during comparison.
360 * See the Collator class description for an example of use.
361 * @return this Collator's current strength property.
362 * @see java.text.Collator#setStrength
363 * @see java.text.Collator#PRIMARY
364 * @see java.text.Collator#SECONDARY
365 * @see java.text.Collator#TERTIARY
366 * @see java.text.Collator#IDENTICAL
367 */
368 public synchronized int getStrength()
369 {
370 return strength;
371 }
372
373 /**
374 * Sets this Collator's strength property. The strength property determines
375 * the minimum level of difference considered significant during comparison.
376 * See the Collator class description for an example of use.
377 * @param newStrength the new strength value.
378 * @see java.text.Collator#getStrength
379 * @see java.text.Collator#PRIMARY
380 * @see java.text.Collator#SECONDARY
381 * @see java.text.Collator#TERTIARY
382 * @see java.text.Collator#IDENTICAL
383 * @exception IllegalArgumentException If the new strength value is not one of
384 * PRIMARY, SECONDARY, TERTIARY or IDENTICAL.
385 */
386 public synchronized void setStrength(int newStrength) {
387 if ((newStrength != PRIMARY) &&
388 (newStrength != SECONDARY) &&
389 (newStrength != TERTIARY) &&
390 (newStrength != IDENTICAL))
391 throw new IllegalArgumentException("Incorrect comparison level.");
392 strength = newStrength;
393 }
394
395 /**
396 * Get the decomposition mode of this Collator. Decomposition mode
397 * determines how Unicode composed characters are handled. Adjusting
398 * decomposition mode allows the user to select between faster and more
399 * complete collation behavior.
400 * <p>The three values for decomposition mode are:
401 * <UL>
402 * <LI>NO_DECOMPOSITION,
403 * <LI>CANONICAL_DECOMPOSITION
404 * <LI>FULL_DECOMPOSITION.
405 * </UL>
406 * See the documentation for these three constants for a description
407 * of their meaning.
408 * @return the decomposition mode
409 * @see java.text.Collator#setDecomposition
410 * @see java.text.Collator#NO_DECOMPOSITION
411 * @see java.text.Collator#CANONICAL_DECOMPOSITION
412 * @see java.text.Collator#FULL_DECOMPOSITION
413 */
414 public synchronized int getDecomposition()
415 {
416 return decmp;
417 }
418 /**
419 * Set the decomposition mode of this Collator. See getDecomposition
420 * for a description of decomposition mode.
421 * @param decompositionMode the new decomposition mode.
422 * @see java.text.Collator#getDecomposition
423 * @see java.text.Collator#NO_DECOMPOSITION
424 * @see java.text.Collator#CANONICAL_DECOMPOSITION
425 * @see java.text.Collator#FULL_DECOMPOSITION
426 * @exception IllegalArgumentException If the given value is not a valid decomposition
427 * mode.
428 */
429 public synchronized void setDecomposition(int decompositionMode) {
430 if ((decompositionMode != NO_DECOMPOSITION) &&
431 (decompositionMode != CANONICAL_DECOMPOSITION) &&
432 (decompositionMode != FULL_DECOMPOSITION))
433 throw new IllegalArgumentException("Wrong decomposition mode.");
434 decmp = decompositionMode;
435 }
436
437 /**
438 * Returns an array of all locales for which the
439 * <code>getInstance</code> methods of this class can return
440 * localized instances.
441 * The returned array represents the union of locales supported
442 * by the Java runtime and by installed
443 * {@link java.text.spi.CollatorProvider CollatorProvider} implementations.
444 * It must contain at least a Locale instance equal to
445 * {@link java.util.Locale#US Locale.US}.
446 *
447 * @return An array of locales for which localized
448 * <code>Collator</code> instances are available.
449 */
450 public static synchronized Locale[] getAvailableLocales() {
451 LocaleServiceProviderPool pool =
452 LocaleServiceProviderPool.getPool(CollatorProvider.class);
453 return pool.getAvailableLocales();
454 }
455
456 /**
457 * Overrides Cloneable
458 */
459 public Object clone()
460 {
461 try {
462 return (Collator)super.clone();
463 } catch (CloneNotSupportedException e) {
464 throw new InternalError();
465 }
466 }
467
468 /**
469 * Compares the equality of two Collators.
470 * @param that the Collator to be compared with this.
471 * @return true if this Collator is the same as that Collator;
472 * false otherwise.
473 */
474 public boolean equals(Object that)
475 {
476 if (this == that) return true;
477 if (that == null) return false;
478 if (getClass() != that.getClass()) return false;
479 Collator other = (Collator) that;
480 return ((strength == other.strength) &&
481 (decmp == other.decmp));
482 }
483
484 /**
485 * Generates the hash code for this Collator.
486 */
487 abstract public int hashCode();
488
489 /**
490 * Default constructor. This constructor is
491 * protected so subclasses can get access to it. Users typically create
492 * a Collator sub-class by calling the factory method getInstance.
493 * @see java.text.Collator#getInstance
494 */
495 protected Collator()
496 {
497 strength = TERTIARY;
498 decmp = CANONICAL_DECOMPOSITION;
499 }
500
501 private int strength = 0;
502 private int decmp = 0;
503 private static SoftCache cache = new SoftCache();
504
505 //
506 // FIXME: These three constants should be removed.
507 //
508 /**
509 * LESS is returned if source string is compared to be less than target
510 * string in the compare() method.
511 * @see java.text.Collator#compare
512 */
513 final static int LESS = -1;
514 /**
515 * EQUAL is returned if source string is compared to be equal to target
516 * string in the compare() method.
517 * @see java.text.Collator#compare
518 */
519 final static int EQUAL = 0;
520 /**
521 * GREATER is returned if source string is compared to be greater than
522 * target string in the compare() method.
523 * @see java.text.Collator#compare
524 */
525 final static int GREATER = 1;
526
527 /**
528 * Obtains a Collator instance from a CollatorProvider
529 * implementation.
530 */
531 private static class CollatorGetter
532 implements LocaleServiceProviderPool.LocalizedObjectGetter<CollatorProvider, Collator> {
533 private static final CollatorGetter INSTANCE = new CollatorGetter();
534
535 public Collator getObject(CollatorProvider collatorProvider,
536 Locale locale,
537 String key,
538 Object... params) {
539 assert params.length == 1;
540 Collator result = collatorProvider.getInstance(locale);
541 if (result != null) {
542 // put this Collator instance in the cache for two locales, one
543 // is for the desired locale, and the other is for the actual
544 // locale where the provider is found, which may be a fall back locale.
545 cache.put((Locale)params[0], result);
546 cache.put(locale, result);
547 return (Collator)result.clone();
548 }
549
550 return null;
551 }
552 }
553 }