/*
 * Copyright 2000-2006 Sun Microsystems, Inc. All Rights Reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation. Sun designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Sun in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
 * CA 95054 USA or visit www.sun.com if you need additional information or
 * have any questions.
 */
25
26package java.awt.font;
27
28/**
29 * The <code>NumericShaper</code> class is used to convert Latin-1 (European)
30 * digits to other Unicode decimal digits. Users of this class will
31 * primarily be people who wish to present data using
32 * national digit shapes, but find it more convenient to represent the
33 * data internally using Latin-1 (European) digits. This does not
34 * interpret the deprecated numeric shape selector character (U+206E).
35 * <p>
36 * Instances of <code>NumericShaper</code> are typically applied
37 * as attributes to text with the
38 * {@link TextAttribute#NUMERIC_SHAPING NUMERIC_SHAPING} attribute
39 * of the <code>TextAttribute</code> class.
40 * For example, this code snippet causes a <code>TextLayout</code> to
41 * shape European digits to Arabic in an Arabic context:<br>
42 * <blockquote><pre>
43 * Map map = new HashMap();
44 * map.put(TextAttribute.NUMERIC_SHAPING,
45 * NumericShaper.getContextualShaper(NumericShaper.ARABIC));
46 * FontRenderContext frc = ...;
47 * TextLayout layout = new TextLayout(text, map, frc);
48 * layout.draw(g2d, x, y);
49 * </pre></blockquote>
50 * <br>
51 * It is also possible to perform numeric shaping explicitly using instances
52 * of <code>NumericShaper</code>, as this code snippet demonstrates:<br>
53 * <blockquote><pre>
54 * char[] text = ...;
55 * // shape all EUROPEAN digits (except zero) to ARABIC digits
56 * NumericShaper shaper = NumericShaper.getShaper(NumericShaper.ARABIC);
57 * shaper.shape(text, start, count);
58 *
59 * // shape European digits to ARABIC digits if preceding text is Arabic, or
60 * // shape European digits to TAMIL digits if preceding text is Tamil, or
61 * // leave European digits alone if there is no preceding text, or
62 * // preceding text is neither Arabic nor Tamil
63 * NumericShaper shaper =
64 * NumericShaper.getContextualShaper(NumericShaper.ARABIC |
65 * NumericShaper.TAMIL,
66 * NumericShaper.EUROPEAN);
67 * shaper.shape(text. start, count);
68 * </pre></blockquote>
69 *
70 * @since 1.4
71 */
72
73public final class NumericShaper implements java.io.Serializable {
74 /** index of context for contextual shaping - values range from 0 to 18 */
75 private int key;
76
77 /** flag indicating whether to shape contextually (high bit) and which
78 * digit ranges to shape (bits 0-18)
79 */
80 private int mask;
81
82 /** Identifies the Latin-1 (European) and extended range, and
83 * Latin-1 (European) decimal base.
84 */
85 public static final int EUROPEAN = 1<<0;
86
87 /** Identifies the ARABIC range and decimal base. */
88 public static final int ARABIC = 1<<1;
89
90 /** Identifies the ARABIC range and ARABIC_EXTENDED decimal base. */
91 public static final int EASTERN_ARABIC = 1<<2;
92
93 /** Identifies the DEVANAGARI range and decimal base. */
94 public static final int DEVANAGARI = 1<<3;
95
96 /** Identifies the BENGALI range and decimal base. */
97 public static final int BENGALI = 1<<4;
98
99 /** Identifies the GURMUKHI range and decimal base. */
100 public static final int GURMUKHI = 1<<5;
101
102 /** Identifies the GUJARATI range and decimal base. */
103 public static final int GUJARATI = 1<<6;
104
105 /** Identifies the ORIYA range and decimal base. */
106 public static final int ORIYA = 1<<7;
107
108 /** Identifies the TAMIL range and decimal base. Tamil does not have a
109 * decimal digit 0 so Latin-1 (European) 0 is used.
110 */
111 public static final int TAMIL = 1<<8;
112
113 /** Identifies the TELUGU range and decimal base. */
114 public static final int TELUGU = 1<<9;
115
116 /** Identifies the KANNADA range and decimal base. */
117 public static final int KANNADA = 1<<10;
118
119 /** Identifies the MALAYALAM range and decimal base. */
120 public static final int MALAYALAM = 1<<11;
121
122 /** Identifies the THAI range and decimal base. */
123 public static final int THAI = 1<<12;
124
125 /** Identifies the LAO range and decimal base. */
126 public static final int LAO = 1<<13;
127
128 /** Identifies the TIBETAN range and decimal base. */
129 public static final int TIBETAN = 1<<14;
130
131 /** Identifies the MYANMAR range and decimal base. */
132 public static final int MYANMAR = 1<<15;
133
134 /** Identifies the ETHIOPIC range and decimal base. */
135 public static final int ETHIOPIC = 1<<16;
136
137 /** Identifies the KHMER range and decimal base. */
138 public static final int KHMER = 1<<17;
139
140 /** Identifies the MONGOLIAN range and decimal base. */
141 public static final int MONGOLIAN = 1<<18;
142
143 /** Identifies all ranges, for full contextual shaping. */
144 public static final int ALL_RANGES = 0x0007ffff;
145
146 private static final int EUROPEAN_KEY = 0;
147 private static final int ARABIC_KEY = 1;
148 private static final int EASTERN_ARABIC_KEY = 2;
149 private static final int DEVANAGARI_KEY = 3;
150 private static final int BENGALI_KEY = 4;
151 private static final int GURMUKHI_KEY = 5;
152 private static final int GUJARATI_KEY = 6;
153 private static final int ORIYA_KEY = 7;
154 private static final int TAMIL_KEY = 8;
155 private static final int TELUGU_KEY = 9;
156 private static final int KANNADA_KEY = 10;
157 private static final int MALAYALAM_KEY = 11;
158 private static final int THAI_KEY = 12;
159 private static final int LAO_KEY = 13;
160 private static final int TIBETAN_KEY = 14;
161 private static final int MYANMAR_KEY = 15;
162 private static final int ETHIOPIC_KEY = 16;
163 private static final int KHMER_KEY = 17;
164 private static final int MONGOLIAN_KEY = 18;
165
166 private static final int NUM_KEYS = 19;
167
168 private static final String[] keyNames = {
169 "EUROPEAN",
170 "ARABIC",
171 "EASTERN_ARABIC",
172 "DEVANAGARI",
173 "BENGALI",
174 "GURMUKHI",
175 "GUJARATI",
176 "ORIYA",
177 "TAMIL",
178 "TELUGU",
179 "KANNADA",
180 "MALAYALAM",
181 "THAI",
182 "LAO",
183 "TIBETAN",
184 "MYANMAR",
185 "ETHIOPIC",
186 "KHMER",
187 "MONGOLIAN"
188 };
189
190 private static final int CONTEXTUAL_MASK = 1<<31;
191
192 private static final char[] bases = {
193 '\u0030' - '\u0030', // EUROPEAN
194 '\u0660' - '\u0030', // ARABIC
195 '\u06f0' - '\u0030', // EASTERN_ARABIC
196 '\u0966' - '\u0030', // DEVANAGARI
197 '\u09e6' - '\u0030', // BENGALI
198 '\u0a66' - '\u0030', // GURMUKHI
199 '\u0ae6' - '\u0030', // GUJARATI
200 '\u0b66' - '\u0030', // ORIYA
201 '\u0be7' - '\u0030', // TAMIL - note missing zero
202 '\u0c66' - '\u0030', // TELUGU
203 '\u0ce6' - '\u0030', // KANNADA
204 '\u0d66' - '\u0030', // MALAYALAM
205 '\u0e50' - '\u0030', // THAI
206 '\u0ed0' - '\u0030', // LAO
207 '\u0f20' - '\u0030', // TIBETAN
208 '\u1040' - '\u0030', // MYANMAR
209 '\u1369' - '\u0030', // ETHIOPIC
210 '\u17e0' - '\u0030', // KHMER
211 '\u1810' - '\u0030', // MONGOLIAN
212 };
213
214 // some ranges adjoin or overlap, rethink if we want to do a binary search on this
215
216 private static final char[] contexts = {
217 '\u0000', '\u0300', // 'EUROPEAN' (really latin-1 and extended)
218 '\u0600', '\u0700', // ARABIC
219 '\u0600', '\u0700', // EASTERN_ARABIC -- note overlap with arabic
220 '\u0900', '\u0980', // DEVANAGARI
221 '\u0980', '\u0a00', // BENGALI
222 '\u0a00', '\u0a80', // GURMUKHI
223 '\u0a80', '\u0b00', // GUJARATI
224 '\u0b00', '\u0b80', // ORIYA
225 '\u0b80', '\u0c00', // TAMIL - note missing zero
226 '\u0c00', '\u0c80', // TELUGU
227 '\u0c80', '\u0d00', // KANNADA
228 '\u0d00', '\u0d80', // MALAYALAM
229 '\u0e00', '\u0e80', // THAI
230 '\u0e80', '\u0f00', // LAO
231 '\u0f00', '\u1000', // TIBETAN
232 '\u1000', '\u1080', // MYANMAR
233 '\u1200', '\u1380', // ETHIOPIC
234 '\u1780', '\u1800', // KHMER
235 '\u1800', '\u1900', // MONGOLIAN
236 '\uffff',
237 };
238
239 // assume most characters are near each other so probing the cache is infrequent,
240 // and a linear probe is ok.
241
242 private static int ctCache = 0;
243 private static int ctCacheLimit = contexts.length - 2;
244
245 // warning, synchronize access to this as it modifies state
246 private static int getContextKey(char c) {
247 if (c < contexts[ctCache]) {
248 while (ctCache > 0 && c < contexts[ctCache]) --ctCache;
249 } else if (c >= contexts[ctCache + 1]) {
250 while (ctCache < ctCacheLimit && c >= contexts[ctCache + 1]) ++ctCache;
251 }
252
253 // if we're not in a known range, then return EUROPEAN as the range key
254 return (ctCache & 0x1) == 0 ? (ctCache / 2) : EUROPEAN_KEY;
255 }
256
257 /*
258 * A range table of strong directional characters (types L, R, AL).
259 * Even (left) indexes are starts of ranges of non-strong-directional (or undefined)
260 * characters, odd (right) indexes are starts of ranges of strong directional
261 * characters.
262 */
263 private static char[] strongTable = {
264 '\u0000', '\u0041',
265 '\u005b', '\u0061',
266 '\u007b', '\u00aa',
267 '\u00ab', '\u00b5',
268 '\u00b6', '\u00ba',
269 '\u00bb', '\u00c0',
270 '\u00d7', '\u00d8',
271 '\u00f7', '\u00f8',
272 '\u0220', '\u0222',
273 '\u0234', '\u0250',
274 '\u02ae', '\u02b0',
275 '\u02b9', '\u02bb',
276 '\u02c2', '\u02d0',
277 '\u02d2', '\u02e0',
278 '\u02e5', '\u02ee',
279 '\u02ef', '\u037a',
280 '\u037b', '\u0386',
281 '\u0387', '\u0388',
282 '\u038b', '\u038c',
283 '\u038d', '\u038e',
284 '\u03a2', '\u03a3',
285 '\u03cf', '\u03d0',
286 '\u03d8', '\u03da',
287 '\u03f4', '\u0400',
288 '\u0483', '\u048c',
289 '\u04c5', '\u04c7',
290 '\u04c9', '\u04cb',
291 '\u04cd', '\u04d0',
292 '\u04f6', '\u04f8',
293 '\u04fa', '\u0531',
294 '\u0557', '\u0559',
295 '\u0560', '\u0561',
296 '\u0588', '\u0589',
297 '\u058a', '\u05be',
298 '\u05bf', '\u05c0',
299 '\u05c1', '\u05c3',
300 '\u05c4', '\u05d0',
301 '\u05eb', '\u05f0',
302 '\u05f5', '\u061b',
303 '\u061c', '\u061f',
304 '\u0620', '\u0621',
305 '\u063b', '\u0640',
306 '\u064b', '\u066d',
307 '\u066e', '\u0671',
308 '\u06d6', '\u06e5',
309 '\u06e7', '\u06fa',
310 '\u06ff', '\u0700',
311 '\u070e', '\u0710',
312 '\u0711', '\u0712',
313 '\u072d', '\u0780',
314 '\u07a6', '\u0903',
315 '\u0904', '\u0905',
316 '\u093a', '\u093d',
317 '\u0941', '\u0949',
318 '\u094d', '\u0950',
319 '\u0951', '\u0958',
320 '\u0962', '\u0964',
321 '\u0971', '\u0982',
322 '\u0984', '\u0985',
323 '\u098d', '\u098f',
324 '\u0991', '\u0993',
325 '\u09a9', '\u09aa',
326 '\u09b1', '\u09b2',
327 '\u09b3', '\u09b6',
328 '\u09ba', '\u09be',
329 '\u09c1', '\u09c7',
330 '\u09c9', '\u09cb',
331 '\u09cd', '\u09d7',
332 '\u09d8', '\u09dc',
333 '\u09de', '\u09df',
334 '\u09e2', '\u09e6',
335 '\u09f2', '\u09f4',
336 '\u09fb', '\u0a05',
337 '\u0a0b', '\u0a0f',
338 '\u0a11', '\u0a13',
339 '\u0a29', '\u0a2a',
340 '\u0a31', '\u0a32',
341 '\u0a34', '\u0a35',
342 '\u0a37', '\u0a38',
343 '\u0a3a', '\u0a3e',
344 '\u0a41', '\u0a59',
345 '\u0a5d', '\u0a5e',
346 '\u0a5f', '\u0a66',
347 '\u0a70', '\u0a72',
348 '\u0a75', '\u0a83',
349 '\u0a84', '\u0a85',
350 '\u0a8c', '\u0a8d',
351 '\u0a8e', '\u0a8f',
352 '\u0a92', '\u0a93',
353 '\u0aa9', '\u0aaa',
354 '\u0ab1', '\u0ab2',
355 '\u0ab4', '\u0ab5',
356 '\u0aba', '\u0abd',
357 '\u0ac1', '\u0ac9',
358 '\u0aca', '\u0acb',
359 '\u0acd', '\u0ad0',
360 '\u0ad1', '\u0ae0',
361 '\u0ae1', '\u0ae6',
362 '\u0af0', '\u0b02',
363 '\u0b04', '\u0b05',
364 '\u0b0d', '\u0b0f',
365 '\u0b11', '\u0b13',
366 '\u0b29', '\u0b2a',
367 '\u0b31', '\u0b32',
368 '\u0b34', '\u0b36',
369 '\u0b3a', '\u0b3d',
370 '\u0b3f', '\u0b40',
371 '\u0b41', '\u0b47',
372 '\u0b49', '\u0b4b',
373 '\u0b4d', '\u0b57',
374 '\u0b58', '\u0b5c',
375 '\u0b5e', '\u0b5f',
376 '\u0b62', '\u0b66',
377 '\u0b71', '\u0b83',
378 '\u0b84', '\u0b85',
379 '\u0b8b', '\u0b8e',
380 '\u0b91', '\u0b92',
381 '\u0b96', '\u0b99',
382 '\u0b9b', '\u0b9c',
383 '\u0b9d', '\u0b9e',
384 '\u0ba0', '\u0ba3',
385 '\u0ba5', '\u0ba8',
386 '\u0bab', '\u0bae',
387 '\u0bb6', '\u0bb7',
388 '\u0bba', '\u0bbe',
389 '\u0bc0', '\u0bc1',
390 '\u0bc3', '\u0bc6',
391 '\u0bc9', '\u0bca',
392 '\u0bcd', '\u0bd7',
393 '\u0bd8', '\u0be7',
394 '\u0bf3', '\u0c01',
395 '\u0c04', '\u0c05',
396 '\u0c0d', '\u0c0e',
397 '\u0c11', '\u0c12',
398 '\u0c29', '\u0c2a',
399 '\u0c34', '\u0c35',
400 '\u0c3a', '\u0c41',
401 '\u0c45', '\u0c60',
402 '\u0c62', '\u0c66',
403 '\u0c70', '\u0c82',
404 '\u0c84', '\u0c85',
405 '\u0c8d', '\u0c8e',
406 '\u0c91', '\u0c92',
407 '\u0ca9', '\u0caa',
408 '\u0cb4', '\u0cb5',
409 '\u0cba', '\u0cbe',
410 '\u0cbf', '\u0cc0',
411 '\u0cc5', '\u0cc7',
412 '\u0cc9', '\u0cca',
413 '\u0ccc', '\u0cd5',
414 '\u0cd7', '\u0cde',
415 '\u0cdf', '\u0ce0',
416 '\u0ce2', '\u0ce6',
417 '\u0cf0', '\u0d02',
418 '\u0d04', '\u0d05',
419 '\u0d0d', '\u0d0e',
420 '\u0d11', '\u0d12',
421 '\u0d29', '\u0d2a',
422 '\u0d3a', '\u0d3e',
423 '\u0d41', '\u0d46',
424 '\u0d49', '\u0d4a',
425 '\u0d4d', '\u0d57',
426 '\u0d58', '\u0d60',
427 '\u0d62', '\u0d66',
428 '\u0d70', '\u0d82',
429 '\u0d84', '\u0d85',
430 '\u0d97', '\u0d9a',
431 '\u0db2', '\u0db3',
432 '\u0dbc', '\u0dbd',
433 '\u0dbe', '\u0dc0',
434 '\u0dc7', '\u0dcf',
435 '\u0dd2', '\u0dd8',
436 '\u0de0', '\u0df2',
437 '\u0df5', '\u0e01',
438 '\u0e31', '\u0e32',
439 '\u0e34', '\u0e40',
440 '\u0e47', '\u0e4f',
441 '\u0e5c', '\u0e81',
442 '\u0e83', '\u0e84',
443 '\u0e85', '\u0e87',
444 '\u0e89', '\u0e8a',
445 '\u0e8b', '\u0e8d',
446 '\u0e8e', '\u0e94',
447 '\u0e98', '\u0e99',
448 '\u0ea0', '\u0ea1',
449 '\u0ea4', '\u0ea5',
450 '\u0ea6', '\u0ea7',
451 '\u0ea8', '\u0eaa',
452 '\u0eac', '\u0ead',
453 '\u0eb1', '\u0eb2',
454 '\u0eb4', '\u0ebd',
455 '\u0ebe', '\u0ec0',
456 '\u0ec5', '\u0ec6',
457 '\u0ec7', '\u0ed0',
458 '\u0eda', '\u0edc',
459 '\u0ede', '\u0f00',
460 '\u0f18', '\u0f1a',
461 '\u0f35', '\u0f36',
462 '\u0f37', '\u0f38',
463 '\u0f39', '\u0f3e',
464 '\u0f48', '\u0f49',
465 '\u0f6b', '\u0f7f',
466 '\u0f80', '\u0f85',
467 '\u0f86', '\u0f88',
468 '\u0f8c', '\u0fbe',
469 '\u0fc6', '\u0fc7',
470 '\u0fcd', '\u0fcf',
471 '\u0fd0', '\u1000',
472 '\u1022', '\u1023',
473 '\u1028', '\u1029',
474 '\u102b', '\u102c',
475 '\u102d', '\u1031',
476 '\u1032', '\u1038',
477 '\u1039', '\u1040',
478 '\u1058', '\u10a0',
479 '\u10c6', '\u10d0',
480 '\u10f7', '\u10fb',
481 '\u10fc', '\u1100',
482 '\u115a', '\u115f',
483 '\u11a3', '\u11a8',
484 '\u11fa', '\u1200',
485 '\u1207', '\u1208',
486 '\u1247', '\u1248',
487 '\u1249', '\u124a',
488 '\u124e', '\u1250',
489 '\u1257', '\u1258',
490 '\u1259', '\u125a',
491 '\u125e', '\u1260',
492 '\u1287', '\u1288',
493 '\u1289', '\u128a',
494 '\u128e', '\u1290',
495 '\u12af', '\u12b0',
496 '\u12b1', '\u12b2',
497 '\u12b6', '\u12b8',
498 '\u12bf', '\u12c0',
499 '\u12c1', '\u12c2',
500 '\u12c6', '\u12c8',
501 '\u12cf', '\u12d0',
502 '\u12d7', '\u12d8',
503 '\u12ef', '\u12f0',
504 '\u130f', '\u1310',
505 '\u1311', '\u1312',
506 '\u1316', '\u1318',
507 '\u131f', '\u1320',
508 '\u1347', '\u1348',
509 '\u135b', '\u1361',
510 '\u137d', '\u13a0',
511 '\u13f5', '\u1401',
512 '\u1677', '\u1681',
513 '\u169b', '\u16a0',
514 '\u16f1', '\u1780',
515 '\u17b7', '\u17be',
516 '\u17c6', '\u17c7',
517 '\u17c9', '\u17d4',
518 '\u17db', '\u17dc',
519 '\u17dd', '\u17e0',
520 '\u17ea', '\u1810',
521 '\u181a', '\u1820',
522 '\u1878', '\u1880',
523 '\u18a9', '\u1e00',
524 '\u1e9c', '\u1ea0',
525 '\u1efa', '\u1f00',
526 '\u1f16', '\u1f18',
527 '\u1f1e', '\u1f20',
528 '\u1f46', '\u1f48',
529 '\u1f4e', '\u1f50',
530 '\u1f58', '\u1f59',
531 '\u1f5a', '\u1f5b',
532 '\u1f5c', '\u1f5d',
533 '\u1f5e', '\u1f5f',
534 '\u1f7e', '\u1f80',
535 '\u1fb5', '\u1fb6',
536 '\u1fbd', '\u1fbe',
537 '\u1fbf', '\u1fc2',
538 '\u1fc5', '\u1fc6',
539 '\u1fcd', '\u1fd0',
540 '\u1fd4', '\u1fd6',
541 '\u1fdc', '\u1fe0',
542 '\u1fed', '\u1ff2',
543 '\u1ff5', '\u1ff6',
544 '\u1ffd', '\u200e',
545 '\u2010', '\u207f',
546 '\u2080', '\u2102',
547 '\u2103', '\u2107',
548 '\u2108', '\u210a',
549 '\u2114', '\u2115',
550 '\u2116', '\u2119',
551 '\u211e', '\u2124',
552 '\u2125', '\u2126',
553 '\u2127', '\u2128',
554 '\u2129', '\u212a',
555 '\u212e', '\u212f',
556 '\u2132', '\u2133',
557 '\u213a', '\u2160',
558 '\u2184', '\u2336',
559 '\u237b', '\u2395',
560 '\u2396', '\u249c',
561 '\u24ea', '\u3005',
562 '\u3008', '\u3021',
563 '\u302a', '\u3031',
564 '\u3036', '\u3038',
565 '\u303b', '\u3041',
566 '\u3095', '\u309d',
567 '\u309f', '\u30a1',
568 '\u30fb', '\u30fc',
569 '\u30ff', '\u3105',
570 '\u312d', '\u3131',
571 '\u318f', '\u3190',
572 '\u31b8', '\u3200',
573 '\u321d', '\u3220',
574 '\u3244', '\u3260',
575 '\u327c', '\u327f',
576 '\u32b1', '\u32c0',
577 '\u32cc', '\u32d0',
578 '\u32ff', '\u3300',
579 '\u3377', '\u337b',
580 '\u33de', '\u33e0',
581 '\u33ff', '\u3400',
582 '\u4db6', '\u4e00',
583 '\u9fa6', '\ua000',
584 '\ua48d', '\uac00',
585 '\ud7a4', '\uf900',
586 '\ufa2e', '\ufb00',
587 '\ufb07', '\ufb13',
588 '\ufb18', '\ufb1d',
589 '\ufb1e', '\ufb1f',
590 '\ufb29', '\ufb2a',
591 '\ufb37', '\ufb38',
592 '\ufb3d', '\ufb3e',
593 '\ufb3f', '\ufb40',
594 '\ufb42', '\ufb43',
595 '\ufb45', '\ufb46',
596 '\ufbb2', '\ufbd3',
597 '\ufd3e', '\ufd50',
598 '\ufd90', '\ufd92',
599 '\ufdc8', '\ufdf0',
600 '\ufdfc', '\ufe70',
601 '\ufe73', '\ufe74',
602 '\ufe75', '\ufe76',
603 '\ufefd', '\uff21',
604 '\uff3b', '\uff41',
605 '\uff5b', '\uff66',
606 '\uffbf', '\uffc2',
607 '\uffc8', '\uffca',
608 '\uffd0', '\uffd2',
609 '\uffd8', '\uffda',
610 '\uffdd', '\uffff' // last entry is sentinel, actually never checked
611 };
612
613
614 // use a binary search with a cache
615
616 private static int stCache = 0;
617
618 // warning, synchronize access to this as it modifies state
619 private static boolean isStrongDirectional(char c) {
620 if (c < strongTable[stCache]) {
621 stCache = search(c, strongTable, 0, stCache);
622 } else if (c >= strongTable[stCache + 1]) {
623 stCache = search(c, strongTable, stCache + 1, strongTable.length - stCache - 1);
624 }
625 return (stCache & 0x1) == 1;
626 }
627
628 static private int getKeyFromMask(int mask) {
629 int key = 0;
630 while (key < NUM_KEYS && ((mask & (1<<key)) == 0)) {
631 ++key;
632 }
633 if (key == NUM_KEYS || ((mask & ~(1<<key)) != 0)) {
634 throw new IllegalArgumentException("invalid shaper: " + Integer.toHexString(mask));
635 }
636 return key;
637 }
638
639 /**
640 * Returns a shaper for the provided unicode range. All
641 * Latin-1 (EUROPEAN) digits are converted
642 * to the corresponding decimal unicode digits.
643 * @param singleRange the specified Unicode range
644 * @return a non-contextual numeric shaper
645 * @throws IllegalArgumentException if the range is not a single range
646 */
647 static public NumericShaper getShaper(int singleRange) {
648 int key = getKeyFromMask(singleRange);
649 return new NumericShaper(key, singleRange);
650 }
651
652 /**
653 * Returns a contextual shaper for the provided unicode range(s).
654 * Latin-1 (EUROPEAN) digits are converted to the decimal digits
655 * corresponding to the range of the preceding text, if the
656 * range is one of the provided ranges. Multiple ranges are
657 * represented by or-ing the values together, such as,
658 * <code>NumericShaper.ARABIC | NumericShaper.THAI</code>. The
659 * shaper assumes EUROPEAN as the starting context, that is, if
660 * EUROPEAN digits are encountered before any strong directional
661 * text in the string, the context is presumed to be EUROPEAN, and
662 * so the digits will not shape.
663 * @param ranges the specified Unicode ranges
664 * @return a shaper for the specified ranges
665 */
666 static public NumericShaper getContextualShaper(int ranges) {
667 ranges |= CONTEXTUAL_MASK;
668 return new NumericShaper(EUROPEAN_KEY, ranges);
669 }
670
671 /**
672 * Returns a contextual shaper for the provided unicode range(s).
673 * Latin-1 (EUROPEAN) digits will be converted to the decimal digits
674 * corresponding to the range of the preceding text, if the
675 * range is one of the provided ranges. Multiple ranges are
676 * represented by or-ing the values together, for example,
677 * <code>NumericShaper.ARABIC | NumericShaper.THAI</code>. The
678 * shaper uses defaultContext as the starting context.
679 * @param ranges the specified Unicode ranges
680 * @param defaultContext the starting context, such as
681 * <code>NumericShaper.EUROPEAN</code>
682 * @return a shaper for the specified Unicode ranges.
683 * @throws IllegalArgumentException if the specified
684 * <code>defaultContext</code> is not a single valid range.
685 */
686 static public NumericShaper getContextualShaper(int ranges, int defaultContext) {
687 int key = getKeyFromMask(defaultContext);
688 ranges |= CONTEXTUAL_MASK;
689 return new NumericShaper(key, ranges);
690 }
691
692 /**
693 * Private constructor.
694 */
695 private NumericShaper(int key, int mask) {
696 this.key = key;
697 this.mask = mask;
698 }
699
700 /**
701 * Converts the digits in the text that occur between start and
702 * start + count.
703 * @param text an array of characters to convert
704 * @param start the index into <code>text</code> to start
705 * converting
706 * @param count the number of characters in <code>text</code>
707 * to convert
708 * @throws IndexOutOfBoundsException if start or start + count is
709 * out of bounds
710 * @throws NullPointerException if text is null
711 */
712 public void shape(char[] text, int start, int count) {
713 if (text == null) {
714 throw new NullPointerException("text is null");
715 }
716 if ((start < 0)
717 || (start > text.length)
718 || ((start + count) < 0)
719 || ((start + count) > text.length)) {
720 throw new IndexOutOfBoundsException(
721 "bad start or count for text of length " + text.length);
722 }
723
724 if (isContextual()) {
725 shapeContextually(text, start, count, key);
726 } else {
727 shapeNonContextually(text, start, count);
728 }
729 }
730
731 /**
732 * Converts the digits in the text that occur between start and
733 * start + count, using the provided context.
734 * Context is ignored if the shaper is not a contextual shaper.
735 * @param text an array of characters
736 * @param start the index into <code>text</code> to start
737 * converting
738 * @param count the number of characters in <code>text</code>
739 * to convert
740 * @param context the context to which to convert the
741 * characters, such as <code>NumericShaper.EUROPEAN</code>
742 * @throws IndexOutOfBoundsException if start or start + count is
743 * out of bounds
744 * @throws NullPointerException if text is null
745 * @throws IllegalArgumentException if this is a contextual shaper
746 * and the specified <code>context</code> is not a single valid
747 * range.
748 */
749 public void shape(char[] text, int start, int count, int context) {
750 if (text == null) {
751 throw new NullPointerException("text is null");
752 }
753 if ((start < 0)
754 || (start > text.length)
755 || ((start + count) < 0)
756 || ((start + count) > text.length)) {
757 throw new IndexOutOfBoundsException(
758 "bad start or count for text of length " + text.length);
759 }
760
761 if (isContextual()) {
762 int ctxKey = getKeyFromMask(context);
763 shapeContextually(text, start, count, ctxKey);
764 } else {
765 shapeNonContextually(text, start, count);
766 }
767 }
768
769 /**
770 * Returns a <code>boolean</code> indicating whether or not
771 * this shaper shapes contextually.
772 * @return <code>true</code> if this shaper is contextual;
773 * <code>false</code> otherwise.
774 */
775 public boolean isContextual() {
776 return (mask & CONTEXTUAL_MASK) != 0;
777 }
778
779 /**
780 * Returns an <code>int</code> that ORs together the values for
781 * all the ranges that will be shaped.
782 * <p>
783 * For example, to check if a shaper shapes to Arabic, you would use the
784 * following:
785 * <blockquote>
786 * <code>if ((shaper.getRanges() & shaper.ARABIC) != 0) { ... </code>
787 * </blockquote>
788 * @return the values for all the ranges to be shaped.
789 */
790 public int getRanges() {
791 return mask & ~CONTEXTUAL_MASK;
792 }
793
794 /**
795 * Perform non-contextual shaping.
796 */
797 private void shapeNonContextually(char[] text, int start, int count) {
798 int base = bases[key];
799 char minDigit = key == TAMIL_KEY ? '\u0031' : '\u0030'; // Tamil doesn't use decimal zero
800 for (int i = start, e = start + count; i < e; ++i) {
801 char c = text[i];
802 if (c >= minDigit && c <= '\u0039') {
803 text[i] = (char)(c + base);
804 }
805 }
806 }
807
808 /**
809 * Perform contextual shaping.
810 * Synchronized to protect caches used in getContextKey and isStrongDirectional.
811 */
812 private synchronized void shapeContextually(char[] text, int start, int count, int ctxKey) {
813
814 // if we don't support this context, then don't shape
815 if ((mask & (1<<ctxKey)) == 0) {
816 ctxKey = EUROPEAN_KEY;
817 }
818 int lastkey = ctxKey;
819
820 int base = bases[ctxKey];
821 char minDigit = ctxKey == TAMIL_KEY ? '\u0031' : '\u0030'; // Tamil doesn't use decimal zero
822
823 for (int i = start, e = start + count; i < e; ++i) {
824 char c = text[i];
825 if (c >= minDigit && c <= '\u0039') {
826 text[i] = (char)(c + base);
827 }
828
829 if (isStrongDirectional(c)) {
830 int newkey = getContextKey(c);
831 if (newkey != lastkey) {
832 lastkey = newkey;
833
834 ctxKey = newkey;
835 if (((mask & EASTERN_ARABIC) != 0) && (ctxKey == ARABIC_KEY || ctxKey == EASTERN_ARABIC_KEY)) {
836 ctxKey = EASTERN_ARABIC_KEY;
837 } else if ((mask & (1<<ctxKey)) == 0) {
838 ctxKey = EUROPEAN_KEY;
839 }
840
841 base = bases[ctxKey];
842
843 minDigit = ctxKey == TAMIL_KEY ? '\u0031' : '\u0030'; // Tamil doesn't use decimal zero
844 }
845 }
846 }
847 }
848
849 /**
850 * Returns a hash code for this shaper.
851 * @return this shaper's hash code.
852 * @see java.lang.Object#hashCode
853 */
854 public int hashCode() {
855 return mask;
856 }
857
858 /**
859 * Returns true if the specified object is an instance of
860 * <code>NumericShaper</code> and shapes identically to this one.
861 * @param o the specified object to compare to this
862 * <code>NumericShaper</code>
863 * @return <code>true</code> if <code>o</code> is an instance
864 * of <code>NumericShaper</code> and shapes in the same way;
865 * <code>false</code> otherwise.
866 * @see java.lang.Object#equals(java.lang.Object)
867 */
868 public boolean equals(Object o) {
869 if (o != null) {
870 try {
871 NumericShaper rhs = (NumericShaper)o;
872 return rhs.mask == mask && rhs.key == key;
873 }
874 catch (ClassCastException e) {
875 }
876 }
877 return false;
878 }
879
880 /**
881 * Returns a <code>String</code> that describes this shaper. This method
882 * is used for debugging purposes only.
883 * @return a <code>String</code> describing this shaper.
884 */
885 public String toString() {
886 StringBuilder buf = new StringBuilder(super.toString());
887
888 buf.append("[contextual:" + isContextual());
889
890 if (isContextual()) {
891 buf.append(", context:" + keyNames[key]);
892 }
893
894 buf.append(", range(s): ");
895 boolean first = true;
896 for (int i = 0; i < NUM_KEYS; ++i) {
897 if ((mask & (1 << i)) != 0) {
898 if (first) {
899 first = false;
900 } else {
901 buf.append(", ");
902 }
903 buf.append(keyNames[i]);
904 }
905 }
906 buf.append(']');
907
908 return buf.toString();
909 }
910
911 /**
912 * Returns the index of the high bit in value (assuming le, actually
913 * power of 2 >= value). value must be positive.
914 */
915 private static int getHighBit(int value) {
916 if (value <= 0) {
917 return -32;
918 }
919
920 int bit = 0;
921
922 if (value >= 1 << 16) {
923 value >>= 16;
924 bit += 16;
925 }
926
927 if (value >= 1 << 8) {
928 value >>= 8;
929 bit += 8;
930 }
931
932 if (value >= 1 << 4) {
933 value >>= 4;
934 bit += 4;
935 }
936
937 if (value >= 1 << 2) {
938 value >>= 2;
939 bit += 2;
940 }
941
942 if (value >= 1 << 1) {
943 value >>= 1;
944 bit += 1;
945 }
946
947 return bit;
948 }
949
950 /**
951 * fast binary search over subrange of array.
952 */
953 private static int search(char value, char[] array, int start, int length)
954 {
955 int power = 1 << getHighBit(length);
956 int extra = length - power;
957 int probe = power;
958 int index = start;
959
960 if (value >= array[index + extra]) {
961 index += extra;
962 }
963
964 while (probe > 1) {
965 probe >>= 1;
966
967 if (value >= array[index + probe]) {
968 index += probe;
969 }
970 }
971
972 return index;
973 }
974}