J. Duke | 319a3b9 | 2007-12-01 00:00:00 +0000 | [diff] [blame^] | 1 | /* |
| 2 | * Copyright 2000-2006 Sun Microsystems, Inc. All Rights Reserved. |
| 3 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
| 4 | * |
| 5 | * This code is free software; you can redistribute it and/or modify it |
| 6 | * under the terms of the GNU General Public License version 2 only, as |
| 7 | * published by the Free Software Foundation. Sun designates this |
| 8 | * particular file as subject to the "Classpath" exception as provided |
| 9 | * by Sun in the LICENSE file that accompanied this code. |
| 10 | * |
| 11 | * This code is distributed in the hope that it will be useful, but WITHOUT |
| 12 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
| 13 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
| 14 | * version 2 for more details (a copy is included in the LICENSE file that |
| 15 | * accompanied this code). |
| 16 | * |
| 17 | * You should have received a copy of the GNU General Public License version |
| 18 | * 2 along with this work; if not, write to the Free Software Foundation, |
| 19 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
| 20 | * |
| 21 | * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, |
| 22 | * CA 95054 USA or visit www.sun.com if you need additional information or |
| 23 | * have any questions. |
| 24 | */ |
| 25 | |
| 26 | package java.awt.font; |
| 27 | |
| 28 | /** |
| 29 | * The <code>NumericShaper</code> class is used to convert Latin-1 (European) |
| 30 | * digits to other Unicode decimal digits. Users of this class will |
| 31 | * primarily be people who wish to present data using |
| 32 | * national digit shapes, but find it more convenient to represent the |
| 33 | * data internally using Latin-1 (European) digits. This does not |
| 34 | * interpret the deprecated numeric shape selector character (U+206E). |
| 35 | * <p> |
| 36 | * Instances of <code>NumericShaper</code> are typically applied |
| 37 | * as attributes to text with the |
| 38 | * {@link TextAttribute#NUMERIC_SHAPING NUMERIC_SHAPING} attribute |
| 39 | * of the <code>TextAttribute</code> class. |
| 40 | * For example, this code snippet causes a <code>TextLayout</code> to |
| 41 | * shape European digits to Arabic in an Arabic context:<br> |
| 42 | * <blockquote><pre> |
| 43 | * Map map = new HashMap(); |
| 44 | * map.put(TextAttribute.NUMERIC_SHAPING, |
| 45 | * NumericShaper.getContextualShaper(NumericShaper.ARABIC)); |
| 46 | * FontRenderContext frc = ...; |
| 47 | * TextLayout layout = new TextLayout(text, map, frc); |
| 48 | * layout.draw(g2d, x, y); |
| 49 | * </pre></blockquote> |
| 50 | * <br> |
| 51 | * It is also possible to perform numeric shaping explicitly using instances |
| 52 | * of <code>NumericShaper</code>, as this code snippet demonstrates:<br> |
| 53 | * <blockquote><pre> |
| 54 | * char[] text = ...; |
| 55 | * // shape all EUROPEAN digits to ARABIC digits |
| 56 | * NumericShaper shaper = NumericShaper.getShaper(NumericShaper.ARABIC); |
| 57 | * shaper.shape(text, start, count); |
| 58 | * |
| 59 | * // shape European digits to ARABIC digits if preceding text is Arabic, or |
| 60 | * // shape European digits to TAMIL digits if preceding text is Tamil, or |
| 61 | * // leave European digits alone if there is no preceding text, or |
| 62 | * // preceding text is neither Arabic nor Tamil |
| 63 | * NumericShaper shaper = |
| 64 | * NumericShaper.getContextualShaper(NumericShaper.ARABIC | |
| 65 | * NumericShaper.TAMIL, |
| 66 | * NumericShaper.EUROPEAN); |
| 67 | * shaper.shape(text, start, count); |
| 68 | * </pre></blockquote> |
| 69 | * |
| 70 | * @since 1.4 |
| 71 | */ |
| 72 | |
public final class NumericShaper implements java.io.Serializable {
    /*
     * Serialization note: this class declares no serialVersionUID, so the
     * default one is derived from the class shape. The two serialized fields
     * below therefore keep their exact names and modifiers (deliberately not
     * final), and all lookup caches are private transient so that they are
     * neither written to the stream nor part of the computed UID.
     */

    /** index of context for contextual shaping - values range from 0 to 18 */
    private int key;

    /** flag indicating whether to shape contextually (high bit) and which
     *  digit ranges to shape (bits 0-18)
     */
    private int mask;

    /** Identifies the Latin-1 (European) and extended range, and
     *  Latin-1 (European) decimal base.
     */
    public static final int EUROPEAN = 1<<0;

    /** Identifies the ARABIC range and decimal base. */
    public static final int ARABIC = 1<<1;

    /** Identifies the ARABIC range and ARABIC_EXTENDED decimal base. */
    public static final int EASTERN_ARABIC = 1<<2;

    /** Identifies the DEVANAGARI range and decimal base. */
    public static final int DEVANAGARI = 1<<3;

    /** Identifies the BENGALI range and decimal base. */
    public static final int BENGALI = 1<<4;

    /** Identifies the GURMUKHI range and decimal base. */
    public static final int GURMUKHI = 1<<5;

    /** Identifies the GUJARATI range and decimal base. */
    public static final int GUJARATI = 1<<6;

    /** Identifies the ORIYA range and decimal base. */
    public static final int ORIYA = 1<<7;

    /** Identifies the TAMIL range and decimal base. Tamil does not have a
     *  decimal digit 0 so Latin-1 (European) 0 is used.
     */
    public static final int TAMIL = 1<<8;

    /** Identifies the TELUGU range and decimal base. */
    public static final int TELUGU = 1<<9;

    /** Identifies the KANNADA range and decimal base. */
    public static final int KANNADA = 1<<10;

    /** Identifies the MALAYALAM range and decimal base. */
    public static final int MALAYALAM = 1<<11;

    /** Identifies the THAI range and decimal base. */
    public static final int THAI = 1<<12;

    /** Identifies the LAO range and decimal base. */
    public static final int LAO = 1<<13;

    /** Identifies the TIBETAN range and decimal base. */
    public static final int TIBETAN = 1<<14;

    /** Identifies the MYANMAR range and decimal base. */
    public static final int MYANMAR = 1<<15;

    /** Identifies the ETHIOPIC range and decimal base. Ethiopic does not
     *  have a decimal digit 0 so Latin-1 (European) 0 is used.
     */
    public static final int ETHIOPIC = 1<<16;

    /** Identifies the KHMER range and decimal base. */
    public static final int KHMER = 1<<17;

    /** Identifies the MONGOLIAN range and decimal base. */
    public static final int MONGOLIAN = 1<<18;

    /** Identifies all ranges, for full contextual shaping. */
    public static final int ALL_RANGES = 0x0007ffff;

    // Key n is the bit index of the corresponding public range constant
    // (1 << n) and the row index into keyNames, bases and contexts.
    private static final int EUROPEAN_KEY = 0;
    private static final int ARABIC_KEY = 1;
    private static final int EASTERN_ARABIC_KEY = 2;
    private static final int DEVANAGARI_KEY = 3;
    private static final int BENGALI_KEY = 4;
    private static final int GURMUKHI_KEY = 5;
    private static final int GUJARATI_KEY = 6;
    private static final int ORIYA_KEY = 7;
    private static final int TAMIL_KEY = 8;
    private static final int TELUGU_KEY = 9;
    private static final int KANNADA_KEY = 10;
    private static final int MALAYALAM_KEY = 11;
    private static final int THAI_KEY = 12;
    private static final int LAO_KEY = 13;
    private static final int TIBETAN_KEY = 14;
    private static final int MYANMAR_KEY = 15;
    private static final int ETHIOPIC_KEY = 16;
    private static final int KHMER_KEY = 17;
    private static final int MONGOLIAN_KEY = 18;

    private static final int NUM_KEYS = 19;

    private static final String[] keyNames = {
        "EUROPEAN",
        "ARABIC",
        "EASTERN_ARABIC",
        "DEVANAGARI",
        "BENGALI",
        "GURMUKHI",
        "GUJARATI",
        "ORIYA",
        "TAMIL",
        "TELUGU",
        "KANNADA",
        "MALAYALAM",
        "THAI",
        "LAO",
        "TIBETAN",
        "MYANMAR",
        "ETHIOPIC",
        "KHMER",
        "MONGOLIAN"
    };

    private static final int CONTEXTUAL_MASK = 1<<31;

    /**
     * Offset added to a Latin-1 digit to obtain the digit of each script,
     * indexed by key. Every entry is (code point that '0' maps to) - '0'.
     * Tamil and Ethiopic digits start at ONE (U+0BE7 and U+1369), so their
     * entries are taken one code point lower; '0' itself is never shaped
     * for those scripts (see minDigitFor).
     */
    private static final char[] bases = {
        '\u0030' - '\u0030', // EUROPEAN
        '\u0660' - '\u0030', // ARABIC
        '\u06f0' - '\u0030', // EASTERN_ARABIC
        '\u0966' - '\u0030', // DEVANAGARI
        '\u09e6' - '\u0030', // BENGALI
        '\u0a66' - '\u0030', // GURMUKHI
        '\u0ae6' - '\u0030', // GUJARATI
        '\u0b66' - '\u0030', // ORIYA
        '\u0be6' - '\u0030', // TAMIL - digit one is U+0BE7, zero is not shaped
        '\u0c66' - '\u0030', // TELUGU
        '\u0ce6' - '\u0030', // KANNADA
        '\u0d66' - '\u0030', // MALAYALAM
        '\u0e50' - '\u0030', // THAI
        '\u0ed0' - '\u0030', // LAO
        '\u0f20' - '\u0030', // TIBETAN
        '\u1040' - '\u0030', // MYANMAR
        '\u1368' - '\u0030', // ETHIOPIC - digit one is U+1369, zero is not shaped
        '\u17e0' - '\u0030', // KHMER
        '\u1810' - '\u0030', // MONGOLIAN
    };

    // some ranges adjoin or overlap, rethink if we want to do a binary search on this

    /**
     * Start (inclusive) and end (exclusive) of the character range of each
     * key, two entries per key, followed by a sentinel. An even index i
     * means "inside the range of key i/2"; an odd index means "in the gap
     * after that range".
     */
    private static final char[] contexts = {
        '\u0000', '\u0300', // 'EUROPEAN' (really latin-1 and extended)
        '\u0600', '\u0700', // ARABIC
        '\u0600', '\u0700', // EASTERN_ARABIC -- note overlap with arabic
        '\u0900', '\u0980', // DEVANAGARI
        '\u0980', '\u0a00', // BENGALI
        '\u0a00', '\u0a80', // GURMUKHI
        '\u0a80', '\u0b00', // GUJARATI
        '\u0b00', '\u0b80', // ORIYA
        '\u0b80', '\u0c00', // TAMIL - note missing zero
        '\u0c00', '\u0c80', // TELUGU
        '\u0c80', '\u0d00', // KANNADA
        '\u0d00', '\u0d80', // MALAYALAM
        '\u0e00', '\u0e80', // THAI
        '\u0e80', '\u0f00', // LAO
        '\u0f00', '\u1000', // TIBETAN
        '\u1000', '\u1080', // MYANMAR
        '\u1200', '\u1380', // ETHIOPIC
        '\u1780', '\u1800', // KHMER
        '\u1800', '\u1900', // MONGOLIAN
        '\uffff',
    };

    // assume most characters are near each other so probing the cache is infrequent,
    // and a linear probe is ok.

    /** Largest index the context cache may rest on (last range-end entry). */
    private static final int ctCacheLimit = contexts.length - 2;

    /**
     * Index into contexts found by the previous getContextKey call.
     * Per-instance and transient: it is guarded by this instance's monitor
     * (held by shapeContextually) and restarts at 0 after deserialization.
     */
    private transient int ctCache;

    /**
     * Maps a character to the key of the script range containing it.
     * Probes linearly from the cached position. Arabic characters may yield
     * either ARABIC_KEY or EASTERN_ARABIC_KEY depending on the direction the
     * probe came from, because both keys share one range; callers must
     * treat the two as aliases.
     *
     * Must be called with this instance's monitor held (it updates ctCache).
     *
     * @param c the character to classify
     * @return the range key, or EUROPEAN_KEY if c is in no known range
     */
    private int getContextKey(char c) {
        int idx = ctCache;
        if (c < contexts[idx]) {
            while (idx > 0 && c < contexts[idx]) {
                --idx;
            }
        } else if (c >= contexts[idx + 1]) {
            while (idx < ctCacheLimit && c >= contexts[idx + 1]) {
                ++idx;
            }
        }
        ctCache = idx;

        // if we're not in a known range, then return EUROPEAN as the range key
        return (idx & 0x1) == 0 ? (idx / 2) : EUROPEAN_KEY;
    }

    /*
     * A range table of strong directional characters (types L, R, AL).
     * Even (left) indexes are starts of ranges of non-strong-directional (or undefined)
     * characters, odd (right) indexes are starts of ranges of strong directional
     * characters.
     */
    private static final char[] strongTable = {
        '\u0000', '\u0041',
        '\u005b', '\u0061',
        '\u007b', '\u00aa',
        '\u00ab', '\u00b5',
        '\u00b6', '\u00ba',
        '\u00bb', '\u00c0',
        '\u00d7', '\u00d8',
        '\u00f7', '\u00f8',
        '\u0220', '\u0222',
        '\u0234', '\u0250',
        '\u02ae', '\u02b0',
        '\u02b9', '\u02bb',
        '\u02c2', '\u02d0',
        '\u02d2', '\u02e0',
        '\u02e5', '\u02ee',
        '\u02ef', '\u037a',
        '\u037b', '\u0386',
        '\u0387', '\u0388',
        '\u038b', '\u038c',
        '\u038d', '\u038e',
        '\u03a2', '\u03a3',
        '\u03cf', '\u03d0',
        '\u03d8', '\u03da',
        '\u03f4', '\u0400',
        '\u0483', '\u048c',
        '\u04c5', '\u04c7',
        '\u04c9', '\u04cb',
        '\u04cd', '\u04d0',
        '\u04f6', '\u04f8',
        '\u04fa', '\u0531',
        '\u0557', '\u0559',
        '\u0560', '\u0561',
        '\u0588', '\u0589',
        '\u058a', '\u05be',
        '\u05bf', '\u05c0',
        '\u05c1', '\u05c3',
        '\u05c4', '\u05d0',
        '\u05eb', '\u05f0',
        '\u05f5', '\u061b',
        '\u061c', '\u061f',
        '\u0620', '\u0621',
        '\u063b', '\u0640',
        '\u064b', '\u066d',
        '\u066e', '\u0671',
        '\u06d6', '\u06e5',
        '\u06e7', '\u06fa',
        '\u06ff', '\u0700',
        '\u070e', '\u0710',
        '\u0711', '\u0712',
        '\u072d', '\u0780',
        '\u07a6', '\u0903',
        '\u0904', '\u0905',
        '\u093a', '\u093d',
        '\u0941', '\u0949',
        '\u094d', '\u0950',
        '\u0951', '\u0958',
        '\u0962', '\u0964',
        '\u0971', '\u0982',
        '\u0984', '\u0985',
        '\u098d', '\u098f',
        '\u0991', '\u0993',
        '\u09a9', '\u09aa',
        '\u09b1', '\u09b2',
        '\u09b3', '\u09b6',
        '\u09ba', '\u09be',
        '\u09c1', '\u09c7',
        '\u09c9', '\u09cb',
        '\u09cd', '\u09d7',
        '\u09d8', '\u09dc',
        '\u09de', '\u09df',
        '\u09e2', '\u09e6',
        '\u09f2', '\u09f4',
        '\u09fb', '\u0a05',
        '\u0a0b', '\u0a0f',
        '\u0a11', '\u0a13',
        '\u0a29', '\u0a2a',
        '\u0a31', '\u0a32',
        '\u0a34', '\u0a35',
        '\u0a37', '\u0a38',
        '\u0a3a', '\u0a3e',
        '\u0a41', '\u0a59',
        '\u0a5d', '\u0a5e',
        '\u0a5f', '\u0a66',
        '\u0a70', '\u0a72',
        '\u0a75', '\u0a83',
        '\u0a84', '\u0a85',
        '\u0a8c', '\u0a8d',
        '\u0a8e', '\u0a8f',
        '\u0a92', '\u0a93',
        '\u0aa9', '\u0aaa',
        '\u0ab1', '\u0ab2',
        '\u0ab4', '\u0ab5',
        '\u0aba', '\u0abd',
        '\u0ac1', '\u0ac9',
        '\u0aca', '\u0acb',
        '\u0acd', '\u0ad0',
        '\u0ad1', '\u0ae0',
        '\u0ae1', '\u0ae6',
        '\u0af0', '\u0b02',
        '\u0b04', '\u0b05',
        '\u0b0d', '\u0b0f',
        '\u0b11', '\u0b13',
        '\u0b29', '\u0b2a',
        '\u0b31', '\u0b32',
        '\u0b34', '\u0b36',
        '\u0b3a', '\u0b3d',
        '\u0b3f', '\u0b40',
        '\u0b41', '\u0b47',
        '\u0b49', '\u0b4b',
        '\u0b4d', '\u0b57',
        '\u0b58', '\u0b5c',
        '\u0b5e', '\u0b5f',
        '\u0b62', '\u0b66',
        '\u0b71', '\u0b83',
        '\u0b84', '\u0b85',
        '\u0b8b', '\u0b8e',
        '\u0b91', '\u0b92',
        '\u0b96', '\u0b99',
        '\u0b9b', '\u0b9c',
        '\u0b9d', '\u0b9e',
        '\u0ba0', '\u0ba3',
        '\u0ba5', '\u0ba8',
        '\u0bab', '\u0bae',
        '\u0bb6', '\u0bb7',
        '\u0bba', '\u0bbe',
        '\u0bc0', '\u0bc1',
        '\u0bc3', '\u0bc6',
        '\u0bc9', '\u0bca',
        '\u0bcd', '\u0bd7',
        '\u0bd8', '\u0be7',
        '\u0bf3', '\u0c01',
        '\u0c04', '\u0c05',
        '\u0c0d', '\u0c0e',
        '\u0c11', '\u0c12',
        '\u0c29', '\u0c2a',
        '\u0c34', '\u0c35',
        '\u0c3a', '\u0c41',
        '\u0c45', '\u0c60',
        '\u0c62', '\u0c66',
        '\u0c70', '\u0c82',
        '\u0c84', '\u0c85',
        '\u0c8d', '\u0c8e',
        '\u0c91', '\u0c92',
        '\u0ca9', '\u0caa',
        '\u0cb4', '\u0cb5',
        '\u0cba', '\u0cbe',
        '\u0cbf', '\u0cc0',
        '\u0cc5', '\u0cc7',
        '\u0cc9', '\u0cca',
        '\u0ccc', '\u0cd5',
        '\u0cd7', '\u0cde',
        '\u0cdf', '\u0ce0',
        '\u0ce2', '\u0ce6',
        '\u0cf0', '\u0d02',
        '\u0d04', '\u0d05',
        '\u0d0d', '\u0d0e',
        '\u0d11', '\u0d12',
        '\u0d29', '\u0d2a',
        '\u0d3a', '\u0d3e',
        '\u0d41', '\u0d46',
        '\u0d49', '\u0d4a',
        '\u0d4d', '\u0d57',
        '\u0d58', '\u0d60',
        '\u0d62', '\u0d66',
        '\u0d70', '\u0d82',
        '\u0d84', '\u0d85',
        '\u0d97', '\u0d9a',
        '\u0db2', '\u0db3',
        '\u0dbc', '\u0dbd',
        '\u0dbe', '\u0dc0',
        '\u0dc7', '\u0dcf',
        '\u0dd2', '\u0dd8',
        '\u0de0', '\u0df2',
        '\u0df5', '\u0e01',
        '\u0e31', '\u0e32',
        '\u0e34', '\u0e40',
        '\u0e47', '\u0e4f',
        '\u0e5c', '\u0e81',
        '\u0e83', '\u0e84',
        '\u0e85', '\u0e87',
        '\u0e89', '\u0e8a',
        '\u0e8b', '\u0e8d',
        '\u0e8e', '\u0e94',
        '\u0e98', '\u0e99',
        '\u0ea0', '\u0ea1',
        '\u0ea4', '\u0ea5',
        '\u0ea6', '\u0ea7',
        '\u0ea8', '\u0eaa',
        '\u0eac', '\u0ead',
        '\u0eb1', '\u0eb2',
        '\u0eb4', '\u0ebd',
        '\u0ebe', '\u0ec0',
        '\u0ec5', '\u0ec6',
        '\u0ec7', '\u0ed0',
        '\u0eda', '\u0edc',
        '\u0ede', '\u0f00',
        '\u0f18', '\u0f1a',
        '\u0f35', '\u0f36',
        '\u0f37', '\u0f38',
        '\u0f39', '\u0f3e',
        '\u0f48', '\u0f49',
        '\u0f6b', '\u0f7f',
        '\u0f80', '\u0f85',
        '\u0f86', '\u0f88',
        '\u0f8c', '\u0fbe',
        '\u0fc6', '\u0fc7',
        '\u0fcd', '\u0fcf',
        '\u0fd0', '\u1000',
        '\u1022', '\u1023',
        '\u1028', '\u1029',
        '\u102b', '\u102c',
        '\u102d', '\u1031',
        '\u1032', '\u1038',
        '\u1039', '\u1040',
        '\u1058', '\u10a0',
        '\u10c6', '\u10d0',
        '\u10f7', '\u10fb',
        '\u10fc', '\u1100',
        '\u115a', '\u115f',
        '\u11a3', '\u11a8',
        '\u11fa', '\u1200',
        '\u1207', '\u1208',
        '\u1247', '\u1248',
        '\u1249', '\u124a',
        '\u124e', '\u1250',
        '\u1257', '\u1258',
        '\u1259', '\u125a',
        '\u125e', '\u1260',
        '\u1287', '\u1288',
        '\u1289', '\u128a',
        '\u128e', '\u1290',
        '\u12af', '\u12b0',
        '\u12b1', '\u12b2',
        '\u12b6', '\u12b8',
        '\u12bf', '\u12c0',
        '\u12c1', '\u12c2',
        '\u12c6', '\u12c8',
        '\u12cf', '\u12d0',
        '\u12d7', '\u12d8',
        '\u12ef', '\u12f0',
        '\u130f', '\u1310',
        '\u1311', '\u1312',
        '\u1316', '\u1318',
        '\u131f', '\u1320',
        '\u1347', '\u1348',
        '\u135b', '\u1361',
        '\u137d', '\u13a0',
        '\u13f5', '\u1401',
        '\u1677', '\u1681',
        '\u169b', '\u16a0',
        '\u16f1', '\u1780',
        '\u17b7', '\u17be',
        '\u17c6', '\u17c7',
        '\u17c9', '\u17d4',
        '\u17db', '\u17dc',
        '\u17dd', '\u17e0',
        '\u17ea', '\u1810',
        '\u181a', '\u1820',
        '\u1878', '\u1880',
        '\u18a9', '\u1e00',
        '\u1e9c', '\u1ea0',
        '\u1efa', '\u1f00',
        '\u1f16', '\u1f18',
        '\u1f1e', '\u1f20',
        '\u1f46', '\u1f48',
        '\u1f4e', '\u1f50',
        '\u1f58', '\u1f59',
        '\u1f5a', '\u1f5b',
        '\u1f5c', '\u1f5d',
        '\u1f5e', '\u1f5f',
        '\u1f7e', '\u1f80',
        '\u1fb5', '\u1fb6',
        '\u1fbd', '\u1fbe',
        '\u1fbf', '\u1fc2',
        '\u1fc5', '\u1fc6',
        '\u1fcd', '\u1fd0',
        '\u1fd4', '\u1fd6',
        '\u1fdc', '\u1fe0',
        '\u1fed', '\u1ff2',
        '\u1ff5', '\u1ff6',
        '\u1ffd', '\u200e',
        '\u2010', '\u207f',
        '\u2080', '\u2102',
        '\u2103', '\u2107',
        '\u2108', '\u210a',
        '\u2114', '\u2115',
        '\u2116', '\u2119',
        '\u211e', '\u2124',
        '\u2125', '\u2126',
        '\u2127', '\u2128',
        '\u2129', '\u212a',
        '\u212e', '\u212f',
        '\u2132', '\u2133',
        '\u213a', '\u2160',
        '\u2184', '\u2336',
        '\u237b', '\u2395',
        '\u2396', '\u249c',
        '\u24ea', '\u3005',
        '\u3008', '\u3021',
        '\u302a', '\u3031',
        '\u3036', '\u3038',
        '\u303b', '\u3041',
        '\u3095', '\u309d',
        '\u309f', '\u30a1',
        '\u30fb', '\u30fc',
        '\u30ff', '\u3105',
        '\u312d', '\u3131',
        '\u318f', '\u3190',
        '\u31b8', '\u3200',
        '\u321d', '\u3220',
        '\u3244', '\u3260',
        '\u327c', '\u327f',
        '\u32b1', '\u32c0',
        '\u32cc', '\u32d0',
        '\u32ff', '\u3300',
        '\u3377', '\u337b',
        '\u33de', '\u33e0',
        '\u33ff', '\u3400',
        '\u4db6', '\u4e00',
        '\u9fa6', '\ua000',
        '\ua48d', '\uac00',
        '\ud7a4', '\uf900',
        '\ufa2e', '\ufb00',
        '\ufb07', '\ufb13',
        '\ufb18', '\ufb1d',
        '\ufb1e', '\ufb1f',
        '\ufb29', '\ufb2a',
        '\ufb37', '\ufb38',
        '\ufb3d', '\ufb3e',
        '\ufb3f', '\ufb40',
        '\ufb42', '\ufb43',
        '\ufb45', '\ufb46',
        '\ufbb2', '\ufbd3',
        '\ufd3e', '\ufd50',
        '\ufd90', '\ufd92',
        '\ufdc8', '\ufdf0',
        '\ufdfc', '\ufe70',
        '\ufe73', '\ufe74',
        '\ufe75', '\ufe76',
        '\ufefd', '\uff21',
        '\uff3b', '\uff41',
        '\uff5b', '\uff66',
        '\uffbf', '\uffc2',
        '\uffc8', '\uffca',
        '\uffd0', '\uffd2',
        '\uffd8', '\uffda',
        '\uffdd', '\uffff' // last entry is sentinel, actually never checked
    };


    // use a binary search with a cache

    /**
     * Index into strongTable found by the previous isStrongDirectional
     * call. Per-instance and transient, guarded by this instance's monitor.
     */
    private transient int stCache;

    /**
     * Tells whether c is a strong directional character according to
     * strongTable. Re-searches only the part of the table on the side of the
     * cached entry where c must lie.
     *
     * Must be called with this instance's monitor held (it updates stCache).
     */
    private boolean isStrongDirectional(char c) {
        int idx = stCache;
        if (c < strongTable[idx]) {
            idx = search(c, strongTable, 0, idx);
        } else if (c >= strongTable[idx + 1]) {
            idx = search(c, strongTable, idx + 1, strongTable.length - idx - 1);
        }
        stCache = idx;
        // odd entries start runs of strong directional characters
        return (idx & 0x1) == 1;
    }

    /**
     * Returns the lowest Latin-1 digit that is shaped for the given key.
     * Tamil and Ethiopic have no decimal zero in the digit run this class
     * targets, so shaping starts at '1' for them and '0' stays European.
     */
    private static char minDigitFor(int key) {
        return (key == TAMIL_KEY || key == ETHIOPIC_KEY) ? '\u0031' : '\u0030';
    }

    /**
     * Converts a single-range mask into its key (bit index).
     *
     * @throws IllegalArgumentException if mask is not exactly one of the
     *         19 range bits
     */
    static private int getKeyFromMask(int mask) {
        int key = 0;
        while (key < NUM_KEYS && ((mask & (1<<key)) == 0)) {
            ++key;
        }
        if (key == NUM_KEYS || ((mask & ~(1<<key)) != 0)) {
            throw new IllegalArgumentException("invalid shaper: " + Integer.toHexString(mask));
        }
        return key;
    }

    /**
     * Returns a shaper for the provided unicode range.  All
     * Latin-1 (EUROPEAN) digits are converted
     * to the corresponding decimal unicode digits.
     * @param singleRange the specified Unicode range
     * @return a non-contextual numeric shaper
     * @throws IllegalArgumentException if the range is not a single range
     */
    static public NumericShaper getShaper(int singleRange) {
        int key = getKeyFromMask(singleRange);
        return new NumericShaper(key, singleRange);
    }

    /**
     * Returns a contextual shaper for the provided unicode range(s).
     * Latin-1 (EUROPEAN) digits are converted to the decimal digits
     * corresponding to the range of the preceding text, if the
     * range is one of the provided ranges.  Multiple ranges are
     * represented by or-ing the values together, such as,
     * <code>NumericShaper.ARABIC | NumericShaper.THAI</code>.  The
     * shaper assumes EUROPEAN as the starting context, that is, if
     * EUROPEAN digits are encountered before any strong directional
     * text in the string, the context is presumed to be EUROPEAN, and
     * so the digits will not shape.
     * @param ranges the specified Unicode ranges
     * @return a shaper for the specified ranges
     */
    static public NumericShaper getContextualShaper(int ranges) {
        ranges |= CONTEXTUAL_MASK;
        return new NumericShaper(EUROPEAN_KEY, ranges);
    }

    /**
     * Returns a contextual shaper for the provided unicode range(s).
     * Latin-1 (EUROPEAN) digits will be converted to the decimal digits
     * corresponding to the range of the preceding text, if the
     * range is one of the provided ranges.  Multiple ranges are
     * represented by or-ing the values together, for example,
     * <code>NumericShaper.ARABIC | NumericShaper.THAI</code>.  The
     * shaper uses defaultContext as the starting context.
     * @param ranges the specified Unicode ranges
     * @param defaultContext the starting context, such as
     * <code>NumericShaper.EUROPEAN</code>
     * @return a shaper for the specified Unicode ranges.
     * @throws IllegalArgumentException if the specified
     * <code>defaultContext</code> is not a single valid range.
     */
    static public NumericShaper getContextualShaper(int ranges, int defaultContext) {
        int key = getKeyFromMask(defaultContext);
        ranges |= CONTEXTUAL_MASK;
        return new NumericShaper(key, ranges);
    }

    /**
     * Private constructor.
     */
    private NumericShaper(int key, int mask) {
        this.key = key;
        this.mask = mask;
    }

    /**
     * Validates the arguments shared by both shape overloads.
     *
     * @throws NullPointerException if text is null
     * @throws IndexOutOfBoundsException if start or start + count is
     *         out of bounds (the sum is also checked for int overflow)
     */
    private static void checkParams(char[] text, int start, int count) {
        if (text == null) {
            throw new NullPointerException("text is null");
        }
        if ((start < 0)
            || (start > text.length)
            || ((start + count) < 0)
            || ((start + count) > text.length)) {
            throw new IndexOutOfBoundsException(
                "bad start or count for text of length " + text.length);
        }
    }

    /**
     * Converts the digits in the text that occur between start and
     * start + count.
     * @param text an array of characters to convert
     * @param start the index into <code>text</code> to start
     *        converting
     * @param count the number of characters in <code>text</code>
     *        to convert
     * @throws IndexOutOfBoundsException if start or start + count is
     *        out of bounds
     * @throws NullPointerException if text is null
     */
    public void shape(char[] text, int start, int count) {
        checkParams(text, start, count);

        if (isContextual()) {
            shapeContextually(text, start, count, key);
        } else {
            shapeNonContextually(text, start, count);
        }
    }

    /**
     * Converts the digits in the text that occur between start and
     * start + count, using the provided context.
     * Context is ignored if the shaper is not a contextual shaper.
     * @param text an array of characters
     * @param start the index into <code>text</code> to start
     *        converting
     * @param count the number of characters in <code>text</code>
     *        to convert
     * @param context the context to which to convert the
     *        characters, such as <code>NumericShaper.EUROPEAN</code>
     * @throws IndexOutOfBoundsException if start or start + count is
     *        out of bounds
     * @throws NullPointerException if text is null
     * @throws IllegalArgumentException if this is a contextual shaper
     * and the specified <code>context</code> is not a single valid
     * range.
     */
    public void shape(char[] text, int start, int count, int context) {
        checkParams(text, start, count);

        if (isContextual()) {
            int ctxKey = getKeyFromMask(context);
            shapeContextually(text, start, count, ctxKey);
        } else {
            shapeNonContextually(text, start, count);
        }
    }

    /**
     * Returns a <code>boolean</code> indicating whether or not
     * this shaper shapes contextually.
     * @return <code>true</code> if this shaper is contextual;
     *         <code>false</code> otherwise.
     */
    public boolean isContextual() {
        return (mask & CONTEXTUAL_MASK) != 0;
    }

    /**
     * Returns an <code>int</code> that ORs together the values for
     * all the ranges that will be shaped.
     * <p>
     * For example, to check if a shaper shapes to Arabic, you would use the
     * following:
     * <blockquote>
     *   <code>if ((shaper.getRanges() &amp; NumericShaper.ARABIC) != 0) { ... </code>
     * </blockquote>
     * @return the values for all the ranges to be shaped.
     */
    public int getRanges() {
        return mask & ~CONTEXTUAL_MASK;
    }

    /**
     * Perform non-contextual shaping: every Latin-1 digit in the span that
     * the target script can represent is moved to that script.
     */
    private void shapeNonContextually(char[] text, int start, int count) {
        int base = bases[key];
        char minDigit = minDigitFor(key);
        for (int i = start, e = start + count; i < e; ++i) {
            char c = text[i];
            if (c >= minDigit && c <= '\u0039') {
                text[i] = (char)(c + base);
            }
        }
    }

    /**
     * Perform contextual shaping.
     * Synchronized to protect the per-instance caches used in getContextKey
     * and isStrongDirectional.
     *
     * @param ctxKey key of the context assumed before the first strong
     *        directional character is seen
     */
    private synchronized void shapeContextually(char[] text, int start, int count, int ctxKey) {

        // if we don't support this context, then don't shape
        if ((mask & (1<<ctxKey)) == 0) {
            ctxKey = EUROPEAN_KEY;
        }
        int lastkey = ctxKey;

        int base = bases[ctxKey];
        char minDigit = minDigitFor(ctxKey);

        for (int i = start, e = start + count; i < e; ++i) {
            char c = text[i];
            if (c >= minDigit && c <= '\u0039') {
                text[i] = (char)(c + base);
            }

            if (isStrongDirectional(c)) {
                int newkey = getContextKey(c);
                if (newkey != lastkey) {
                    lastkey = newkey;

                    ctxKey = newkey;
                    // ARABIC and EASTERN_ARABIC share one character range, so
                    // the lookup may report either key for Arabic text. Pick
                    // whichever of the two this shaper was asked for,
                    // preferring EASTERN_ARABIC when both are set.
                    boolean arabicText = ctxKey == ARABIC_KEY || ctxKey == EASTERN_ARABIC_KEY;
                    if (arabicText && (mask & EASTERN_ARABIC) != 0) {
                        ctxKey = EASTERN_ARABIC_KEY;
                    } else if (arabicText && (mask & ARABIC) != 0) {
                        ctxKey = ARABIC_KEY;
                    } else if ((mask & (1<<ctxKey)) == 0) {
                        ctxKey = EUROPEAN_KEY;
                    }

                    base = bases[ctxKey];

                    minDigit = minDigitFor(ctxKey);
                }
            }
        }
    }

    /**
     * Returns a hash code for this shaper.
     * @return this shaper's hash code.
     * @see java.lang.Object#hashCode
     */
    @Override
    public int hashCode() {
        return mask;
    }

    /**
     * Returns true if the specified object is an instance of
     * <code>NumericShaper</code> and shapes identically to this one.
     * @param o the specified object to compare to this
     *          <code>NumericShaper</code>
     * @return <code>true</code> if <code>o</code> is an instance
     *         of <code>NumericShaper</code> and shapes in the same way;
     *         <code>false</code> otherwise.
     * @see java.lang.Object#equals(java.lang.Object)
     */
    @Override
    public boolean equals(Object o) {
        // instanceof is false for null, so no separate null check is needed
        if (!(o instanceof NumericShaper)) {
            return false;
        }
        NumericShaper rhs = (NumericShaper)o;
        return rhs.mask == mask && rhs.key == key;
    }

    /**
     * Returns a <code>String</code> that describes this shaper. This method
     * is used for debugging purposes only.
     * @return a <code>String</code> describing this shaper.
     */
    @Override
    public String toString() {
        StringBuilder buf = new StringBuilder(super.toString());

        buf.append("[contextual:").append(isContextual());

        if (isContextual()) {
            buf.append(", context:").append(keyNames[key]);
        }

        buf.append(", range(s): ");
        boolean first = true;
        for (int i = 0; i < NUM_KEYS; ++i) {
            if ((mask & (1 << i)) != 0) {
                if (first) {
                    first = false;
                } else {
                    buf.append(", ");
                }
                buf.append(keyNames[i]);
            }
        }
        buf.append(']');

        return buf.toString();
    }

    /**
     * Returns the index of the highest set bit in value, i.e. the exponent
     * of the largest power of 2 that is &lt;= value. Returns -32 when value
     * is not positive; search relies on that because a shift by -32 is a
     * shift by 0.
     */
    private static int getHighBit(int value) {
        if (value <= 0) {
            return -32;
        }
        return 31 - Integer.numberOfLeadingZeros(value);
    }

    /**
     * fast binary search over subrange of array.
     * Returns the index in [start, start + length) of the last entry that
     * is &lt;= value, assuming the subrange is sorted ascending and its
     * first entry is &lt;= value.
     */
    private static int search(char value, char[] array, int start, int length)
    {
        // Split the span into a power-of-two part plus the leftover so the
        // loop below can halve the probe distance without bounds checks.
        int power = 1 << getHighBit(length);
        int extra = length - power;
        int probe = power;
        int index = start;

        if (value >= array[index + extra]) {
            index += extra;
        }

        while (probe > 1) {
            probe >>= 1;

            if (value >= array[index + probe]) {
                index += probe;
            }
        }

        return index;
    }
}