Blame - jdk/src/share/classes/java/awt/font/NumericShaper.java - platform/libcore

blob: a1ffbf0b7c3a435f2f8b7066f4da9c83c92ef801 [file] [log] [blame]

J. Duke	319a3b9	2007-12-01 00:00:00 +0000	[diff] [blame^]	1	/*
				2	* Copyright 2000-2006 Sun Microsystems, Inc. All Rights Reserved.
				3	* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
				4	*
				5	* This code is free software; you can redistribute it and/or modify it
				6	* under the terms of the GNU General Public License version 2 only, as
				7	* published by the Free Software Foundation. Sun designates this
				8	* particular file as subject to the "Classpath" exception as provided
				9	* by Sun in the LICENSE file that accompanied this code.
				10	*
				11	* This code is distributed in the hope that it will be useful, but WITHOUT
				12	* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
				13	* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
				14	* version 2 for more details (a copy is included in the LICENSE file that
				15	* accompanied this code).
				16	*
				17	* You should have received a copy of the GNU General Public License version
				18	* 2 along with this work; if not, write to the Free Software Foundation,
				19	* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
				20	*
				21	* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
				22	* CA 95054 USA or visit www.sun.com if you need additional information or
				23	* have any questions.
				24	*/
				25
				26	package java.awt.font;
				27
				28	/**
				29	* The <code>NumericShaper</code> class is used to convert Latin-1 (European)
				30	* digits to other Unicode decimal digits. Users of this class will
				31	* primarily be people who wish to present data using
				32	* national digit shapes, but find it more convenient to represent the
				33	* data internally using Latin-1 (European) digits. This does not
				34	* interpret the deprecated numeric shape selector character (U+206E).
				35	* <p>
				36	* Instances of <code>NumericShaper</code> are typically applied
				37	* as attributes to text with the
				38	* {@link TextAttribute#NUMERIC_SHAPING NUMERIC_SHAPING} attribute
				39	* of the <code>TextAttribute</code> class.
				40	* For example, this code snippet causes a <code>TextLayout</code> to
				41	* shape European digits to Arabic in an Arabic context:<br>
				42	* <blockquote><pre>
				43	* Map map = new HashMap();
				44	* map.put(TextAttribute.NUMERIC_SHAPING,
				45	* NumericShaper.getContextualShaper(NumericShaper.ARABIC));
				46	* FontRenderContext frc = ...;
				47	* TextLayout layout = new TextLayout(text, map, frc);
				48	* layout.draw(g2d, x, y);
				49	* </pre></blockquote>
				50	* <br>
				51	* It is also possible to perform numeric shaping explicitly using instances
				52	* of <code>NumericShaper</code>, as this code snippet demonstrates:<br>
				53	* <blockquote><pre>
				54	* char[] text = ...;
				55	* // shape all EUROPEAN digits (except zero) to ARABIC digits
				56	* NumericShaper shaper = NumericShaper.getShaper(NumericShaper.ARABIC);
				57	* shaper.shape(text, start, count);
				58	*
				59	* // shape European digits to ARABIC digits if preceding text is Arabic, or
				60	* // shape European digits to TAMIL digits if preceding text is Tamil, or
				61	* // leave European digits alone if there is no preceding text, or
				62	* // preceding text is neither Arabic nor Tamil
				63	* NumericShaper shaper =
				64	* NumericShaper.getContextualShaper(NumericShaper.ARABIC \|
				65	* NumericShaper.TAMIL,
				66	* NumericShaper.EUROPEAN);
				67	* shaper.shape(text. start, count);
				68	* </pre></blockquote>
				69	*
				70	* @since 1.4
				71	*/
				72
				73	public final class NumericShaper implements java.io.Serializable {
				74	/** index of context for contextual shaping - values range from 0 to 18 */
				75	private int key;
				76
				77	/** flag indicating whether to shape contextually (high bit) and which
				78	* digit ranges to shape (bits 0-18)
				79	*/
				80	private int mask;
				81
				82	/** Identifies the Latin-1 (European) and extended range, and
				83	* Latin-1 (European) decimal base.
				84	*/
				85	public static final int EUROPEAN = 1<<0;
				86
				87	/** Identifies the ARABIC range and decimal base. */
				88	public static final int ARABIC = 1<<1;
				89
				90	/** Identifies the ARABIC range and ARABIC_EXTENDED decimal base. */
				91	public static final int EASTERN_ARABIC = 1<<2;
				92
				93	/** Identifies the DEVANAGARI range and decimal base. */
				94	public static final int DEVANAGARI = 1<<3;
				95
				96	/** Identifies the BENGALI range and decimal base. */
				97	public static final int BENGALI = 1<<4;
				98
				99	/** Identifies the GURMUKHI range and decimal base. */
				100	public static final int GURMUKHI = 1<<5;
				101
				102	/** Identifies the GUJARATI range and decimal base. */
				103	public static final int GUJARATI = 1<<6;
				104
				105	/** Identifies the ORIYA range and decimal base. */
				106	public static final int ORIYA = 1<<7;
				107
				108	/** Identifies the TAMIL range and decimal base. Tamil does not have a
				109	* decimal digit 0 so Latin-1 (European) 0 is used.
				110	*/
				111	public static final int TAMIL = 1<<8;
				112
				113	/** Identifies the TELUGU range and decimal base. */
				114	public static final int TELUGU = 1<<9;
				115
				116	/** Identifies the KANNADA range and decimal base. */
				117	public static final int KANNADA = 1<<10;
				118
				119	/** Identifies the MALAYALAM range and decimal base. */
				120	public static final int MALAYALAM = 1<<11;
				121
				122	/** Identifies the THAI range and decimal base. */
				123	public static final int THAI = 1<<12;
				124
				125	/** Identifies the LAO range and decimal base. */
				126	public static final int LAO = 1<<13;
				127
				128	/** Identifies the TIBETAN range and decimal base. */
				129	public static final int TIBETAN = 1<<14;
				130
				131	/** Identifies the MYANMAR range and decimal base. */
				132	public static final int MYANMAR = 1<<15;
				133
				134	/** Identifies the ETHIOPIC range and decimal base. */
				135	public static final int ETHIOPIC = 1<<16;
				136
				137	/** Identifies the KHMER range and decimal base. */
				138	public static final int KHMER = 1<<17;
				139
				140	/** Identifies the MONGOLIAN range and decimal base. */
				141	public static final int MONGOLIAN = 1<<18;
				142
				143	/** Identifies all ranges, for full contextual shaping. */
				144	public static final int ALL_RANGES = 0x0007ffff;
				145
				146	private static final int EUROPEAN_KEY = 0;
				147	private static final int ARABIC_KEY = 1;
				148	private static final int EASTERN_ARABIC_KEY = 2;
				149	private static final int DEVANAGARI_KEY = 3;
				150	private static final int BENGALI_KEY = 4;
				151	private static final int GURMUKHI_KEY = 5;
				152	private static final int GUJARATI_KEY = 6;
				153	private static final int ORIYA_KEY = 7;
				154	private static final int TAMIL_KEY = 8;
				155	private static final int TELUGU_KEY = 9;
				156	private static final int KANNADA_KEY = 10;
				157	private static final int MALAYALAM_KEY = 11;
				158	private static final int THAI_KEY = 12;
				159	private static final int LAO_KEY = 13;
				160	private static final int TIBETAN_KEY = 14;
				161	private static final int MYANMAR_KEY = 15;
				162	private static final int ETHIOPIC_KEY = 16;
				163	private static final int KHMER_KEY = 17;
				164	private static final int MONGOLIAN_KEY = 18;
				165
				166	private static final int NUM_KEYS = 19;
				167
				168	private static final String[] keyNames = {
				169	"EUROPEAN",
				170	"ARABIC",
				171	"EASTERN_ARABIC",
				172	"DEVANAGARI",
				173	"BENGALI",
				174	"GURMUKHI",
				175	"GUJARATI",
				176	"ORIYA",
				177	"TAMIL",
				178	"TELUGU",
				179	"KANNADA",
				180	"MALAYALAM",
				181	"THAI",
				182	"LAO",
				183	"TIBETAN",
				184	"MYANMAR",
				185	"ETHIOPIC",
				186	"KHMER",
				187	"MONGOLIAN"
				188	};
				189
				190	private static final int CONTEXTUAL_MASK = 1<<31;
				191
				192	private static final char[] bases = {
				193	'\u0030' - '\u0030', // EUROPEAN
				194	'\u0660' - '\u0030', // ARABIC
				195	'\u06f0' - '\u0030', // EASTERN_ARABIC
				196	'\u0966' - '\u0030', // DEVANAGARI
				197	'\u09e6' - '\u0030', // BENGALI
				198	'\u0a66' - '\u0030', // GURMUKHI
				199	'\u0ae6' - '\u0030', // GUJARATI
				200	'\u0b66' - '\u0030', // ORIYA
				201	'\u0be7' - '\u0030', // TAMIL - note missing zero
				202	'\u0c66' - '\u0030', // TELUGU
				203	'\u0ce6' - '\u0030', // KANNADA
				204	'\u0d66' - '\u0030', // MALAYALAM
				205	'\u0e50' - '\u0030', // THAI
				206	'\u0ed0' - '\u0030', // LAO
				207	'\u0f20' - '\u0030', // TIBETAN
				208	'\u1040' - '\u0030', // MYANMAR
				209	'\u1369' - '\u0030', // ETHIOPIC
				210	'\u17e0' - '\u0030', // KHMER
				211	'\u1810' - '\u0030', // MONGOLIAN
				212	};
				213
				214	// some ranges adjoin or overlap, rethink if we want to do a binary search on this
				215
				216	private static final char[] contexts = {
				217	'\u0000', '\u0300', // 'EUROPEAN' (really latin-1 and extended)
				218	'\u0600', '\u0700', // ARABIC
				219	'\u0600', '\u0700', // EASTERN_ARABIC -- note overlap with arabic
				220	'\u0900', '\u0980', // DEVANAGARI
				221	'\u0980', '\u0a00', // BENGALI
				222	'\u0a00', '\u0a80', // GURMUKHI
				223	'\u0a80', '\u0b00', // GUJARATI
				224	'\u0b00', '\u0b80', // ORIYA
				225	'\u0b80', '\u0c00', // TAMIL - note missing zero
				226	'\u0c00', '\u0c80', // TELUGU
				227	'\u0c80', '\u0d00', // KANNADA
				228	'\u0d00', '\u0d80', // MALAYALAM
				229	'\u0e00', '\u0e80', // THAI
				230	'\u0e80', '\u0f00', // LAO
				231	'\u0f00', '\u1000', // TIBETAN
				232	'\u1000', '\u1080', // MYANMAR
				233	'\u1200', '\u1380', // ETHIOPIC
				234	'\u1780', '\u1800', // KHMER
				235	'\u1800', '\u1900', // MONGOLIAN
				236	'\uffff',
				237	};
				238
				239	// assume most characters are near each other so probing the cache is infrequent,
				240	// and a linear probe is ok.
				241
				242	private static int ctCache = 0;
				243	private static int ctCacheLimit = contexts.length - 2;
				244
				245	// warning, synchronize access to this as it modifies state
				246	private static int getContextKey(char c) {
				247	if (c < contexts[ctCache]) {
				248	while (ctCache > 0 && c < contexts[ctCache]) --ctCache;
				249	} else if (c >= contexts[ctCache + 1]) {
				250	while (ctCache < ctCacheLimit && c >= contexts[ctCache + 1]) ++ctCache;
				251	}
				252
				253	// if we're not in a known range, then return EUROPEAN as the range key
				254	return (ctCache & 0x1) == 0 ? (ctCache / 2) : EUROPEAN_KEY;
				255	}
				256
				257	/*
				258	* A range table of strong directional characters (types L, R, AL).
				259	* Even (left) indexes are starts of ranges of non-strong-directional (or undefined)
				260	* characters, odd (right) indexes are starts of ranges of strong directional
				261	* characters.
				262	*/
				263	private static char[] strongTable = {
				264	'\u0000', '\u0041',
				265	'\u005b', '\u0061',
				266	'\u007b', '\u00aa',
				267	'\u00ab', '\u00b5',
				268	'\u00b6', '\u00ba',
				269	'\u00bb', '\u00c0',
				270	'\u00d7', '\u00d8',
				271	'\u00f7', '\u00f8',
				272	'\u0220', '\u0222',
				273	'\u0234', '\u0250',
				274	'\u02ae', '\u02b0',
				275	'\u02b9', '\u02bb',
				276	'\u02c2', '\u02d0',
				277	'\u02d2', '\u02e0',
				278	'\u02e5', '\u02ee',
				279	'\u02ef', '\u037a',
				280	'\u037b', '\u0386',
				281	'\u0387', '\u0388',
				282	'\u038b', '\u038c',
				283	'\u038d', '\u038e',
				284	'\u03a2', '\u03a3',
				285	'\u03cf', '\u03d0',
				286	'\u03d8', '\u03da',
				287	'\u03f4', '\u0400',
				288	'\u0483', '\u048c',
				289	'\u04c5', '\u04c7',
				290	'\u04c9', '\u04cb',
				291	'\u04cd', '\u04d0',
				292	'\u04f6', '\u04f8',
				293	'\u04fa', '\u0531',
				294	'\u0557', '\u0559',
				295	'\u0560', '\u0561',
				296	'\u0588', '\u0589',
				297	'\u058a', '\u05be',
				298	'\u05bf', '\u05c0',
				299	'\u05c1', '\u05c3',
				300	'\u05c4', '\u05d0',
				301	'\u05eb', '\u05f0',
				302	'\u05f5', '\u061b',
				303	'\u061c', '\u061f',
				304	'\u0620', '\u0621',
				305	'\u063b', '\u0640',
				306	'\u064b', '\u066d',
				307	'\u066e', '\u0671',
				308	'\u06d6', '\u06e5',
				309	'\u06e7', '\u06fa',
				310	'\u06ff', '\u0700',
				311	'\u070e', '\u0710',
				312	'\u0711', '\u0712',
				313	'\u072d', '\u0780',
				314	'\u07a6', '\u0903',
				315	'\u0904', '\u0905',
				316	'\u093a', '\u093d',
				317	'\u0941', '\u0949',
				318	'\u094d', '\u0950',
				319	'\u0951', '\u0958',
				320	'\u0962', '\u0964',
				321	'\u0971', '\u0982',
				322	'\u0984', '\u0985',
				323	'\u098d', '\u098f',
				324	'\u0991', '\u0993',
				325	'\u09a9', '\u09aa',
				326	'\u09b1', '\u09b2',
				327	'\u09b3', '\u09b6',
				328	'\u09ba', '\u09be',
				329	'\u09c1', '\u09c7',
				330	'\u09c9', '\u09cb',
				331	'\u09cd', '\u09d7',
				332	'\u09d8', '\u09dc',
				333	'\u09de', '\u09df',
				334	'\u09e2', '\u09e6',
				335	'\u09f2', '\u09f4',
				336	'\u09fb', '\u0a05',
				337	'\u0a0b', '\u0a0f',
				338	'\u0a11', '\u0a13',
				339	'\u0a29', '\u0a2a',
				340	'\u0a31', '\u0a32',
				341	'\u0a34', '\u0a35',
				342	'\u0a37', '\u0a38',
				343	'\u0a3a', '\u0a3e',
				344	'\u0a41', '\u0a59',
				345	'\u0a5d', '\u0a5e',
				346	'\u0a5f', '\u0a66',
				347	'\u0a70', '\u0a72',
				348	'\u0a75', '\u0a83',
				349	'\u0a84', '\u0a85',
				350	'\u0a8c', '\u0a8d',
				351	'\u0a8e', '\u0a8f',
				352	'\u0a92', '\u0a93',
				353	'\u0aa9', '\u0aaa',
				354	'\u0ab1', '\u0ab2',
				355	'\u0ab4', '\u0ab5',
				356	'\u0aba', '\u0abd',
				357	'\u0ac1', '\u0ac9',
				358	'\u0aca', '\u0acb',
				359	'\u0acd', '\u0ad0',
				360	'\u0ad1', '\u0ae0',
				361	'\u0ae1', '\u0ae6',
				362	'\u0af0', '\u0b02',
				363	'\u0b04', '\u0b05',
				364	'\u0b0d', '\u0b0f',
				365	'\u0b11', '\u0b13',
				366	'\u0b29', '\u0b2a',
				367	'\u0b31', '\u0b32',
				368	'\u0b34', '\u0b36',
				369	'\u0b3a', '\u0b3d',
				370	'\u0b3f', '\u0b40',
				371	'\u0b41', '\u0b47',
				372	'\u0b49', '\u0b4b',
				373	'\u0b4d', '\u0b57',
				374	'\u0b58', '\u0b5c',
				375	'\u0b5e', '\u0b5f',
				376	'\u0b62', '\u0b66',
				377	'\u0b71', '\u0b83',
				378	'\u0b84', '\u0b85',
				379	'\u0b8b', '\u0b8e',
				380	'\u0b91', '\u0b92',
				381	'\u0b96', '\u0b99',
				382	'\u0b9b', '\u0b9c',
				383	'\u0b9d', '\u0b9e',
				384	'\u0ba0', '\u0ba3',
				385	'\u0ba5', '\u0ba8',
				386	'\u0bab', '\u0bae',
				387	'\u0bb6', '\u0bb7',
				388	'\u0bba', '\u0bbe',
				389	'\u0bc0', '\u0bc1',
				390	'\u0bc3', '\u0bc6',
				391	'\u0bc9', '\u0bca',
				392	'\u0bcd', '\u0bd7',
				393	'\u0bd8', '\u0be7',
				394	'\u0bf3', '\u0c01',
				395	'\u0c04', '\u0c05',
				396	'\u0c0d', '\u0c0e',
				397	'\u0c11', '\u0c12',
				398	'\u0c29', '\u0c2a',
				399	'\u0c34', '\u0c35',
				400	'\u0c3a', '\u0c41',
				401	'\u0c45', '\u0c60',
				402	'\u0c62', '\u0c66',
				403	'\u0c70', '\u0c82',
				404	'\u0c84', '\u0c85',
				405	'\u0c8d', '\u0c8e',
				406	'\u0c91', '\u0c92',
				407	'\u0ca9', '\u0caa',
				408	'\u0cb4', '\u0cb5',
				409	'\u0cba', '\u0cbe',
				410	'\u0cbf', '\u0cc0',
				411	'\u0cc5', '\u0cc7',
				412	'\u0cc9', '\u0cca',
				413	'\u0ccc', '\u0cd5',
				414	'\u0cd7', '\u0cde',
				415	'\u0cdf', '\u0ce0',
				416	'\u0ce2', '\u0ce6',
				417	'\u0cf0', '\u0d02',
				418	'\u0d04', '\u0d05',
				419	'\u0d0d', '\u0d0e',
				420	'\u0d11', '\u0d12',
				421	'\u0d29', '\u0d2a',
				422	'\u0d3a', '\u0d3e',
				423	'\u0d41', '\u0d46',
				424	'\u0d49', '\u0d4a',
				425	'\u0d4d', '\u0d57',
				426	'\u0d58', '\u0d60',
				427	'\u0d62', '\u0d66',
				428	'\u0d70', '\u0d82',
				429	'\u0d84', '\u0d85',
				430	'\u0d97', '\u0d9a',
				431	'\u0db2', '\u0db3',
				432	'\u0dbc', '\u0dbd',
				433	'\u0dbe', '\u0dc0',
				434	'\u0dc7', '\u0dcf',
				435	'\u0dd2', '\u0dd8',
				436	'\u0de0', '\u0df2',
				437	'\u0df5', '\u0e01',
				438	'\u0e31', '\u0e32',
				439	'\u0e34', '\u0e40',
				440	'\u0e47', '\u0e4f',
				441	'\u0e5c', '\u0e81',
				442	'\u0e83', '\u0e84',
				443	'\u0e85', '\u0e87',
				444	'\u0e89', '\u0e8a',
				445	'\u0e8b', '\u0e8d',
				446	'\u0e8e', '\u0e94',
				447	'\u0e98', '\u0e99',
				448	'\u0ea0', '\u0ea1',
				449	'\u0ea4', '\u0ea5',
				450	'\u0ea6', '\u0ea7',
				451	'\u0ea8', '\u0eaa',
				452	'\u0eac', '\u0ead',
				453	'\u0eb1', '\u0eb2',
				454	'\u0eb4', '\u0ebd',
				455	'\u0ebe', '\u0ec0',
				456	'\u0ec5', '\u0ec6',
				457	'\u0ec7', '\u0ed0',
				458	'\u0eda', '\u0edc',
				459	'\u0ede', '\u0f00',
				460	'\u0f18', '\u0f1a',
				461	'\u0f35', '\u0f36',
				462	'\u0f37', '\u0f38',
				463	'\u0f39', '\u0f3e',
				464	'\u0f48', '\u0f49',
				465	'\u0f6b', '\u0f7f',
				466	'\u0f80', '\u0f85',
				467	'\u0f86', '\u0f88',
				468	'\u0f8c', '\u0fbe',
				469	'\u0fc6', '\u0fc7',
				470	'\u0fcd', '\u0fcf',
				471	'\u0fd0', '\u1000',
				472	'\u1022', '\u1023',
				473	'\u1028', '\u1029',
				474	'\u102b', '\u102c',
				475	'\u102d', '\u1031',
				476	'\u1032', '\u1038',
				477	'\u1039', '\u1040',
				478	'\u1058', '\u10a0',
				479	'\u10c6', '\u10d0',
				480	'\u10f7', '\u10fb',
				481	'\u10fc', '\u1100',
				482	'\u115a', '\u115f',
				483	'\u11a3', '\u11a8',
				484	'\u11fa', '\u1200',
				485	'\u1207', '\u1208',
				486	'\u1247', '\u1248',
				487	'\u1249', '\u124a',
				488	'\u124e', '\u1250',
				489	'\u1257', '\u1258',
				490	'\u1259', '\u125a',
				491	'\u125e', '\u1260',
				492	'\u1287', '\u1288',
				493	'\u1289', '\u128a',
				494	'\u128e', '\u1290',
				495	'\u12af', '\u12b0',
				496	'\u12b1', '\u12b2',
				497	'\u12b6', '\u12b8',
				498	'\u12bf', '\u12c0',
				499	'\u12c1', '\u12c2',
				500	'\u12c6', '\u12c8',
				501	'\u12cf', '\u12d0',
				502	'\u12d7', '\u12d8',
				503	'\u12ef', '\u12f0',
				504	'\u130f', '\u1310',
				505	'\u1311', '\u1312',
				506	'\u1316', '\u1318',
				507	'\u131f', '\u1320',
				508	'\u1347', '\u1348',
				509	'\u135b', '\u1361',
				510	'\u137d', '\u13a0',
				511	'\u13f5', '\u1401',
				512	'\u1677', '\u1681',
				513	'\u169b', '\u16a0',
				514	'\u16f1', '\u1780',
				515	'\u17b7', '\u17be',
				516	'\u17c6', '\u17c7',
				517	'\u17c9', '\u17d4',
				518	'\u17db', '\u17dc',
				519	'\u17dd', '\u17e0',
				520	'\u17ea', '\u1810',
				521	'\u181a', '\u1820',
				522	'\u1878', '\u1880',
				523	'\u18a9', '\u1e00',
				524	'\u1e9c', '\u1ea0',
				525	'\u1efa', '\u1f00',
				526	'\u1f16', '\u1f18',
				527	'\u1f1e', '\u1f20',
				528	'\u1f46', '\u1f48',
				529	'\u1f4e', '\u1f50',
				530	'\u1f58', '\u1f59',
				531	'\u1f5a', '\u1f5b',
				532	'\u1f5c', '\u1f5d',
				533	'\u1f5e', '\u1f5f',
				534	'\u1f7e', '\u1f80',
				535	'\u1fb5', '\u1fb6',
				536	'\u1fbd', '\u1fbe',
				537	'\u1fbf', '\u1fc2',
				538	'\u1fc5', '\u1fc6',
				539	'\u1fcd', '\u1fd0',
				540	'\u1fd4', '\u1fd6',
				541	'\u1fdc', '\u1fe0',
				542	'\u1fed', '\u1ff2',
				543	'\u1ff5', '\u1ff6',
				544	'\u1ffd', '\u200e',
				545	'\u2010', '\u207f',
				546	'\u2080', '\u2102',
				547	'\u2103', '\u2107',
				548	'\u2108', '\u210a',
				549	'\u2114', '\u2115',
				550	'\u2116', '\u2119',
				551	'\u211e', '\u2124',
				552	'\u2125', '\u2126',
				553	'\u2127', '\u2128',
				554	'\u2129', '\u212a',
				555	'\u212e', '\u212f',
				556	'\u2132', '\u2133',
				557	'\u213a', '\u2160',
				558	'\u2184', '\u2336',
				559	'\u237b', '\u2395',
				560	'\u2396', '\u249c',
				561	'\u24ea', '\u3005',
				562	'\u3008', '\u3021',
				563	'\u302a', '\u3031',
				564	'\u3036', '\u3038',
				565	'\u303b', '\u3041',
				566	'\u3095', '\u309d',
				567	'\u309f', '\u30a1',
				568	'\u30fb', '\u30fc',
				569	'\u30ff', '\u3105',
				570	'\u312d', '\u3131',
				571	'\u318f', '\u3190',
				572	'\u31b8', '\u3200',
				573	'\u321d', '\u3220',
				574	'\u3244', '\u3260',
				575	'\u327c', '\u327f',
				576	'\u32b1', '\u32c0',
				577	'\u32cc', '\u32d0',
				578	'\u32ff', '\u3300',
				579	'\u3377', '\u337b',
				580	'\u33de', '\u33e0',
				581	'\u33ff', '\u3400',
				582	'\u4db6', '\u4e00',
				583	'\u9fa6', '\ua000',
				584	'\ua48d', '\uac00',
				585	'\ud7a4', '\uf900',
				586	'\ufa2e', '\ufb00',
				587	'\ufb07', '\ufb13',
				588	'\ufb18', '\ufb1d',
				589	'\ufb1e', '\ufb1f',
				590	'\ufb29', '\ufb2a',
				591	'\ufb37', '\ufb38',
				592	'\ufb3d', '\ufb3e',
				593	'\ufb3f', '\ufb40',
				594	'\ufb42', '\ufb43',
				595	'\ufb45', '\ufb46',
				596	'\ufbb2', '\ufbd3',
				597	'\ufd3e', '\ufd50',
				598	'\ufd90', '\ufd92',
				599	'\ufdc8', '\ufdf0',
				600	'\ufdfc', '\ufe70',
				601	'\ufe73', '\ufe74',
				602	'\ufe75', '\ufe76',
				603	'\ufefd', '\uff21',
				604	'\uff3b', '\uff41',
				605	'\uff5b', '\uff66',
				606	'\uffbf', '\uffc2',
				607	'\uffc8', '\uffca',
				608	'\uffd0', '\uffd2',
				609	'\uffd8', '\uffda',
				610	'\uffdd', '\uffff' // last entry is sentinel, actually never checked
				611	};
				612
				613
				614	// use a binary search with a cache
				615
				616	private static int stCache = 0;
				617
				618	// warning, synchronize access to this as it modifies state
				619	private static boolean isStrongDirectional(char c) {
				620	if (c < strongTable[stCache]) {
				621	stCache = search(c, strongTable, 0, stCache);
				622	} else if (c >= strongTable[stCache + 1]) {
				623	stCache = search(c, strongTable, stCache + 1, strongTable.length - stCache - 1);
				624	}
				625	return (stCache & 0x1) == 1;
				626	}
				627
				628	static private int getKeyFromMask(int mask) {
				629	int key = 0;
				630	while (key < NUM_KEYS && ((mask & (1<<key)) == 0)) {
				631	++key;
				632	}
				633	if (key == NUM_KEYS \|\| ((mask & ~(1<<key)) != 0)) {
				634	throw new IllegalArgumentException("invalid shaper: " + Integer.toHexString(mask));
				635	}
				636	return key;
				637	}
				638
				639	/**
				640	* Returns a shaper for the provided unicode range. All
				641	* Latin-1 (EUROPEAN) digits are converted
				642	* to the corresponding decimal unicode digits.
				643	* @param singleRange the specified Unicode range
				644	* @return a non-contextual numeric shaper
				645	* @throws IllegalArgumentException if the range is not a single range
				646	*/
				647	static public NumericShaper getShaper(int singleRange) {
				648	int key = getKeyFromMask(singleRange);
				649	return new NumericShaper(key, singleRange);
				650	}
				651
				652	/**
				653	* Returns a contextual shaper for the provided unicode range(s).
				654	* Latin-1 (EUROPEAN) digits are converted to the decimal digits
				655	* corresponding to the range of the preceding text, if the
				656	* range is one of the provided ranges. Multiple ranges are
				657	* represented by or-ing the values together, such as,
				658	* <code>NumericShaper.ARABIC \| NumericShaper.THAI</code>. The
				659	* shaper assumes EUROPEAN as the starting context, that is, if
				660	* EUROPEAN digits are encountered before any strong directional
				661	* text in the string, the context is presumed to be EUROPEAN, and
				662	* so the digits will not shape.
				663	* @param ranges the specified Unicode ranges
				664	* @return a shaper for the specified ranges
				665	*/
				666	static public NumericShaper getContextualShaper(int ranges) {
				667	ranges \|= CONTEXTUAL_MASK;
				668	return new NumericShaper(EUROPEAN_KEY, ranges);
				669	}
				670
				671	/**
				672	* Returns a contextual shaper for the provided unicode range(s).
				673	* Latin-1 (EUROPEAN) digits will be converted to the decimal digits
				674	* corresponding to the range of the preceding text, if the
				675	* range is one of the provided ranges. Multiple ranges are
				676	* represented by or-ing the values together, for example,
				677	* <code>NumericShaper.ARABIC \| NumericShaper.THAI</code>. The
				678	* shaper uses defaultContext as the starting context.
				679	* @param ranges the specified Unicode ranges
				680	* @param defaultContext the starting context, such as
				681	* <code>NumericShaper.EUROPEAN</code>
				682	* @return a shaper for the specified Unicode ranges.
				683	* @throws IllegalArgumentException if the specified
				684	* <code>defaultContext</code> is not a single valid range.
				685	*/
				686	static public NumericShaper getContextualShaper(int ranges, int defaultContext) {
				687	int key = getKeyFromMask(defaultContext);
				688	ranges \|= CONTEXTUAL_MASK;
				689	return new NumericShaper(key, ranges);
				690	}
				691
				692	/**
				693	* Private constructor.
				694	*/
				695	private NumericShaper(int key, int mask) {
				696	this.key = key;
				697	this.mask = mask;
				698	}
				699
				700	/**
				701	* Converts the digits in the text that occur between start and
				702	* start + count.
				703	* @param text an array of characters to convert
				704	* @param start the index into <code>text</code> to start
				705	* converting
				706	* @param count the number of characters in <code>text</code>
				707	* to convert
				708	* @throws IndexOutOfBoundsException if start or start + count is
				709	* out of bounds
				710	* @throws NullPointerException if text is null
				711	*/
				712	public void shape(char[] text, int start, int count) {
				713	if (text == null) {
				714	throw new NullPointerException("text is null");
				715	}
				716	if ((start < 0)
				717	\|\| (start > text.length)
				718	\|\| ((start + count) < 0)
				719	\|\| ((start + count) > text.length)) {
				720	throw new IndexOutOfBoundsException(
				721	"bad start or count for text of length " + text.length);
				722	}
				723
				724	if (isContextual()) {
				725	shapeContextually(text, start, count, key);
				726	} else {
				727	shapeNonContextually(text, start, count);
				728	}
				729	}
				730
				731	/**
				732	* Converts the digits in the text that occur between start and
				733	* start + count, using the provided context.
				734	* Context is ignored if the shaper is not a contextual shaper.
				735	* @param text an array of characters
				736	* @param start the index into <code>text</code> to start
				737	* converting
				738	* @param count the number of characters in <code>text</code>
				739	* to convert
				740	* @param context the context to which to convert the
				741	* characters, such as <code>NumericShaper.EUROPEAN</code>
				742	* @throws IndexOutOfBoundsException if start or start + count is
				743	* out of bounds
				744	* @throws NullPointerException if text is null
				745	* @throws IllegalArgumentException if this is a contextual shaper
				746	* and the specified <code>context</code> is not a single valid
				747	* range.
				748	*/
				749	public void shape(char[] text, int start, int count, int context) {
				750	if (text == null) {
				751	throw new NullPointerException("text is null");
				752	}
				753	if ((start < 0)
				754	\|\| (start > text.length)
				755	\|\| ((start + count) < 0)
				756	\|\| ((start + count) > text.length)) {
				757	throw new IndexOutOfBoundsException(
				758	"bad start or count for text of length " + text.length);
				759	}
				760
				761	if (isContextual()) {
				762	int ctxKey = getKeyFromMask(context);
				763	shapeContextually(text, start, count, ctxKey);
				764	} else {
				765	shapeNonContextually(text, start, count);
				766	}
				767	}
				768
				769	/**
				770	* Returns a <code>boolean</code> indicating whether or not
				771	* this shaper shapes contextually.
				772	* @return <code>true</code> if this shaper is contextual;
				773	* <code>false</code> otherwise.
				774	*/
				775	public boolean isContextual() {
				776	return (mask & CONTEXTUAL_MASK) != 0;
				777	}
				778
				779	/**
				780	* Returns an <code>int</code> that ORs together the values for
				781	* all the ranges that will be shaped.
				782	* <p>
				783	* For example, to check if a shaper shapes to Arabic, you would use the
				784	* following:
				785	* <blockquote>
				786	* <code>if ((shaper.getRanges() & shaper.ARABIC) != 0) { ... </code>
				787	* </blockquote>
				788	* @return the values for all the ranges to be shaped.
				789	*/
				790	public int getRanges() {
				791	return mask & ~CONTEXTUAL_MASK;
				792	}
				793
				794	/**
				795	* Perform non-contextual shaping.
				796	*/
				797	private void shapeNonContextually(char[] text, int start, int count) {
				798	int base = bases[key];
				799	char minDigit = key == TAMIL_KEY ? '\u0031' : '\u0030'; // Tamil doesn't use decimal zero
				800	for (int i = start, e = start + count; i < e; ++i) {
				801	char c = text[i];
				802	if (c >= minDigit && c <= '\u0039') {
				803	text[i] = (char)(c + base);
				804	}
				805	}
				806	}
				807
				808	/**
				809	* Perform contextual shaping.
				810	* Synchronized to protect caches used in getContextKey and isStrongDirectional.
				811	*/
				812	private synchronized void shapeContextually(char[] text, int start, int count, int ctxKey) {
				813
				814	// if we don't support this context, then don't shape
				815	if ((mask & (1<<ctxKey)) == 0) {
				816	ctxKey = EUROPEAN_KEY;
				817	}
				818	int lastkey = ctxKey;
				819
				820	int base = bases[ctxKey];
				821	char minDigit = ctxKey == TAMIL_KEY ? '\u0031' : '\u0030'; // Tamil doesn't use decimal zero
				822
				823	for (int i = start, e = start + count; i < e; ++i) {
				824	char c = text[i];
				825	if (c >= minDigit && c <= '\u0039') {
				826	text[i] = (char)(c + base);
				827	}
				828
				829	if (isStrongDirectional(c)) {
				830	int newkey = getContextKey(c);
				831	if (newkey != lastkey) {
				832	lastkey = newkey;
				833
				834	ctxKey = newkey;
				835	if (((mask & EASTERN_ARABIC) != 0) && (ctxKey == ARABIC_KEY \|\| ctxKey == EASTERN_ARABIC_KEY)) {
				836	ctxKey = EASTERN_ARABIC_KEY;
				837	} else if ((mask & (1<<ctxKey)) == 0) {
				838	ctxKey = EUROPEAN_KEY;
				839	}
				840
				841	base = bases[ctxKey];
				842
				843	minDigit = ctxKey == TAMIL_KEY ? '\u0031' : '\u0030'; // Tamil doesn't use decimal zero
				844	}
				845	}
				846	}
				847	}
				848
				849	/**
				850	* Returns a hash code for this shaper.
				851	* @return this shaper's hash code.
				852	* @see java.lang.Object#hashCode
				853	*/
				854	public int hashCode() {
				855	return mask;
				856	}
				857
				858	/**
				859	* Returns true if the specified object is an instance of
				860	* <code>NumericShaper</code> and shapes identically to this one.
				861	* @param o the specified object to compare to this
				862	* <code>NumericShaper</code>
				863	* @return <code>true</code> if <code>o</code> is an instance
				864	* of <code>NumericShaper</code> and shapes in the same way;
				865	* <code>false</code> otherwise.
				866	* @see java.lang.Object#equals(java.lang.Object)
				867	*/
				868	public boolean equals(Object o) {
				869	if (o != null) {
				870	try {
				871	NumericShaper rhs = (NumericShaper)o;
				872	return rhs.mask == mask && rhs.key == key;
				873	}
				874	catch (ClassCastException e) {
				875	}
				876	}
				877	return false;
				878	}
				879
				880	/**
				881	* Returns a <code>String</code> that describes this shaper. This method
				882	* is used for debugging purposes only.
				883	* @return a <code>String</code> describing this shaper.
				884	*/
				885	public String toString() {
				886	StringBuilder buf = new StringBuilder(super.toString());
				887
				888	buf.append("[contextual:" + isContextual());
				889
				890	if (isContextual()) {
				891	buf.append(", context:" + keyNames[key]);
				892	}
				893
				894	buf.append(", range(s): ");
				895	boolean first = true;
				896	for (int i = 0; i < NUM_KEYS; ++i) {
				897	if ((mask & (1 << i)) != 0) {
				898	if (first) {
				899	first = false;
				900	} else {
				901	buf.append(", ");
				902	}
				903	buf.append(keyNames[i]);
				904	}
				905	}
				906	buf.append(']');
				907
				908	return buf.toString();
				909	}
				910
				911	/**
				912	* Returns the index of the high bit in value (assuming le, actually
				913	* power of 2 >= value). value must be positive.
				914	*/
				915	private static int getHighBit(int value) {
				916	if (value <= 0) {
				917	return -32;
				918	}
				919
				920	int bit = 0;
				921
				922	if (value >= 1 << 16) {
				923	value >>= 16;
				924	bit += 16;
				925	}
				926
				927	if (value >= 1 << 8) {
				928	value >>= 8;
				929	bit += 8;
				930	}
				931
				932	if (value >= 1 << 4) {
				933	value >>= 4;
				934	bit += 4;
				935	}
				936
				937	if (value >= 1 << 2) {
				938	value >>= 2;
				939	bit += 2;
				940	}
				941
				942	if (value >= 1 << 1) {
				943	value >>= 1;
				944	bit += 1;
				945	}
				946
				947	return bit;
				948	}
				949
				950	/**
				951	* fast binary search over subrange of array.
				952	*/
				953	private static int search(char value, char[] array, int start, int length)
				954	{
				955	int power = 1 << getHighBit(length);
				956	int extra = length - power;
				957	int probe = power;
				958	int index = start;
				959
				960	if (value >= array[index + extra]) {
				961	index += extra;
				962	}
				963
				964	while (probe > 1) {
				965	probe >>= 1;
				966
				967	if (value >= array[index + probe]) {
				968	index += probe;
				969	}
				970	}
				971
				972	return index;
				973	}
				974	}