Blame - jdk/src/share/classes/sun/font/CMap.java - platform/libcore

blob: d14451824d1ce954b9c7afd47a39eb85114bbee0 [file] [log] [blame]

J. Duke	319a3b9	2007-12-01 00:00:00 +0000	[diff] [blame^]	1	/*
				2	* Copyright 2003-2006 Sun Microsystems, Inc. All Rights Reserved.
				3	* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
				4	*
				5	* This code is free software; you can redistribute it and/or modify it
				6	* under the terms of the GNU General Public License version 2 only, as
				7	* published by the Free Software Foundation. Sun designates this
				8	* particular file as subject to the "Classpath" exception as provided
				9	* by Sun in the LICENSE file that accompanied this code.
				10	*
				11	* This code is distributed in the hope that it will be useful, but WITHOUT
				12	* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
				13	* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
				14	* version 2 for more details (a copy is included in the LICENSE file that
				15	* accompanied this code).
				16	*
				17	* You should have received a copy of the GNU General Public License version
				18	* 2 along with this work; if not, write to the Free Software Foundation,
				19	* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
				20	*
				21	* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
				22	* CA 95054 USA or visit www.sun.com if you need additional information or
				23	* have any questions.
				24	*/
				25
				26	package sun.font;
				27
				28	import java.nio.ByteBuffer;
				29	import java.nio.CharBuffer;
				30	import java.nio.IntBuffer;
				31	import java.util.Locale;
				32	import java.nio.charset.*;
				33
				34	/*
				35	* A tt font has a CMAP table which is in turn made up of sub-tables which
				36	* describe the char to glyph mapping in (possibly) multiple ways.
				37	* CMAP subtables are described by 3 values.
				38	* 1. Platform ID (eg 3=Microsoft, which is the id we look for in JDK)
				39	* 2. Encoding (eg 0=symbol, 1=unicode)
				40	* 3. TrueType subtable format (how the char->glyph mapping for the encoding
				41	* is stored in the subtable). See the TrueType spec. Format 4 is required
				42	* by MS in fonts for windows. It uses segmented mapping to delta values.
				43	* The combination we most typically see is (3,1,4) :
				44	* CMAP Platform ID=3 is what we use.
				45	* Encodings that are used in practice by JDK on Solaris are
				46	* symbol (3,0)
				47	* unicode (3,1)
				48	* GBK (3,5) (note that solaris zh fonts report 3,4 but are really 3,5)
				49	* The format for almost all subtables is 4. However the solaris (3,5)
				50	* encodings are typically in format 2.
				51	*/
				52	abstract class CMap {
				53
				54	// static char WingDings_b2c[] = {
				55	// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
				56	// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
				57	// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
				58	// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
				59	// 0xfffd, 0xfffd, 0x2702, 0x2701, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
				60	// 0xfffd, 0x2706, 0x2709, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
				61	// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
				62	// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0x2707, 0x270d,
				63	// 0xfffd, 0x270c, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
				64	// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
				65	// 0xfffd, 0x2708, 0xfffd, 0xfffd, 0x2744, 0xfffd, 0x271e, 0xfffd,
				66	// 0x2720, 0x2721, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
				67	// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
				68	// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
				69	// 0xfffd, 0x2751, 0x2752, 0xfffd, 0xfffd, 0x2756, 0xfffd, 0xfffd,
				70	// 0xfffd, 0xfffd, 0xfffd, 0x2740, 0x273f, 0x275d, 0x275e, 0xfffd,
				71	// 0xfffd, 0x2780, 0x2781, 0x2782, 0x2783, 0x2784, 0x2785, 0x2786,
				72	// 0x2787, 0x2788, 0x2789, 0xfffd, 0x278a, 0x278b, 0x278c, 0x278d,
				73	// 0x278e, 0x278f, 0x2790, 0x2791, 0x2792, 0x2793, 0xfffd, 0xfffd,
				74	// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
				75	// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0x274d, 0xfffd,
				76	// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0x2736, 0x2734, 0xfffd, 0x2735,
				77	// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0x272a, 0x2730, 0xfffd,
				78	// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
				79	// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0x27a5, 0xfffd, 0x27a6, 0xfffd,
				80	// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
				81	// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
				82	// 0x27a2, 0xfffd, 0xfffd, 0xfffd, 0x27b3, 0xfffd, 0xfffd, 0xfffd,
				83	// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
				84	// 0x27a1, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
				85	// 0x27a9, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
				86	// 0xfffd, 0xfffd, 0xfffd, 0x2717, 0x2713, 0xfffd, 0xfffd, 0xfffd,
				87	// };
				88
				89	// static char Symbols_b2c[] = {
				90	// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
				91	// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
				92	// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
				93	// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
				94	// 0xfffd, 0xfffd, 0x2200, 0xfffd, 0x2203, 0xfffd, 0xfffd, 0x220d,
				95	// 0xfffd, 0xfffd, 0x2217, 0xfffd, 0xfffd, 0x2212, 0xfffd, 0xfffd,
				96	// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
				97	// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
				98	// 0x2245, 0x0391, 0x0392, 0x03a7, 0x0394, 0x0395, 0x03a6, 0x0393,
				99	// 0x0397, 0x0399, 0x03d1, 0x039a, 0x039b, 0x039c, 0x039d, 0x039f,
				100	// 0x03a0, 0x0398, 0x03a1, 0x03a3, 0x03a4, 0x03a5, 0x03c2, 0x03a9,
				101	// 0x039e, 0x03a8, 0x0396, 0xfffd, 0x2234, 0xfffd, 0x22a5, 0xfffd,
				102	// 0xfffd, 0x03b1, 0x03b2, 0x03c7, 0x03b4, 0x03b5, 0x03c6, 0x03b3,
				103	// 0x03b7, 0x03b9, 0x03d5, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03bf,
				104	// 0x03c0, 0x03b8, 0x03c1, 0x03c3, 0x03c4, 0x03c5, 0x03d6, 0x03c9,
				105	// 0x03be, 0x03c8, 0x03b6, 0xfffd, 0xfffd, 0xfffd, 0x223c, 0xfffd,
				106	// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
				107	// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
				108	// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
				109	// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
				110	// 0xfffd, 0x03d2, 0xfffd, 0x2264, 0x2215, 0x221e, 0xfffd, 0xfffd,
				111	// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
				112	// 0x2218, 0xfffd, 0xfffd, 0x2265, 0xfffd, 0x221d, 0xfffd, 0x2219,
				113	// 0xfffd, 0x2260, 0x2261, 0x2248, 0x22ef, 0x2223, 0xfffd, 0xfffd,
				114	// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0x2297, 0x2295, 0x2205, 0x2229,
				115	// 0x222a, 0x2283, 0x2287, 0x2284, 0x2282, 0x2286, 0x2208, 0x2209,
				116	// 0xfffd, 0x2207, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0x221a, 0x22c5,
				117	// 0xfffd, 0x2227, 0x2228, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
				118	// 0x22c4, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0x2211, 0xfffd, 0xfffd,
				119	// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
				120	// 0xfffd, 0xfffd, 0x222b, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
				121	// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
				122	// };
				123
					/* Encoding IDs of the non-Unicode Microsoft platform (3,*) cmap
					 * subtables. The values 2-6 are also used as indices into
					 * converterMaps and as the argument to getConverter().
					 */
				124	static final short ShiftJISEncoding = 2;
				125	static final short GBKEncoding = 3;
				126	static final short Big5Encoding = 4;
				127	static final short WansungEncoding = 5;
				128	static final short JohabEncoding = 6;
					/* NOTE: 10 lies outside the 7 slots of converterMaps. */
				129	static final short MSUnicodeSurrogateEncoding = 10;
				130
					/* Filler used by getConverter() for codes that have no mapping. */
				131	static final char noSuchChar = (char)0xfffd;
				132	static final int SHORTMASK = 0x0000ffff;
					/* NOTE(review): this is an int with all bits set (-1), so
					 * "anInt & INTMASK" leaves the int unchanged and does not yield
					 * an unsigned 32 bit value when widened to long.
					 */
				133	static final int INTMASK = 0xffffffff;
				134
					/* Converter tables cached by getConverterMap(), indexed by the
					 * encoding ID constants above. Entries are created on first use.
					 */
				135	static final char[][] converterMaps = new char[7][];
				136
				137	/*
				138	* Unicode->other encoding translation array. A pre-computed look up
				139	* which can be shared across all fonts using that encoding.
				140	* Using this saves running character converters repeatedly.
				141	*/
				142	char[] xlat;
				143
				144	static CMap initialize(TrueTypeFont font) {
				145
				146	CMap cmap = null;
				147
				148	int offset, platformID, encodingID=-1;
				149
				150	int three0=0, three1=0, three2=0, three3=0, three4=0, three5=0,
				151	three6=0, three10=0;
				152	boolean threeStar = false;
				153
				154	ByteBuffer cmapBuffer = font.getTableBuffer(TrueTypeFont.cmapTag);
				155	int cmapTableOffset = font.getTableSize(TrueTypeFont.cmapTag);
				156	short numberSubTables = cmapBuffer.getShort(2);
				157
				158	/* locate the offsets of all 3,* (ie Microsoft platform) encodings */
				159	for (int i=0; i<numberSubTables; i++) {
				160	cmapBuffer.position(i * 8 + 4);
				161	platformID = cmapBuffer.getShort();
				162	if (platformID == 3) {
				163	threeStar = true;
				164	encodingID = cmapBuffer.getShort();
				165	offset = cmapBuffer.getInt();
				166	switch (encodingID) {
				167	case 0: three0 = offset; break; // MS Symbol encoding
				168	case 1: three1 = offset; break; // MS Unicode cmap
				169	case 2: three2 = offset; break; // ShiftJIS cmap.
				170	case 3: three3 = offset; break; // GBK cmap
				171	case 4: three4 = offset; break; // Big 5 cmap
				172	case 5: three5 = offset; break; // Wansung
				173	case 6: three6 = offset; break; // Johab
				174	case 10: three10 = offset; break; // MS Unicode surrogates
				175	}
				176	}
				177	}
				178
				179	/* This defines the preference order for cmap subtables */
				180	if (threeStar) {
				181	if (three10 != 0) {
				182	cmap = createCMap(cmapBuffer, three10, null);
				183	}
				184	else if (three0 != 0) {
				185	/* The special case treatment of these fonts leads to
				186	* anomalies where a user can view "wingdings" and "wingdings2"
				187	* and the latter shows all its code points in the unicode
				188	* private use area at 0xF000->0XF0FF and the former shows
				189	* a scattered subset of its glyphs that are known mappings to
				190	* unicode code points.
				191	* The primary purpose of these mappings was to facilitate
				192	* display of symbol chars etc in composite fonts, however
				193	* this is not needed as all these code points are covered
				194	* by Lucida Sans Regular.
				195	* Commenting this out reduces the role of these two files
				196	* (assuming that they continue to be used in font.properties)
				197	* to just one of contributing to the overall composite
				198	* font metrics, and also AWT can still access the fonts.
				199	* Clients which explicitly accessed these fonts as names
				200	* "Symbol" and "Wingdings" (ie as physical fonts) and
				201	* expected to see a scattering of these characters will
				202	* see them now as missing. How much of a problem is this?
				203	* Perhaps we could still support this mapping just for
				204	* "Symbol.ttf" but I suspect some users would prefer it
				205	* to be mapped in to the Latin range as that is how
				206	* the "symbol" font is used in native apps.
				207	*/
				208	// String name = font.platName.toLowerCase(Locale.ENGLISH);
				209	// if (name.endsWith("symbol.ttf")) {
				210	// cmap = createSymbolCMap(cmapBuffer, three0, Symbols_b2c);
				211	// } else if (name.endsWith("wingding.ttf")) {
				212	// cmap = createSymbolCMap(cmapBuffer, three0, WingDings_b2c);
				213	// } else {
				214	cmap = createCMap(cmapBuffer, three0, null);
				215	// }
				216	}
				217	else if (three1 != 0) {
				218	cmap = createCMap(cmapBuffer, three1, null);
				219	}
				220	else if (three2 != 0) {
				221	cmap = createCMap(cmapBuffer, three2,
				222	getConverterMap(ShiftJISEncoding));
				223	}
				224	else if (three3 != 0) {
				225	cmap = createCMap(cmapBuffer, three3,
				226	getConverterMap(GBKEncoding));
				227	}
				228	else if (three4 != 0) {
				229	/* GB2312 TrueType fonts on Solaris have wrong encoding ID for
				230	* cmap table, these fonts have EncodingID 4 which is Big5
				231	* encoding according the TrueType spec, but actually the
				232	* fonts are using gb2312 encoding, have to use this
				233	* workaround to make Solaris zh_CN locale work. -sherman
				234	*/
				235	if (FontManager.isSolaris && font.platName != null &&
				236	(font.platName.startsWith(
				237	"/usr/openwin/lib/locale/zh_CN.EUC/X11/fonts/TrueType") \|\|
				238	font.platName.startsWith(
				239	"/usr/openwin/lib/locale/zh_CN/X11/fonts/TrueType") \|\|
				240	font.platName.startsWith(
				241	"/usr/openwin/lib/locale/zh/X11/fonts/TrueType"))) {
				242	cmap = createCMap(cmapBuffer, three4,
				243	getConverterMap(GBKEncoding));
				244	}
				245	else {
				246	cmap = createCMap(cmapBuffer, three4,
				247	getConverterMap(Big5Encoding));
				248	}
				249	}
				250	else if (three5 != 0) {
				251	cmap = createCMap(cmapBuffer, three5,
				252	getConverterMap(WansungEncoding));
				253	}
				254	else if (three6 != 0) {
				255	cmap = createCMap(cmapBuffer, three6,
				256	getConverterMap(JohabEncoding));
				257	}
				258	} else {
				259	/* No 3,* subtable was found. Just use whatever is the first
				260	* table listed. Not very useful but maybe better than
				261	* rejecting the font entirely?
				262	*/
				263	cmap = createCMap(cmapBuffer, cmapBuffer.getInt(8), null);
				264	}
				265	return cmap;
				266	}
				267
				268	/* speed up the converting by setting the range for double
				269	* byte characters;
				270	*/
				271	static char[] getConverter(short encodingID) {
				272	int dBegin = 0x8000;
				273	int dEnd = 0xffff;
				274	String encoding;
				275
				276	switch (encodingID) {
				277	case ShiftJISEncoding:
				278	dBegin = 0x8140;
				279	dEnd = 0xfcfc;
				280	encoding = "SJIS";
				281	break;
				282	case GBKEncoding:
				283	dBegin = 0x8140;
				284	dEnd = 0xfea0;
				285	encoding = "GBK";
				286	break;
				287	case Big5Encoding:
				288	dBegin = 0xa140;
				289	dEnd = 0xfefe;
				290	encoding = "Big5";
				291	break;
				292	case WansungEncoding:
				293	dBegin = 0xa1a1;
				294	dEnd = 0xfede;
				295	encoding = "EUC_KR";
				296	break;
				297	case JohabEncoding:
				298	dBegin = 0x8141;
				299	dEnd = 0xfdfe;
				300	encoding = "Johab";
				301	break;
				302	default:
				303	return null;
				304	}
				305
				306	try {
				307	char[] convertedChars = new char[65536];
				308	for (int i=0; i<65536; i++) {
				309	convertedChars[i] = noSuchChar;
				310	}
				311
				312	byte[] inputBytes = new byte[(dEnd-dBegin+1)*2];
				313	char[] outputChars = new char[(dEnd-dBegin+1)];
				314
				315	int j = 0;
				316	int firstByte;
				317	if (encodingID == ShiftJISEncoding) {
				318	for (int i = dBegin; i <= dEnd; i++) {
				319	firstByte = (i >> 8 & 0xff);
				320	if (firstByte >= 0xa1 && firstByte <= 0xdf) {
				321	//sjis halfwidth katakana
				322	inputBytes[j++] = (byte)0xff;
				323	inputBytes[j++] = (byte)0xff;
				324	} else {
				325	inputBytes[j++] = (byte)firstByte;
				326	inputBytes[j++] = (byte)(i & 0xff);
				327	}
				328	}
				329	} else {
				330	for (int i = dBegin; i <= dEnd; i++) {
				331	inputBytes[j++] = (byte)(i>>8 & 0xff);
				332	inputBytes[j++] = (byte)(i & 0xff);
				333	}
				334	}
				335
				336	Charset.forName(encoding).newDecoder()
				337	.onMalformedInput(CodingErrorAction.REPLACE)
				338	.onUnmappableCharacter(CodingErrorAction.REPLACE)
				339	.replaceWith("\u0000")
				340	.decode(ByteBuffer.wrap(inputBytes, 0, inputBytes.length),
				341	CharBuffer.wrap(outputChars, 0, outputChars.length),
				342	true);
				343
				344	// ensure single byte ascii
				345	for (int i = 0x20; i <= 0x7e; i++) {
				346	convertedChars[i] = (char)i;
				347	}
				348
				349	//sjis halfwidth katakana
				350	if (encodingID == ShiftJISEncoding) {
				351	for (int i = 0xa1; i <= 0xdf; i++) {
				352	convertedChars[i] = (char)(i - 0xa1 + 0xff61);
				353	}
				354	}
				355
				356	/* It would save heap space (approx 60Kbytes for each of these
				357	* converters) if stored only valid ranges (ie returned
				358	* outputChars directly. But this is tricky since want to
				359	* include the ASCII range too.
				360	*/
				361	// System.err.println("oc.len="+outputChars.length);
				362	// System.err.println("cc.len="+convertedChars.length);
				363	// System.err.println("dbegin="+dBegin);
				364	System.arraycopy(outputChars, 0, convertedChars, dBegin,
				365	outputChars.length);
				366
				367	//return convertedChars;
				368	/* invert this map as now want it to map from Unicode
				369	* to other encoding.
				370	*/
				371	char [] invertedChars = new char[65536];
				372	for (int i=0;i<65536;i++) {
				373	if (convertedChars[i] != noSuchChar) {
				374	invertedChars[convertedChars[i]] = (char)i;
				375	}
				376	}
				377	return invertedChars;
				378
				379	} catch (Exception e) {
				380	e.printStackTrace();
				381	}
				382	return null;
				383	}
				384
				385	/*
				386	* The returned array maps from unicode to some other 2 byte encoding
				387	* eg when the encoding is SJIS, indexing the array with a unicode char
				388	* gives the corresponding 2 byte SJIS code.
				389	*/
				390	static char[] getConverterMap(short encodingID) {
				391	if (converterMaps[encodingID] == null) {
				392	converterMaps[encodingID] = getConverter(encodingID);
				393	}
				394	return converterMaps[encodingID];
				395	}
				396
				397
				398	static CMap createCMap(ByteBuffer buffer, int offset, char[] xlat) {
				399	/* First do a sanity check that this cmap subtable is contained
				400	* within the cmap table.
				401	*/
				402	int subtableFormat = buffer.getChar(offset);
				403	long subtableLength;
				404	if (subtableFormat < 8) {
				405	subtableLength = buffer.getChar(offset+2);
				406	} else {
				407	subtableLength = buffer.getInt(offset+4) & INTMASK;
				408	}
				409	if (offset+subtableLength > buffer.capacity()) {
				410	if (FontManager.logging) {
				411	FontManager.logger.warning("Cmap subtable overflows buffer.");
				412	}
				413	}
				414	switch (subtableFormat) {
				415	case 0: return new CMapFormat0(buffer, offset);
				416	case 2: return new CMapFormat2(buffer, offset, xlat);
				417	case 4: return new CMapFormat4(buffer, offset, xlat);
				418	case 6: return new CMapFormat6(buffer, offset, xlat);
				419	case 8: return new CMapFormat8(buffer, offset, xlat);
				420	case 10: return new CMapFormat10(buffer, offset, xlat);
				421	case 12: return new CMapFormat12(buffer, offset, xlat);
				422	default: throw new RuntimeException("Cmap format unimplemented: " +
				423	(int)buffer.getChar(offset));
				424	}
				425	}
				426
				427	/*
				428	final char charVal(byte[] cmap, int index) {
				429	return (char)(((0xff & cmap[index]) << 8)+(0xff & cmap[index+1]));
				430	}
				431
				432	final short shortVal(byte[] cmap, int index) {
				433	return (short)(((0xff & cmap[index]) << 8)+(0xff & cmap[index+1]));
				434	}
				435	*/
					/*
					 * Maps a character code to a glyph code using this subtable.
					 * Implemented by each of the CMapFormatN subclasses; the
					 * implementations in this file return 0 when the subtable has
					 * no entry for the character code.
					 */
				436	abstract char getGlyph(int charCode);
				437
				438	/* Format 4 Header is
				439	* ushort format (off=0)
				440	* ushort length (off=2)
				441	* ushort language (off=4)
				442	* ushort segCountX2 (off=6)
				443	* ushort searchRange (off=8)
				444	* ushort entrySelector (off=10)
				445	* ushort rangeShift (off=12)
				446	* ushort endCount[segCount] (off=14)
				447	* ushort reservedPad
				448	* ushort startCount[segCount]
				449	* short idDelta[segCount]
				450	* idRangeOFfset[segCount]
				451	* ushort glyphIdArray[]
				452	*/
				453	static class CMapFormat4 extends CMap {
				454	int segCount;
				455	int entrySelector;
				456	int rangeShift;
				457	char[] endCount;
				458	char[] startCount;
				459	short[] idDelta;
				460	char[] idRangeOffset;
				461	char[] glyphIds;
				462
				463	CMapFormat4(ByteBuffer bbuffer, int offset, char[] xlat) {
				464
				465	this.xlat = xlat;
				466
				467	bbuffer.position(offset);
				468	CharBuffer buffer = bbuffer.asCharBuffer();
				469	buffer.get(); // skip, we already know format=4
				470	int subtableLength = buffer.get();
				471	/* Try to recover from some bad fonts which specify a subtable
				472	* length that would overflow the byte buffer holding the whole
				473	* cmap table. If this isn't a recoverable situation an exception
				474	* may be thrown which is caught higher up the call stack.
				475	* Whilst this may seem lenient, in practice, unless the "bad"
				476	* subtable we are using is the last one in the cmap table we
				477	* would have no way of knowing about this problem anyway.
				478	*/
				479	if (offset+subtableLength > bbuffer.capacity()) {
				480	subtableLength = bbuffer.capacity() - offset;
				481	}
				482	buffer.get(); // skip language
				483	segCount = buffer.get()/2;
				484	int searchRange = buffer.get();
				485	entrySelector = buffer.get();
				486	rangeShift = buffer.get()/2;
				487	startCount = new char[segCount];
				488	endCount = new char[segCount];
				489	idDelta = new short[segCount];
				490	idRangeOffset = new char[segCount];
				491
				492	for (int i=0; i<segCount; i++) {
				493	endCount[i] = buffer.get();
				494	}
				495	buffer.get(); // 2 bytes for reserved pad
				496	for (int i=0; i<segCount; i++) {
				497	startCount[i] = buffer.get();
				498	}
				499
				500	for (int i=0; i<segCount; i++) {
				501	idDelta[i] = (short)buffer.get();
				502	}
				503
				504	for (int i=0; i<segCount; i++) {
				505	char ctmp = buffer.get();
				506	idRangeOffset[i] = (char)((ctmp>>1)&0xffff);
				507	}
				508	/* Can calculate the number of glyph IDs by subtracting
				509	* "pos" from the length of the cmap
				510	*/
				511	int pos = (segCount*8+16)/2;
				512	buffer.position(pos);
				513	int numGlyphIds = (subtableLength/2 - pos);
				514	glyphIds = new char[numGlyphIds];
				515	for (int i=0;i<numGlyphIds;i++) {
				516	glyphIds[i] = buffer.get();
				517	}
				518	/*
				519	System.err.println("segcount="+segCount);
				520	System.err.println("entrySelector="+entrySelector);
				521	System.err.println("rangeShift="+rangeShift);
				522	for (int j=0;j<segCount;j++) {
				523	System.err.println("j="+j+ " sc="+(int)(startCount[j]&0xffff)+
				524	" ec="+(int)(endCount[j]&0xffff)+
				525	" delta="+idDelta[j] +
				526	" ro="+(int)idRangeOffset[j]);
				527	}
				528
				529	//System.err.println("numglyphs="+glyphIds.length);
				530	for (int i=0;i<numGlyphIds;i++) {
				531	System.err.println("gid["+i+"]="+(int)glyphIds[i]);
				532	}
				533	*/
				534	}
				535
				536	char getGlyph(int charCode) {
				537
				538	int index = 0;
				539	char glyphCode = 0;
				540
				541	int controlGlyph = getControlCodeGlyph(charCode, true);
				542	if (controlGlyph >= 0) {
				543	return (char)controlGlyph;
				544	}
				545
				546	/* presence of translation array indicates that this
				547	* cmap is in some other (non-unicode encoding).
				548	* In order to look-up a char->glyph mapping we need to
				549	* translate the unicode code point to the encoding of
				550	* the cmap.
				551	* REMIND: VALID CHARCODES??
				552	*/
				553	if (xlat != null) {
				554	charCode = xlat[charCode];
				555	}
				556
				557	/*
				558	* Citation from the TrueType (and OpenType) spec:
				559	* The segments are sorted in order of increasing endCode
				560	* values, and the segment values are specified in four parallel
				561	* arrays. You search for the first endCode that is greater than
				562	* or equal to the character code you want to map. If the
				563	* corresponding startCode is less than or equal to the
				564	* character code, then you use the corresponding idDelta and
				565	* idRangeOffset to map the character code to a glyph index
				566	* (otherwise, the missingGlyph is returned).
				567	*/
				568
				569	/*
				570	* CMAP format4 defines several fields for optimized search of
				571	* the segment list (entrySelector, searchRange, rangeShift).
				572	* However, benefits are neglible and some fonts have incorrect
				573	* data - so we use straightforward binary search (see bug 6247425)
				574	*/
				575	int left = 0, right = startCount.length;
				576	index = startCount.length >> 1;
				577	while (left < right) {
				578	if (endCount[index] < charCode) {
				579	left = index + 1;
				580	} else {
				581	right = index;
				582	}
				583	index = (left + right) >> 1;
				584	}
				585
				586	if (charCode >= startCount[index] && charCode <= endCount[index]) {
				587	int rangeOffset = idRangeOffset[index];
				588
				589	if (rangeOffset == 0) {
				590	glyphCode = (char)(charCode + idDelta[index]);
				591	} else {
				592	/* Calculate an index into the glyphIds array */
				593
				594	/*
				595	System.err.println("rangeoffset="+rangeOffset+
				596	" charCode=" + charCode +
				597	" scnt["+index+"]="+(int)startCount[index] +
				598	" segCnt="+segCount);
				599	*/
				600
				601	int glyphIDIndex = rangeOffset - segCount + index
				602	+ (charCode - startCount[index]);
				603	glyphCode = glyphIds[glyphIDIndex];
				604	if (glyphCode != 0) {
				605	glyphCode = (char)(glyphCode + idDelta[index]);
				606	}
				607	}
				608	}
				609	if (glyphCode != 0) {
				610	//System.err.println("cc="+Integer.toHexString((int)charCode) + " gc="+(int)glyphCode);
				611	}
				612	return glyphCode;
				613	}
				614	}
				615
				616	// Format 0: Byte Encoding table
				617	static class CMapFormat0 extends CMap {
				618	byte [] cmap;
				619
				620	CMapFormat0(ByteBuffer buffer, int offset) {
				621
				622	/* skip 6 bytes of format, length, and version */
				623	int len = buffer.getChar(offset+2);
				624	cmap = new byte[len-6];
				625	buffer.position(offset+6);
				626	buffer.get(cmap);
				627	}
				628
				629	char getGlyph(int charCode) {
				630	if (charCode < 256) {
				631	if (charCode < 0x0010) {
				632	switch (charCode) {
				633	case 0x0009:
				634	case 0x000a:
				635	case 0x000d: return CharToGlyphMapper.INVISIBLE_GLYPH_ID;
				636	}
				637	}
				638	return (char)(0xff & cmap[charCode]);
				639	} else {
				640	return 0;
				641	}
				642	}
				643	}
				644
				645	// static CMap createSymbolCMap(ByteBuffer buffer, int offset, char[] syms) {
				646
				647	// CMap cmap = createCMap(buffer, offset, null);
				648	// if (cmap == null) {
				649	// return null;
				650	// } else {
				651	// return new CMapFormatSymbol(cmap, syms);
				652	// }
				653	// }
				654
				655	// static class CMapFormatSymbol extends CMap {
				656
				657	// CMap cmap;
				658	// static final int NUM_BUCKETS = 128;
				659	// Bucket[] buckets = new Bucket[NUM_BUCKETS];
				660
				661	// class Bucket {
				662	// char unicode;
				663	// char glyph;
				664	// Bucket next;
				665
				666	// Bucket(char u, char g) {
				667	// unicode = u;
				668	// glyph = g;
				669	// }
				670	// }
				671
				672	// CMapFormatSymbol(CMap cmap, char[] syms) {
				673
				674	// this.cmap = cmap;
				675
				676	// for (int i=0;i<syms.length;i++) {
				677	// char unicode = syms[i];
				678	// if (unicode != noSuchChar) {
				679	// char glyph = cmap.getGlyph(i + 0xf000);
				680	// int hash = unicode % NUM_BUCKETS;
				681	// Bucket bucket = new Bucket(unicode, glyph);
				682	// if (buckets[hash] == null) {
				683	// buckets[hash] = bucket;
				684	// } else {
				685	// Bucket b = buckets[hash];
				686	// while (b.next != null) {
				687	// b = b.next;
				688	// }
				689	// b.next = bucket;
				690	// }
				691	// }
				692	// }
				693	// }
				694
				695	// char getGlyph(int unicode) {
				696	// if (unicode >= 0x1000) {
				697	// return 0;
				698	// }
				699	// else if (unicode >=0xf000 && unicode < 0xf100) {
				700	// return cmap.getGlyph(unicode);
				701	// } else {
				702	// Bucket b = buckets[unicode % NUM_BUCKETS];
				703	// while (b != null) {
				704	// if (b.unicode == unicode) {
				705	// return b.glyph;
				706	// } else {
				707	// b = b.next;
				708	// }
				709	// }
				710	// return 0;
				711	// }
				712	// }
				713	// }
				714
				715	// Format 2: High-byte mapping through table
				716	static class CMapFormat2 extends CMap {
				717
				718	char[] subHeaderKey = new char[256];
				719	/* Store subheaders in individual arrays
				720	* A SubHeader entry theortically looks like {
				721	* char firstCode;
				722	* char entryCount;
				723	* short idDelta;
				724	* char idRangeOffset;
				725	* }
				726	*/
				727	char[] firstCodeArray;
				728	char[] entryCountArray;
				729	short[] idDeltaArray;
				730	char[] idRangeOffSetArray;
				731
				732	char[] glyphIndexArray;
				733
				734	CMapFormat2(ByteBuffer buffer, int offset, char[] xlat) {
				735
				736	this.xlat = xlat;
				737
				738	int tableLen = buffer.getChar(offset+2);
				739	buffer.position(offset+6);
				740	CharBuffer cBuffer = buffer.asCharBuffer();
				741	char maxSubHeader = 0;
				742	for (int i=0;i<256;i++) {
				743	subHeaderKey[i] = cBuffer.get();
				744	if (subHeaderKey[i] > maxSubHeader) {
				745	maxSubHeader = subHeaderKey[i];
				746	}
				747	}
				748	/* The value of the subHeaderKey is 8 * the subHeader index,
				749	* so the number of subHeaders can be obtained by dividing
				750	* this value bv 8 and adding 1.
				751	*/
				752	int numSubHeaders = (maxSubHeader >> 3) +1;
				753	firstCodeArray = new char[numSubHeaders];
				754	entryCountArray = new char[numSubHeaders];
				755	idDeltaArray = new short[numSubHeaders];
				756	idRangeOffSetArray = new char[numSubHeaders];
				757	for (int i=0; i<numSubHeaders; i++) {
				758	firstCodeArray[i] = cBuffer.get();
				759	entryCountArray[i] = cBuffer.get();
				760	idDeltaArray[i] = (short)cBuffer.get();
				761	idRangeOffSetArray[i] = cBuffer.get();
				762	// System.out.println("sh["+i+"]:fc="+(int)firstCodeArray[i]+
				763	// " ec="+(int)entryCountArray[i]+
				764	// " delta="+(int)idDeltaArray[i]+
				765	// " offset="+(int)idRangeOffSetArray[i]);
				766	}
				767
				768	int glyphIndexArrSize = (tableLen-518-numSubHeaders*8)/2;
				769	glyphIndexArray = new char[glyphIndexArrSize];
				770	for (int i=0; i<glyphIndexArrSize;i++) {
				771	glyphIndexArray[i] = cBuffer.get();
				772	}
				773	}
				774
				775	char getGlyph(int charCode) {
				776	int controlGlyph = getControlCodeGlyph(charCode, true);
				777	if (controlGlyph >= 0) {
				778	return (char)controlGlyph;
				779	}
				780
				781	if (xlat != null) {
				782	charCode = xlat[charCode];
				783	}
				784
				785	char highByte = (char)(charCode >> 8);
				786	char lowByte = (char)(charCode & 0xff);
				787	int key = subHeaderKey[highByte]>>3; // index into subHeaders
				788	char mapMe;
				789
				790	if (key != 0) {
				791	mapMe = lowByte;
				792	} else {
				793	mapMe = highByte;
				794	if (mapMe == 0) {
				795	mapMe = lowByte;
				796	}
				797	}
				798
				799	// System.err.println("charCode="+Integer.toHexString(charCode)+
				800	// " key="+key+ " mapMe="+Integer.toHexString(mapMe));
				801	char firstCode = firstCodeArray[key];
				802	if (mapMe < firstCode) {
				803	return 0;
				804	} else {
				805	mapMe -= firstCode;
				806	}
				807
				808	if (mapMe < entryCountArray[key]) {
				809	/* "address" arithmetic is needed to calculate the offset
				810	* into glyphIndexArray. "idRangeOffSetArray[key]" specifies
				811	* the number of bytes from that location in the table where
				812	* the subarray of glyphIndexes starting at "firstCode" begins.
				813	* Each entry in the subHeader table is 8 bytes, and the
				814	* idRangeOffSetArray field is at offset 6 in the entry.
				815	* The glyphIndexArray immediately follows the subHeaders.
				816	* So if there are "N" entries then the number of bytes to the
				817	* start of glyphIndexArray is (N-key)*8-6.
				818	* Subtract this from the idRangeOffSetArray value to get
				819	* the number of bytes into glyphIndexArray and divide by 2 to
				820	* get the (char) array index.
				821	*/
				822	int glyphArrayOffset = ((idRangeOffSetArray.length-key)*8)-6;
				823	int glyphSubArrayStart =
				824	(idRangeOffSetArray[key] - glyphArrayOffset)/2;
				825	char glyphCode = glyphIndexArray[glyphSubArrayStart+mapMe];
				826	if (glyphCode != 0) {
				827	glyphCode += idDeltaArray[key]; //idDelta
				828	return glyphCode;
				829	}
				830	}
				831	return 0;
				832	}
				833	}
				834
				835	// Format 6: Trimmed table mapping
				836	static class CMapFormat6 extends CMap {
				837
				838	char firstCode;
				839	char entryCount;
				840	char[] glyphIdArray;
				841
				842	CMapFormat6(ByteBuffer bbuffer, int offset, char[] xlat) {
				843
				844	System.err.println("WARNING: CMapFormat8 is untested.");
				845	bbuffer.position(offset+6);
				846	CharBuffer buffer = bbuffer.asCharBuffer();
				847	firstCode = buffer.get();
				848	entryCount = buffer.get();
				849	glyphIdArray = new char[entryCount];
				850	for (int i=0; i< entryCount; i++) {
				851	glyphIdArray[i] = buffer.get();
				852	}
				853	}
				854
				855	char getGlyph(int charCode) {
				856	int controlGlyph = getControlCodeGlyph(charCode, true);
				857	if (controlGlyph >= 0) {
				858	return (char)controlGlyph;
				859	}
				860
				861	if (xlat != null) {
				862	charCode = xlat[charCode];
				863	}
				864
				865	charCode -= firstCode;
				866	if (charCode < 0 \|\| charCode >= entryCount) {
				867	return 0;
				868	} else {
				869	return glyphIdArray[charCode];
				870	}
				871	}
				872	}
				873
				874	// Format 8: mixed 16-bit and 32-bit coverage
				875	// Seems unlikely this code will ever get tested as we look for
				876	// MS platform Cmaps and MS states (in the Opentype spec on their website)
				877	// that MS doesn't support this format
				878	static class CMapFormat8 extends CMap {
				879	byte[] is32 = new byte[8192];
				880	int nGroups;
				881	int[] startCharCode;
				882	int[] endCharCode;
				883	int[] startGlyphID;
				884
				885	CMapFormat8(ByteBuffer bbuffer, int offset, char[] xlat) {
				886
				887	System.err.println("WARNING: CMapFormat8 is untested.");
				888	bbuffer.position(12);
				889	bbuffer.get(is32);
				890	nGroups = bbuffer.getInt();
				891	startCharCode = new int[nGroups];
				892	endCharCode = new int[nGroups];
				893	startGlyphID = new int[nGroups];
				894	}
				895
				896	char getGlyph(int charCode) {
				897	if (xlat != null) {
				898	throw new RuntimeException("xlat array for cmap fmt=8");
				899	}
				900	return 0;
				901	}
				902
				903	}
				904
				905
				906	// Format 4-byte 10: Trimmed table mapping
				907	// Seems unlikely this code will ever get tested as we look for
				908	// MS platform Cmaps and MS states (in the Opentype spec on their website)
				909	// that MS doesn't support this format
				910	static class CMapFormat10 extends CMap {
				911
				912	long firstCode;
				913	int entryCount;
				914	char[] glyphIdArray;
				915
				916	CMapFormat10(ByteBuffer bbuffer, int offset, char[] xlat) {
				917
				918	System.err.println("WARNING: CMapFormat10 is untested.");
				919	firstCode = bbuffer.getInt() & INTMASK;
				920	entryCount = bbuffer.getInt() & INTMASK;
				921	bbuffer.position(offset+20);
				922	CharBuffer buffer = bbuffer.asCharBuffer();
				923	glyphIdArray = new char[entryCount];
				924	for (int i=0; i< entryCount; i++) {
				925	glyphIdArray[i] = buffer.get();
				926	}
				927	}
				928
				929	char getGlyph(int charCode) {
				930
				931	if (xlat != null) {
				932	throw new RuntimeException("xlat array for cmap fmt=10");
				933	}
				934
				935	int code = (int)(charCode - firstCode);
				936	if (code < 0 \|\| code >= entryCount) {
				937	return 0;
				938	} else {
				939	return glyphIdArray[code];
				940	}
				941	}
				942	}
				943
				944	// Format 12: Segmented coverage for UCS-4 (fonts supporting
				945	// surrogate pairs)
				946	static class CMapFormat12 extends CMap {
				947
				948	int numGroups;
				949	int highBit =0;
				950	int power;
				951	int extra;
				952	long[] startCharCode;
				953	long[] endCharCode;
				954	int[] startGlyphID;
				955
				956	CMapFormat12(ByteBuffer buffer, int offset, char[] xlat) {
				957	if (xlat != null) {
				958	throw new RuntimeException("xlat array for cmap fmt=12");
				959	}
				960
				961	numGroups = buffer.getInt(offset+12);
				962	startCharCode = new long[numGroups];
				963	endCharCode = new long[numGroups];
				964	startGlyphID = new int[numGroups];
				965	buffer.position(offset+16);
				966	buffer = buffer.slice();
				967	IntBuffer ibuffer = buffer.asIntBuffer();
				968	for (int i=0; i<numGroups; i++) {
				969	startCharCode[i] = ibuffer.get() & INTMASK;
				970	endCharCode[i] = ibuffer.get() & INTMASK;
				971	startGlyphID[i] = ibuffer.get() & INTMASK;
				972	}
				973
				974	/* Finds the high bit by binary searching through the bits */
				975	int value = numGroups;
				976
				977	if (value >= 1 << 16) {
				978	value >>= 16;
				979	highBit += 16;
				980	}
				981
				982	if (value >= 1 << 8) {
				983	value >>= 8;
				984	highBit += 8;
				985	}
				986
				987	if (value >= 1 << 4) {
				988	value >>= 4;
				989	highBit += 4;
				990	}
				991
				992	if (value >= 1 << 2) {
				993	value >>= 2;
				994	highBit += 2;
				995	}
				996
				997	if (value >= 1 << 1) {
				998	value >>= 1;
				999	highBit += 1;
				1000	}
				1001
				1002	power = 1 << highBit;
				1003	extra = numGroups - power;
				1004	}
				1005
				1006	char getGlyph(int charCode) {
				1007	int controlGlyph = getControlCodeGlyph(charCode, false);
				1008	if (controlGlyph >= 0) {
				1009	return (char)controlGlyph;
				1010	}
				1011	int probe = power;
				1012	int range = 0;
				1013
				1014	if (startCharCode[extra] <= charCode) {
				1015	range = extra;
				1016	}
				1017
				1018	while (probe > 1) {
				1019	probe >>= 1;
				1020
				1021	if (startCharCode[range+probe] <= charCode) {
				1022	range += probe;
				1023	}
				1024	}
				1025
				1026	if (startCharCode[range] <= charCode &&
				1027	endCharCode[range] >= charCode) {
				1028	return (char)
				1029	(startGlyphID[range] + (charCode - startCharCode[range]));
				1030	}
				1031
				1032	return 0;
				1033	}
				1034
				1035	}
				1036
				1037	/* Used to substitute for bad Cmaps. */
				1038	static class NullCMapClass extends CMap {
				1039
				1040	char getGlyph(int charCode) {
				1041	return 0;
				1042	}
				1043	}
				1044
				1045	public static final NullCMapClass theNullCmap = new NullCMapClass();
				1046
				1047	final int getControlCodeGlyph(int charCode, boolean noSurrogates) {
				1048	if (charCode < 0x0010) {
				1049	switch (charCode) {
				1050	case 0x0009:
				1051	case 0x000a:
				1052	case 0x000d: return CharToGlyphMapper.INVISIBLE_GLYPH_ID;
				1053	}
				1054	} else if (charCode >= 0x200c) {
				1055	if ((charCode <= 0x200f) \|\|
				1056	(charCode >= 0x2028 && charCode <= 0x202e) \|\|
				1057	(charCode >= 0x206a && charCode <= 0x206f)) {
				1058	return CharToGlyphMapper.INVISIBLE_GLYPH_ID;
				1059	} else if (noSurrogates && charCode >= 0xFFFF) {
				1060	return 0;
				1061	}
				1062	}
				1063	return -1;
				1064	}
				1065	}