blob: d14451824d1ce954b9c7afd47a39eb85114bbee0 [file] [log] [blame]
J. Duke319a3b92007-12-01 00:00:00 +00001/*
2 * Copyright 2003-2006 Sun Microsystems, Inc. All Rights Reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation. Sun designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Sun in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
22 * CA 95054 USA or visit www.sun.com if you need additional information or
23 * have any questions.
24 */
25
26package sun.font;
27
28import java.nio.ByteBuffer;
29import java.nio.CharBuffer;
30import java.nio.IntBuffer;
31import java.util.Locale;
32import java.nio.charset.*;
33
34/*
35 * A tt font has a CMAP table which is in turn made up of sub-tables which
36 * describe the char to glyph mapping in (possibly) multiple ways.
37 * CMAP subtables are described by 3 values.
38 * 1. Platform ID (eg 3=Microsoft, which is the id we look for in JDK)
39 * 2. Encoding (eg 0=symbol, 1=unicode)
40 * 3. TrueType subtable format (how the char->glyph mapping for the encoding
 *    is stored in the subtable). See the TrueType spec. Format 4 is required
 *    by MS in fonts for Windows. It uses segmented mapping to delta values.
 * The combination we most typically see is (3,1,4):
44 * CMAP Platform ID=3 is what we use.
45 * Encodings that are used in practice by JDK on Solaris are
46 * symbol (3,0)
47 * unicode (3,1)
48 * GBK (3,5) (note that solaris zh fonts report 3,4 but are really 3,5)
49 * The format for almost all subtables is 4. However the solaris (3,5)
50 * encodings are typically in format 2.
51 */
52abstract class CMap {
53
54// static char WingDings_b2c[] = {
55// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
56// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
57// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
58// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
59// 0xfffd, 0xfffd, 0x2702, 0x2701, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
60// 0xfffd, 0x2706, 0x2709, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
61// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
62// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0x2707, 0x270d,
63// 0xfffd, 0x270c, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
64// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
65// 0xfffd, 0x2708, 0xfffd, 0xfffd, 0x2744, 0xfffd, 0x271e, 0xfffd,
66// 0x2720, 0x2721, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
67// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
68// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
69// 0xfffd, 0x2751, 0x2752, 0xfffd, 0xfffd, 0x2756, 0xfffd, 0xfffd,
70// 0xfffd, 0xfffd, 0xfffd, 0x2740, 0x273f, 0x275d, 0x275e, 0xfffd,
71// 0xfffd, 0x2780, 0x2781, 0x2782, 0x2783, 0x2784, 0x2785, 0x2786,
72// 0x2787, 0x2788, 0x2789, 0xfffd, 0x278a, 0x278b, 0x278c, 0x278d,
73// 0x278e, 0x278f, 0x2790, 0x2791, 0x2792, 0x2793, 0xfffd, 0xfffd,
74// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
75// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0x274d, 0xfffd,
76// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0x2736, 0x2734, 0xfffd, 0x2735,
77// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0x272a, 0x2730, 0xfffd,
78// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
79// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0x27a5, 0xfffd, 0x27a6, 0xfffd,
80// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
81// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
82// 0x27a2, 0xfffd, 0xfffd, 0xfffd, 0x27b3, 0xfffd, 0xfffd, 0xfffd,
83// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
84// 0x27a1, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
85// 0x27a9, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
86// 0xfffd, 0xfffd, 0xfffd, 0x2717, 0x2713, 0xfffd, 0xfffd, 0xfffd,
87// };
88
89// static char Symbols_b2c[] = {
90// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
91// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
92// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
93// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
94// 0xfffd, 0xfffd, 0x2200, 0xfffd, 0x2203, 0xfffd, 0xfffd, 0x220d,
95// 0xfffd, 0xfffd, 0x2217, 0xfffd, 0xfffd, 0x2212, 0xfffd, 0xfffd,
96// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
97// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
98// 0x2245, 0x0391, 0x0392, 0x03a7, 0x0394, 0x0395, 0x03a6, 0x0393,
99// 0x0397, 0x0399, 0x03d1, 0x039a, 0x039b, 0x039c, 0x039d, 0x039f,
100// 0x03a0, 0x0398, 0x03a1, 0x03a3, 0x03a4, 0x03a5, 0x03c2, 0x03a9,
101// 0x039e, 0x03a8, 0x0396, 0xfffd, 0x2234, 0xfffd, 0x22a5, 0xfffd,
102// 0xfffd, 0x03b1, 0x03b2, 0x03c7, 0x03b4, 0x03b5, 0x03c6, 0x03b3,
103// 0x03b7, 0x03b9, 0x03d5, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03bf,
104// 0x03c0, 0x03b8, 0x03c1, 0x03c3, 0x03c4, 0x03c5, 0x03d6, 0x03c9,
105// 0x03be, 0x03c8, 0x03b6, 0xfffd, 0xfffd, 0xfffd, 0x223c, 0xfffd,
106// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
107// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
108// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
109// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
110// 0xfffd, 0x03d2, 0xfffd, 0x2264, 0x2215, 0x221e, 0xfffd, 0xfffd,
111// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
112// 0x2218, 0xfffd, 0xfffd, 0x2265, 0xfffd, 0x221d, 0xfffd, 0x2219,
113// 0xfffd, 0x2260, 0x2261, 0x2248, 0x22ef, 0x2223, 0xfffd, 0xfffd,
114// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0x2297, 0x2295, 0x2205, 0x2229,
115// 0x222a, 0x2283, 0x2287, 0x2284, 0x2282, 0x2286, 0x2208, 0x2209,
116// 0xfffd, 0x2207, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0x221a, 0x22c5,
117// 0xfffd, 0x2227, 0x2228, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
118// 0x22c4, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0x2211, 0xfffd, 0xfffd,
119// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
120// 0xfffd, 0xfffd, 0x222b, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
121// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
122// };
123
/* Encoding IDs of Microsoft platform (platform ID 3) cmap subtables.
 * The values double as indices into converterMaps.
 */
static final short ShiftJISEncoding = 2;
static final short GBKEncoding = 3;
static final short Big5Encoding = 4;
static final short WansungEncoding = 5;
static final short JohabEncoding = 6;
static final short MSUnicodeSurrogateEncoding = 10;

/* Marks "no mapping" while the converter tables are being built. */
static final char noSuchChar = (char)0xfffd;
static final int SHORTMASK = 0x0000ffff;
/* NOTE: this is an int with all bits set, so "x & INTMASK" on an int
 * value leaves it unchanged (it does not produce an unsigned long).
 */
static final int INTMASK = 0xffffffff;

/* Cache of translation arrays, indexed by encoding ID (0..6) and
 * filled lazily by getConverterMap().
 */
static final char[][] converterMaps = new char[7][];

/*
 * Unicode->other encoding translation array. A pre-computed look up
 * which can be shared across all fonts using that encoding.
 * Using this saves running character converters repeatedly.
 */
char[] xlat;
143
144 static CMap initialize(TrueTypeFont font) {
145
146 CMap cmap = null;
147
148 int offset, platformID, encodingID=-1;
149
150 int three0=0, three1=0, three2=0, three3=0, three4=0, three5=0,
151 three6=0, three10=0;
152 boolean threeStar = false;
153
154 ByteBuffer cmapBuffer = font.getTableBuffer(TrueTypeFont.cmapTag);
155 int cmapTableOffset = font.getTableSize(TrueTypeFont.cmapTag);
156 short numberSubTables = cmapBuffer.getShort(2);
157
158 /* locate the offsets of all 3,* (ie Microsoft platform) encodings */
159 for (int i=0; i<numberSubTables; i++) {
160 cmapBuffer.position(i * 8 + 4);
161 platformID = cmapBuffer.getShort();
162 if (platformID == 3) {
163 threeStar = true;
164 encodingID = cmapBuffer.getShort();
165 offset = cmapBuffer.getInt();
166 switch (encodingID) {
167 case 0: three0 = offset; break; // MS Symbol encoding
168 case 1: three1 = offset; break; // MS Unicode cmap
169 case 2: three2 = offset; break; // ShiftJIS cmap.
170 case 3: three3 = offset; break; // GBK cmap
171 case 4: three4 = offset; break; // Big 5 cmap
172 case 5: three5 = offset; break; // Wansung
173 case 6: three6 = offset; break; // Johab
174 case 10: three10 = offset; break; // MS Unicode surrogates
175 }
176 }
177 }
178
179 /* This defines the preference order for cmap subtables */
180 if (threeStar) {
181 if (three10 != 0) {
182 cmap = createCMap(cmapBuffer, three10, null);
183 }
184 else if (three0 != 0) {
185 /* The special case treatment of these fonts leads to
186 * anomalies where a user can view "wingdings" and "wingdings2"
187 * and the latter shows all its code points in the unicode
188 * private use area at 0xF000->0XF0FF and the former shows
189 * a scattered subset of its glyphs that are known mappings to
190 * unicode code points.
191 * The primary purpose of these mappings was to facilitate
192 * display of symbol chars etc in composite fonts, however
193 * this is not needed as all these code points are covered
194 * by Lucida Sans Regular.
195 * Commenting this out reduces the role of these two files
196 * (assuming that they continue to be used in font.properties)
197 * to just one of contributing to the overall composite
198 * font metrics, and also AWT can still access the fonts.
199 * Clients which explicitly accessed these fonts as names
200 * "Symbol" and "Wingdings" (ie as physical fonts) and
201 * expected to see a scattering of these characters will
202 * see them now as missing. How much of a problem is this?
203 * Perhaps we could still support this mapping just for
204 * "Symbol.ttf" but I suspect some users would prefer it
205 * to be mapped in to the Latin range as that is how
206 * the "symbol" font is used in native apps.
207 */
208// String name = font.platName.toLowerCase(Locale.ENGLISH);
209// if (name.endsWith("symbol.ttf")) {
210// cmap = createSymbolCMap(cmapBuffer, three0, Symbols_b2c);
211// } else if (name.endsWith("wingding.ttf")) {
212// cmap = createSymbolCMap(cmapBuffer, three0, WingDings_b2c);
213// } else {
214 cmap = createCMap(cmapBuffer, three0, null);
215// }
216 }
217 else if (three1 != 0) {
218 cmap = createCMap(cmapBuffer, three1, null);
219 }
220 else if (three2 != 0) {
221 cmap = createCMap(cmapBuffer, three2,
222 getConverterMap(ShiftJISEncoding));
223 }
224 else if (three3 != 0) {
225 cmap = createCMap(cmapBuffer, three3,
226 getConverterMap(GBKEncoding));
227 }
228 else if (three4 != 0) {
229 /* GB2312 TrueType fonts on Solaris have wrong encoding ID for
230 * cmap table, these fonts have EncodingID 4 which is Big5
231 * encoding according the TrueType spec, but actually the
232 * fonts are using gb2312 encoding, have to use this
233 * workaround to make Solaris zh_CN locale work. -sherman
234 */
235 if (FontManager.isSolaris && font.platName != null &&
236 (font.platName.startsWith(
237 "/usr/openwin/lib/locale/zh_CN.EUC/X11/fonts/TrueType") ||
238 font.platName.startsWith(
239 "/usr/openwin/lib/locale/zh_CN/X11/fonts/TrueType") ||
240 font.platName.startsWith(
241 "/usr/openwin/lib/locale/zh/X11/fonts/TrueType"))) {
242 cmap = createCMap(cmapBuffer, three4,
243 getConverterMap(GBKEncoding));
244 }
245 else {
246 cmap = createCMap(cmapBuffer, three4,
247 getConverterMap(Big5Encoding));
248 }
249 }
250 else if (three5 != 0) {
251 cmap = createCMap(cmapBuffer, three5,
252 getConverterMap(WansungEncoding));
253 }
254 else if (three6 != 0) {
255 cmap = createCMap(cmapBuffer, three6,
256 getConverterMap(JohabEncoding));
257 }
258 } else {
259 /* No 3,* subtable was found. Just use whatever is the first
260 * table listed. Not very useful but maybe better than
261 * rejecting the font entirely?
262 */
263 cmap = createCMap(cmapBuffer, cmapBuffer.getInt(8), null);
264 }
265 return cmap;
266 }
267
268 /* speed up the converting by setting the range for double
269 * byte characters;
270 */
271 static char[] getConverter(short encodingID) {
272 int dBegin = 0x8000;
273 int dEnd = 0xffff;
274 String encoding;
275
276 switch (encodingID) {
277 case ShiftJISEncoding:
278 dBegin = 0x8140;
279 dEnd = 0xfcfc;
280 encoding = "SJIS";
281 break;
282 case GBKEncoding:
283 dBegin = 0x8140;
284 dEnd = 0xfea0;
285 encoding = "GBK";
286 break;
287 case Big5Encoding:
288 dBegin = 0xa140;
289 dEnd = 0xfefe;
290 encoding = "Big5";
291 break;
292 case WansungEncoding:
293 dBegin = 0xa1a1;
294 dEnd = 0xfede;
295 encoding = "EUC_KR";
296 break;
297 case JohabEncoding:
298 dBegin = 0x8141;
299 dEnd = 0xfdfe;
300 encoding = "Johab";
301 break;
302 default:
303 return null;
304 }
305
306 try {
307 char[] convertedChars = new char[65536];
308 for (int i=0; i<65536; i++) {
309 convertedChars[i] = noSuchChar;
310 }
311
312 byte[] inputBytes = new byte[(dEnd-dBegin+1)*2];
313 char[] outputChars = new char[(dEnd-dBegin+1)];
314
315 int j = 0;
316 int firstByte;
317 if (encodingID == ShiftJISEncoding) {
318 for (int i = dBegin; i <= dEnd; i++) {
319 firstByte = (i >> 8 & 0xff);
320 if (firstByte >= 0xa1 && firstByte <= 0xdf) {
321 //sjis halfwidth katakana
322 inputBytes[j++] = (byte)0xff;
323 inputBytes[j++] = (byte)0xff;
324 } else {
325 inputBytes[j++] = (byte)firstByte;
326 inputBytes[j++] = (byte)(i & 0xff);
327 }
328 }
329 } else {
330 for (int i = dBegin; i <= dEnd; i++) {
331 inputBytes[j++] = (byte)(i>>8 & 0xff);
332 inputBytes[j++] = (byte)(i & 0xff);
333 }
334 }
335
336 Charset.forName(encoding).newDecoder()
337 .onMalformedInput(CodingErrorAction.REPLACE)
338 .onUnmappableCharacter(CodingErrorAction.REPLACE)
339 .replaceWith("\u0000")
340 .decode(ByteBuffer.wrap(inputBytes, 0, inputBytes.length),
341 CharBuffer.wrap(outputChars, 0, outputChars.length),
342 true);
343
344 // ensure single byte ascii
345 for (int i = 0x20; i <= 0x7e; i++) {
346 convertedChars[i] = (char)i;
347 }
348
349 //sjis halfwidth katakana
350 if (encodingID == ShiftJISEncoding) {
351 for (int i = 0xa1; i <= 0xdf; i++) {
352 convertedChars[i] = (char)(i - 0xa1 + 0xff61);
353 }
354 }
355
356 /* It would save heap space (approx 60Kbytes for each of these
357 * converters) if stored only valid ranges (ie returned
358 * outputChars directly. But this is tricky since want to
359 * include the ASCII range too.
360 */
361// System.err.println("oc.len="+outputChars.length);
362// System.err.println("cc.len="+convertedChars.length);
363// System.err.println("dbegin="+dBegin);
364 System.arraycopy(outputChars, 0, convertedChars, dBegin,
365 outputChars.length);
366
367 //return convertedChars;
368 /* invert this map as now want it to map from Unicode
369 * to other encoding.
370 */
371 char [] invertedChars = new char[65536];
372 for (int i=0;i<65536;i++) {
373 if (convertedChars[i] != noSuchChar) {
374 invertedChars[convertedChars[i]] = (char)i;
375 }
376 }
377 return invertedChars;
378
379 } catch (Exception e) {
380 e.printStackTrace();
381 }
382 return null;
383 }
384
385 /*
386 * The returned array maps to unicode from some other 2 byte encoding
387 * eg for a 2byte index which represents a SJIS char, the indexed
388 * value is the corresponding unicode char.
389 */
390 static char[] getConverterMap(short encodingID) {
391 if (converterMaps[encodingID] == null) {
392 converterMaps[encodingID] = getConverter(encodingID);
393 }
394 return converterMaps[encodingID];
395 }
396
397
398 static CMap createCMap(ByteBuffer buffer, int offset, char[] xlat) {
399 /* First do a sanity check that this cmap subtable is contained
400 * within the cmap table.
401 */
402 int subtableFormat = buffer.getChar(offset);
403 long subtableLength;
404 if (subtableFormat < 8) {
405 subtableLength = buffer.getChar(offset+2);
406 } else {
407 subtableLength = buffer.getInt(offset+4) & INTMASK;
408 }
409 if (offset+subtableLength > buffer.capacity()) {
410 if (FontManager.logging) {
411 FontManager.logger.warning("Cmap subtable overflows buffer.");
412 }
413 }
414 switch (subtableFormat) {
415 case 0: return new CMapFormat0(buffer, offset);
416 case 2: return new CMapFormat2(buffer, offset, xlat);
417 case 4: return new CMapFormat4(buffer, offset, xlat);
418 case 6: return new CMapFormat6(buffer, offset, xlat);
419 case 8: return new CMapFormat8(buffer, offset, xlat);
420 case 10: return new CMapFormat10(buffer, offset, xlat);
421 case 12: return new CMapFormat12(buffer, offset, xlat);
422 default: throw new RuntimeException("Cmap format unimplemented: " +
423 (int)buffer.getChar(offset));
424 }
425 }
426
427/*
428 final char charVal(byte[] cmap, int index) {
429 return (char)(((0xff & cmap[index]) << 8)+(0xff & cmap[index+1]));
430 }
431
432 final short shortVal(byte[] cmap, int index) {
433 return (short)(((0xff & cmap[index]) << 8)+(0xff & cmap[index+1]));
434 }
435*/
436 abstract char getGlyph(int charCode);
437
438 /* Format 4 Header is
439 * ushort format (off=0)
440 * ushort length (off=2)
441 * ushort language (off=4)
442 * ushort segCountX2 (off=6)
443 * ushort searchRange (off=8)
444 * ushort entrySelector (off=10)
445 * ushort rangeShift (off=12)
446 * ushort endCount[segCount] (off=14)
447 * ushort reservedPad
448 * ushort startCount[segCount]
449 * short idDelta[segCount]
450 * idRangeOFfset[segCount]
451 * ushort glyphIdArray[]
452 */
453 static class CMapFormat4 extends CMap {
454 int segCount;
455 int entrySelector;
456 int rangeShift;
457 char[] endCount;
458 char[] startCount;
459 short[] idDelta;
460 char[] idRangeOffset;
461 char[] glyphIds;
462
463 CMapFormat4(ByteBuffer bbuffer, int offset, char[] xlat) {
464
465 this.xlat = xlat;
466
467 bbuffer.position(offset);
468 CharBuffer buffer = bbuffer.asCharBuffer();
469 buffer.get(); // skip, we already know format=4
470 int subtableLength = buffer.get();
471 /* Try to recover from some bad fonts which specify a subtable
472 * length that would overflow the byte buffer holding the whole
473 * cmap table. If this isn't a recoverable situation an exception
474 * may be thrown which is caught higher up the call stack.
475 * Whilst this may seem lenient, in practice, unless the "bad"
476 * subtable we are using is the last one in the cmap table we
477 * would have no way of knowing about this problem anyway.
478 */
479 if (offset+subtableLength > bbuffer.capacity()) {
480 subtableLength = bbuffer.capacity() - offset;
481 }
482 buffer.get(); // skip language
483 segCount = buffer.get()/2;
484 int searchRange = buffer.get();
485 entrySelector = buffer.get();
486 rangeShift = buffer.get()/2;
487 startCount = new char[segCount];
488 endCount = new char[segCount];
489 idDelta = new short[segCount];
490 idRangeOffset = new char[segCount];
491
492 for (int i=0; i<segCount; i++) {
493 endCount[i] = buffer.get();
494 }
495 buffer.get(); // 2 bytes for reserved pad
496 for (int i=0; i<segCount; i++) {
497 startCount[i] = buffer.get();
498 }
499
500 for (int i=0; i<segCount; i++) {
501 idDelta[i] = (short)buffer.get();
502 }
503
504 for (int i=0; i<segCount; i++) {
505 char ctmp = buffer.get();
506 idRangeOffset[i] = (char)((ctmp>>1)&0xffff);
507 }
508 /* Can calculate the number of glyph IDs by subtracting
509 * "pos" from the length of the cmap
510 */
511 int pos = (segCount*8+16)/2;
512 buffer.position(pos);
513 int numGlyphIds = (subtableLength/2 - pos);
514 glyphIds = new char[numGlyphIds];
515 for (int i=0;i<numGlyphIds;i++) {
516 glyphIds[i] = buffer.get();
517 }
518/*
519 System.err.println("segcount="+segCount);
520 System.err.println("entrySelector="+entrySelector);
521 System.err.println("rangeShift="+rangeShift);
522 for (int j=0;j<segCount;j++) {
523 System.err.println("j="+j+ " sc="+(int)(startCount[j]&0xffff)+
524 " ec="+(int)(endCount[j]&0xffff)+
525 " delta="+idDelta[j] +
526 " ro="+(int)idRangeOffset[j]);
527 }
528
529 //System.err.println("numglyphs="+glyphIds.length);
530 for (int i=0;i<numGlyphIds;i++) {
531 System.err.println("gid["+i+"]="+(int)glyphIds[i]);
532 }
533*/
534 }
535
536 char getGlyph(int charCode) {
537
538 int index = 0;
539 char glyphCode = 0;
540
541 int controlGlyph = getControlCodeGlyph(charCode, true);
542 if (controlGlyph >= 0) {
543 return (char)controlGlyph;
544 }
545
546 /* presence of translation array indicates that this
547 * cmap is in some other (non-unicode encoding).
548 * In order to look-up a char->glyph mapping we need to
549 * translate the unicode code point to the encoding of
550 * the cmap.
551 * REMIND: VALID CHARCODES??
552 */
553 if (xlat != null) {
554 charCode = xlat[charCode];
555 }
556
557 /*
558 * Citation from the TrueType (and OpenType) spec:
559 * The segments are sorted in order of increasing endCode
560 * values, and the segment values are specified in four parallel
561 * arrays. You search for the first endCode that is greater than
562 * or equal to the character code you want to map. If the
563 * corresponding startCode is less than or equal to the
564 * character code, then you use the corresponding idDelta and
565 * idRangeOffset to map the character code to a glyph index
566 * (otherwise, the missingGlyph is returned).
567 */
568
569 /*
570 * CMAP format4 defines several fields for optimized search of
571 * the segment list (entrySelector, searchRange, rangeShift).
572 * However, benefits are neglible and some fonts have incorrect
573 * data - so we use straightforward binary search (see bug 6247425)
574 */
575 int left = 0, right = startCount.length;
576 index = startCount.length >> 1;
577 while (left < right) {
578 if (endCount[index] < charCode) {
579 left = index + 1;
580 } else {
581 right = index;
582 }
583 index = (left + right) >> 1;
584 }
585
586 if (charCode >= startCount[index] && charCode <= endCount[index]) {
587 int rangeOffset = idRangeOffset[index];
588
589 if (rangeOffset == 0) {
590 glyphCode = (char)(charCode + idDelta[index]);
591 } else {
592 /* Calculate an index into the glyphIds array */
593
594/*
595 System.err.println("rangeoffset="+rangeOffset+
596 " charCode=" + charCode +
597 " scnt["+index+"]="+(int)startCount[index] +
598 " segCnt="+segCount);
599*/
600
601 int glyphIDIndex = rangeOffset - segCount + index
602 + (charCode - startCount[index]);
603 glyphCode = glyphIds[glyphIDIndex];
604 if (glyphCode != 0) {
605 glyphCode = (char)(glyphCode + idDelta[index]);
606 }
607 }
608 }
609 if (glyphCode != 0) {
610 //System.err.println("cc="+Integer.toHexString((int)charCode) + " gc="+(int)glyphCode);
611 }
612 return glyphCode;
613 }
614 }
615
616 // Format 0: Byte Encoding table
617 static class CMapFormat0 extends CMap {
618 byte [] cmap;
619
620 CMapFormat0(ByteBuffer buffer, int offset) {
621
622 /* skip 6 bytes of format, length, and version */
623 int len = buffer.getChar(offset+2);
624 cmap = new byte[len-6];
625 buffer.position(offset+6);
626 buffer.get(cmap);
627 }
628
629 char getGlyph(int charCode) {
630 if (charCode < 256) {
631 if (charCode < 0x0010) {
632 switch (charCode) {
633 case 0x0009:
634 case 0x000a:
635 case 0x000d: return CharToGlyphMapper.INVISIBLE_GLYPH_ID;
636 }
637 }
638 return (char)(0xff & cmap[charCode]);
639 } else {
640 return 0;
641 }
642 }
643 }
644
645// static CMap createSymbolCMap(ByteBuffer buffer, int offset, char[] syms) {
646
647// CMap cmap = createCMap(buffer, offset, null);
648// if (cmap == null) {
649// return null;
650// } else {
651// return new CMapFormatSymbol(cmap, syms);
652// }
653// }
654
655// static class CMapFormatSymbol extends CMap {
656
657// CMap cmap;
658// static final int NUM_BUCKETS = 128;
659// Bucket[] buckets = new Bucket[NUM_BUCKETS];
660
661// class Bucket {
662// char unicode;
663// char glyph;
664// Bucket next;
665
666// Bucket(char u, char g) {
667// unicode = u;
668// glyph = g;
669// }
670// }
671
672// CMapFormatSymbol(CMap cmap, char[] syms) {
673
674// this.cmap = cmap;
675
676// for (int i=0;i<syms.length;i++) {
677// char unicode = syms[i];
678// if (unicode != noSuchChar) {
679// char glyph = cmap.getGlyph(i + 0xf000);
680// int hash = unicode % NUM_BUCKETS;
681// Bucket bucket = new Bucket(unicode, glyph);
682// if (buckets[hash] == null) {
683// buckets[hash] = bucket;
684// } else {
685// Bucket b = buckets[hash];
686// while (b.next != null) {
687// b = b.next;
688// }
689// b.next = bucket;
690// }
691// }
692// }
693// }
694
695// char getGlyph(int unicode) {
696// if (unicode >= 0x1000) {
697// return 0;
698// }
699// else if (unicode >=0xf000 && unicode < 0xf100) {
700// return cmap.getGlyph(unicode);
701// } else {
702// Bucket b = buckets[unicode % NUM_BUCKETS];
703// while (b != null) {
704// if (b.unicode == unicode) {
705// return b.glyph;
706// } else {
707// b = b.next;
708// }
709// }
710// return 0;
711// }
712// }
713// }
714
715 // Format 2: High-byte mapping through table
716 static class CMapFormat2 extends CMap {
717
718 char[] subHeaderKey = new char[256];
719 /* Store subheaders in individual arrays
720 * A SubHeader entry theortically looks like {
721 * char firstCode;
722 * char entryCount;
723 * short idDelta;
724 * char idRangeOffset;
725 * }
726 */
727 char[] firstCodeArray;
728 char[] entryCountArray;
729 short[] idDeltaArray;
730 char[] idRangeOffSetArray;
731
732 char[] glyphIndexArray;
733
734 CMapFormat2(ByteBuffer buffer, int offset, char[] xlat) {
735
736 this.xlat = xlat;
737
738 int tableLen = buffer.getChar(offset+2);
739 buffer.position(offset+6);
740 CharBuffer cBuffer = buffer.asCharBuffer();
741 char maxSubHeader = 0;
742 for (int i=0;i<256;i++) {
743 subHeaderKey[i] = cBuffer.get();
744 if (subHeaderKey[i] > maxSubHeader) {
745 maxSubHeader = subHeaderKey[i];
746 }
747 }
748 /* The value of the subHeaderKey is 8 * the subHeader index,
749 * so the number of subHeaders can be obtained by dividing
750 * this value bv 8 and adding 1.
751 */
752 int numSubHeaders = (maxSubHeader >> 3) +1;
753 firstCodeArray = new char[numSubHeaders];
754 entryCountArray = new char[numSubHeaders];
755 idDeltaArray = new short[numSubHeaders];
756 idRangeOffSetArray = new char[numSubHeaders];
757 for (int i=0; i<numSubHeaders; i++) {
758 firstCodeArray[i] = cBuffer.get();
759 entryCountArray[i] = cBuffer.get();
760 idDeltaArray[i] = (short)cBuffer.get();
761 idRangeOffSetArray[i] = cBuffer.get();
762// System.out.println("sh["+i+"]:fc="+(int)firstCodeArray[i]+
763// " ec="+(int)entryCountArray[i]+
764// " delta="+(int)idDeltaArray[i]+
765// " offset="+(int)idRangeOffSetArray[i]);
766 }
767
768 int glyphIndexArrSize = (tableLen-518-numSubHeaders*8)/2;
769 glyphIndexArray = new char[glyphIndexArrSize];
770 for (int i=0; i<glyphIndexArrSize;i++) {
771 glyphIndexArray[i] = cBuffer.get();
772 }
773 }
774
775 char getGlyph(int charCode) {
776 int controlGlyph = getControlCodeGlyph(charCode, true);
777 if (controlGlyph >= 0) {
778 return (char)controlGlyph;
779 }
780
781 if (xlat != null) {
782 charCode = xlat[charCode];
783 }
784
785 char highByte = (char)(charCode >> 8);
786 char lowByte = (char)(charCode & 0xff);
787 int key = subHeaderKey[highByte]>>3; // index into subHeaders
788 char mapMe;
789
790 if (key != 0) {
791 mapMe = lowByte;
792 } else {
793 mapMe = highByte;
794 if (mapMe == 0) {
795 mapMe = lowByte;
796 }
797 }
798
799// System.err.println("charCode="+Integer.toHexString(charCode)+
800// " key="+key+ " mapMe="+Integer.toHexString(mapMe));
801 char firstCode = firstCodeArray[key];
802 if (mapMe < firstCode) {
803 return 0;
804 } else {
805 mapMe -= firstCode;
806 }
807
808 if (mapMe < entryCountArray[key]) {
809 /* "address" arithmetic is needed to calculate the offset
810 * into glyphIndexArray. "idRangeOffSetArray[key]" specifies
811 * the number of bytes from that location in the table where
812 * the subarray of glyphIndexes starting at "firstCode" begins.
813 * Each entry in the subHeader table is 8 bytes, and the
814 * idRangeOffSetArray field is at offset 6 in the entry.
815 * The glyphIndexArray immediately follows the subHeaders.
816 * So if there are "N" entries then the number of bytes to the
817 * start of glyphIndexArray is (N-key)*8-6.
818 * Subtract this from the idRangeOffSetArray value to get
819 * the number of bytes into glyphIndexArray and divide by 2 to
820 * get the (char) array index.
821 */
822 int glyphArrayOffset = ((idRangeOffSetArray.length-key)*8)-6;
823 int glyphSubArrayStart =
824 (idRangeOffSetArray[key] - glyphArrayOffset)/2;
825 char glyphCode = glyphIndexArray[glyphSubArrayStart+mapMe];
826 if (glyphCode != 0) {
827 glyphCode += idDeltaArray[key]; //idDelta
828 return glyphCode;
829 }
830 }
831 return 0;
832 }
833 }
834
835 // Format 6: Trimmed table mapping
836 static class CMapFormat6 extends CMap {
837
838 char firstCode;
839 char entryCount;
840 char[] glyphIdArray;
841
842 CMapFormat6(ByteBuffer bbuffer, int offset, char[] xlat) {
843
844 System.err.println("WARNING: CMapFormat8 is untested.");
845 bbuffer.position(offset+6);
846 CharBuffer buffer = bbuffer.asCharBuffer();
847 firstCode = buffer.get();
848 entryCount = buffer.get();
849 glyphIdArray = new char[entryCount];
850 for (int i=0; i< entryCount; i++) {
851 glyphIdArray[i] = buffer.get();
852 }
853 }
854
855 char getGlyph(int charCode) {
856 int controlGlyph = getControlCodeGlyph(charCode, true);
857 if (controlGlyph >= 0) {
858 return (char)controlGlyph;
859 }
860
861 if (xlat != null) {
862 charCode = xlat[charCode];
863 }
864
865 charCode -= firstCode;
866 if (charCode < 0 || charCode >= entryCount) {
867 return 0;
868 } else {
869 return glyphIdArray[charCode];
870 }
871 }
872 }
873
874 // Format 8: mixed 16-bit and 32-bit coverage
875 // Seems unlikely this code will ever get tested as we look for
876 // MS platform Cmaps and MS states (in the Opentype spec on their website)
877 // that MS doesn't support this format
878 static class CMapFormat8 extends CMap {
879 byte[] is32 = new byte[8192];
880 int nGroups;
881 int[] startCharCode;
882 int[] endCharCode;
883 int[] startGlyphID;
884
885 CMapFormat8(ByteBuffer bbuffer, int offset, char[] xlat) {
886
887 System.err.println("WARNING: CMapFormat8 is untested.");
888 bbuffer.position(12);
889 bbuffer.get(is32);
890 nGroups = bbuffer.getInt();
891 startCharCode = new int[nGroups];
892 endCharCode = new int[nGroups];
893 startGlyphID = new int[nGroups];
894 }
895
896 char getGlyph(int charCode) {
897 if (xlat != null) {
898 throw new RuntimeException("xlat array for cmap fmt=8");
899 }
900 return 0;
901 }
902
903 }
904
905
906 // Format 4-byte 10: Trimmed table mapping
907 // Seems unlikely this code will ever get tested as we look for
908 // MS platform Cmaps and MS states (in the Opentype spec on their website)
909 // that MS doesn't support this format
910 static class CMapFormat10 extends CMap {
911
912 long firstCode;
913 int entryCount;
914 char[] glyphIdArray;
915
916 CMapFormat10(ByteBuffer bbuffer, int offset, char[] xlat) {
917
918 System.err.println("WARNING: CMapFormat10 is untested.");
919 firstCode = bbuffer.getInt() & INTMASK;
920 entryCount = bbuffer.getInt() & INTMASK;
921 bbuffer.position(offset+20);
922 CharBuffer buffer = bbuffer.asCharBuffer();
923 glyphIdArray = new char[entryCount];
924 for (int i=0; i< entryCount; i++) {
925 glyphIdArray[i] = buffer.get();
926 }
927 }
928
929 char getGlyph(int charCode) {
930
931 if (xlat != null) {
932 throw new RuntimeException("xlat array for cmap fmt=10");
933 }
934
935 int code = (int)(charCode - firstCode);
936 if (code < 0 || code >= entryCount) {
937 return 0;
938 } else {
939 return glyphIdArray[code];
940 }
941 }
942 }
943
944 // Format 12: Segmented coverage for UCS-4 (fonts supporting
945 // surrogate pairs)
946 static class CMapFormat12 extends CMap {
947
948 int numGroups;
949 int highBit =0;
950 int power;
951 int extra;
952 long[] startCharCode;
953 long[] endCharCode;
954 int[] startGlyphID;
955
956 CMapFormat12(ByteBuffer buffer, int offset, char[] xlat) {
957 if (xlat != null) {
958 throw new RuntimeException("xlat array for cmap fmt=12");
959 }
960
961 numGroups = buffer.getInt(offset+12);
962 startCharCode = new long[numGroups];
963 endCharCode = new long[numGroups];
964 startGlyphID = new int[numGroups];
965 buffer.position(offset+16);
966 buffer = buffer.slice();
967 IntBuffer ibuffer = buffer.asIntBuffer();
968 for (int i=0; i<numGroups; i++) {
969 startCharCode[i] = ibuffer.get() & INTMASK;
970 endCharCode[i] = ibuffer.get() & INTMASK;
971 startGlyphID[i] = ibuffer.get() & INTMASK;
972 }
973
974 /* Finds the high bit by binary searching through the bits */
975 int value = numGroups;
976
977 if (value >= 1 << 16) {
978 value >>= 16;
979 highBit += 16;
980 }
981
982 if (value >= 1 << 8) {
983 value >>= 8;
984 highBit += 8;
985 }
986
987 if (value >= 1 << 4) {
988 value >>= 4;
989 highBit += 4;
990 }
991
992 if (value >= 1 << 2) {
993 value >>= 2;
994 highBit += 2;
995 }
996
997 if (value >= 1 << 1) {
998 value >>= 1;
999 highBit += 1;
1000 }
1001
1002 power = 1 << highBit;
1003 extra = numGroups - power;
1004 }
1005
1006 char getGlyph(int charCode) {
1007 int controlGlyph = getControlCodeGlyph(charCode, false);
1008 if (controlGlyph >= 0) {
1009 return (char)controlGlyph;
1010 }
1011 int probe = power;
1012 int range = 0;
1013
1014 if (startCharCode[extra] <= charCode) {
1015 range = extra;
1016 }
1017
1018 while (probe > 1) {
1019 probe >>= 1;
1020
1021 if (startCharCode[range+probe] <= charCode) {
1022 range += probe;
1023 }
1024 }
1025
1026 if (startCharCode[range] <= charCode &&
1027 endCharCode[range] >= charCode) {
1028 return (char)
1029 (startGlyphID[range] + (charCode - startCharCode[range]));
1030 }
1031
1032 return 0;
1033 }
1034
1035 }
1036
1037 /* Used to substitute for bad Cmaps. */
1038 static class NullCMapClass extends CMap {
1039
1040 char getGlyph(int charCode) {
1041 return 0;
1042 }
1043 }
1044
1045 public static final NullCMapClass theNullCmap = new NullCMapClass();
1046
1047 final int getControlCodeGlyph(int charCode, boolean noSurrogates) {
1048 if (charCode < 0x0010) {
1049 switch (charCode) {
1050 case 0x0009:
1051 case 0x000a:
1052 case 0x000d: return CharToGlyphMapper.INVISIBLE_GLYPH_ID;
1053 }
1054 } else if (charCode >= 0x200c) {
1055 if ((charCode <= 0x200f) ||
1056 (charCode >= 0x2028 && charCode <= 0x202e) ||
1057 (charCode >= 0x206a && charCode <= 0x206f)) {
1058 return CharToGlyphMapper.INVISIBLE_GLYPH_ID;
1059 } else if (noSurrogates && charCode >= 0xFFFF) {
1060 return 0;
1061 }
1062 }
1063 return -1;
1064 }
1065}