| /* |
| * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. |
| * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
| * |
| * This code is free software; you can redistribute it and/or modify it |
| * under the terms of the GNU General Public License version 2 only, as |
| * published by the Free Software Foundation. Oracle designates this |
| * particular file as subject to the "Classpath" exception as provided |
| * by Oracle in the LICENSE file that accompanied this code. |
| * |
| * This code is distributed in the hope that it will be useful, but WITHOUT |
| * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
| * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
| * version 2 for more details (a copy is included in the LICENSE file that |
| * accompanied this code). |
| * |
| * You should have received a copy of the GNU General Public License version |
| * 2 along with this work; if not, write to the Free Software Foundation, |
| * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
| * |
| * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
| * or visit www.oracle.com if you need additional information or have any |
| * questions. |
| */ |
| |
| package sun.font; |
| |
| import java.nio.ByteBuffer; |
| import java.nio.CharBuffer; |
| import java.nio.IntBuffer; |
| import java.util.Locale; |
| import java.nio.charset.*; |
| |
| /* |
| * A tt font has a CMAP table which is in turn made up of sub-tables which |
| * describe the char to glyph mapping in (possibly) multiple ways. |
| * CMAP subtables are described by 3 values. |
| * 1. Platform ID (eg 3=Microsoft, which is the id we look for in JDK) |
| * 2. Encoding (eg 0=symbol, 1=unicode) |
| * 3. TrueType subtable format (how the char->glyph mapping for the encoding |
| * is stored in the subtable). See the TrueType spec. Format 4 is required |
| * by MS in fonts for windows. It uses segmented mapping to delta values. |
| * The subtable we most typically see is (3,1,4) : |
| * CMAP Platform ID=3 is what we use. |
| * Encodings that are used in practice by JDK on Solaris are |
| * symbol (3,0) |
| * unicode (3,1) |
| * GBK (3,5) (note that solaris zh fonts report 3,4 but are really 3,5) |
| * The format for almost all subtables is 4. However the solaris (3,5) |
| * encodings are typically in format 2. |
| */ |
| abstract class CMap { |
| |
| // static char WingDings_b2c[] = { |
| // 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, |
| // 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, |
| // 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, |
| // 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, |
| // 0xfffd, 0xfffd, 0x2702, 0x2701, 0xfffd, 0xfffd, 0xfffd, 0xfffd, |
| // 0xfffd, 0x2706, 0x2709, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, |
| // 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, |
| // 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0x2707, 0x270d, |
| // 0xfffd, 0x270c, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, |
| // 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, |
| // 0xfffd, 0x2708, 0xfffd, 0xfffd, 0x2744, 0xfffd, 0x271e, 0xfffd, |
| // 0x2720, 0x2721, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, |
| // 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, |
| // 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, |
| // 0xfffd, 0x2751, 0x2752, 0xfffd, 0xfffd, 0x2756, 0xfffd, 0xfffd, |
| // 0xfffd, 0xfffd, 0xfffd, 0x2740, 0x273f, 0x275d, 0x275e, 0xfffd, |
| // 0xfffd, 0x2780, 0x2781, 0x2782, 0x2783, 0x2784, 0x2785, 0x2786, |
| // 0x2787, 0x2788, 0x2789, 0xfffd, 0x278a, 0x278b, 0x278c, 0x278d, |
| // 0x278e, 0x278f, 0x2790, 0x2791, 0x2792, 0x2793, 0xfffd, 0xfffd, |
| // 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, |
| // 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0x274d, 0xfffd, |
| // 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0x2736, 0x2734, 0xfffd, 0x2735, |
| // 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0x272a, 0x2730, 0xfffd, |
| // 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, |
| // 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0x27a5, 0xfffd, 0x27a6, 0xfffd, |
| // 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, |
| // 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, |
| // 0x27a2, 0xfffd, 0xfffd, 0xfffd, 0x27b3, 0xfffd, 0xfffd, 0xfffd, |
| // 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, |
| // 0x27a1, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, |
| // 0x27a9, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, |
| // 0xfffd, 0xfffd, 0xfffd, 0x2717, 0x2713, 0xfffd, 0xfffd, 0xfffd, |
| // }; |
| |
| // static char Symbols_b2c[] = { |
| // 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, |
| // 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, |
| // 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, |
| // 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, |
| // 0xfffd, 0xfffd, 0x2200, 0xfffd, 0x2203, 0xfffd, 0xfffd, 0x220d, |
| // 0xfffd, 0xfffd, 0x2217, 0xfffd, 0xfffd, 0x2212, 0xfffd, 0xfffd, |
| // 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, |
| // 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, |
| // 0x2245, 0x0391, 0x0392, 0x03a7, 0x0394, 0x0395, 0x03a6, 0x0393, |
| // 0x0397, 0x0399, 0x03d1, 0x039a, 0x039b, 0x039c, 0x039d, 0x039f, |
| // 0x03a0, 0x0398, 0x03a1, 0x03a3, 0x03a4, 0x03a5, 0x03c2, 0x03a9, |
| // 0x039e, 0x03a8, 0x0396, 0xfffd, 0x2234, 0xfffd, 0x22a5, 0xfffd, |
| // 0xfffd, 0x03b1, 0x03b2, 0x03c7, 0x03b4, 0x03b5, 0x03c6, 0x03b3, |
| // 0x03b7, 0x03b9, 0x03d5, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03bf, |
| // 0x03c0, 0x03b8, 0x03c1, 0x03c3, 0x03c4, 0x03c5, 0x03d6, 0x03c9, |
| // 0x03be, 0x03c8, 0x03b6, 0xfffd, 0xfffd, 0xfffd, 0x223c, 0xfffd, |
| // 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, |
| // 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, |
| // 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, |
| // 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, |
| // 0xfffd, 0x03d2, 0xfffd, 0x2264, 0x2215, 0x221e, 0xfffd, 0xfffd, |
| // 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, |
| // 0x2218, 0xfffd, 0xfffd, 0x2265, 0xfffd, 0x221d, 0xfffd, 0x2219, |
| // 0xfffd, 0x2260, 0x2261, 0x2248, 0x22ef, 0x2223, 0xfffd, 0xfffd, |
| // 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0x2297, 0x2295, 0x2205, 0x2229, |
| // 0x222a, 0x2283, 0x2287, 0x2284, 0x2282, 0x2286, 0x2208, 0x2209, |
| // 0xfffd, 0x2207, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0x221a, 0x22c5, |
| // 0xfffd, 0x2227, 0x2228, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, |
| // 0x22c4, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0x2211, 0xfffd, 0xfffd, |
| // 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, |
| // 0xfffd, 0xfffd, 0x222b, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, |
| // 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, |
| // }; |
| |
| /* Encoding IDs of Microsoft platform (platform ID 3) cmap subtables, as |
| * matched against in initialize(). The IDs 2..6 are also what |
| * getConverterMap() is called with, where they index converterMaps. |
| */ static final short ShiftJISEncoding = 2; |
| static final short GBKEncoding = 3; |
| static final short Big5Encoding = 4; |
| static final short WansungEncoding = 5; |
| static final short JohabEncoding = 6; |
| static final short MSUnicodeSurrogateEncoding = 10; |
| |
| /* Initial fill value of the table built in getConverter(); entries that |
| * are equal to it are skipped when that table is inverted. |
| */ static final char noSuchChar = (char)0xfffd; |
| /* Mask for the low 16 bits of an int. */ static final int SHORTMASK = 0x0000ffff; |
| /* NOTE(review): as an int this constant is -1, so "x & INTMASK" leaves |
| * an int x unchanged; it does not produce an unsigned 32-bit value. |
| */ static final int INTMASK = 0xffffffff; |
| |
| /* Cache of the arrays returned by getConverter(), indexed by encoding ID |
| * and filled on first use by getConverterMap(). |
| */ static final char[][] converterMaps = new char[7][]; |
| |
| /* |
| * Unicode->other encoding translation array. A pre-computed look up |
| * which can be shared across all fonts using that encoding. |
| * Using this saves running character converters repeatedly. |
| * Left null when no translation is needed (getGlyph() checks for null). |
| */ |
| char[] xlat; |
| |
| static CMap initialize(TrueTypeFont font) { |
| |
| CMap cmap = null; |
| |
| int offset, platformID, encodingID=-1; |
| |
| int three0=0, three1=0, three2=0, three3=0, three4=0, three5=0, |
| three6=0, three10=0; |
| boolean threeStar = false; |
| |
| ByteBuffer cmapBuffer = font.getTableBuffer(TrueTypeFont.cmapTag); |
| int cmapTableOffset = font.getTableSize(TrueTypeFont.cmapTag); |
| short numberSubTables = cmapBuffer.getShort(2); |
| |
| /* locate the offsets of all 3,* (ie Microsoft platform) encodings */ |
| for (int i=0; i<numberSubTables; i++) { |
| cmapBuffer.position(i * 8 + 4); |
| platformID = cmapBuffer.getShort(); |
| if (platformID == 3) { |
| threeStar = true; |
| encodingID = cmapBuffer.getShort(); |
| offset = cmapBuffer.getInt(); |
| switch (encodingID) { |
| case 0: three0 = offset; break; // MS Symbol encoding |
| case 1: three1 = offset; break; // MS Unicode cmap |
| case 2: three2 = offset; break; // ShiftJIS cmap. |
| case 3: three3 = offset; break; // GBK cmap |
| case 4: three4 = offset; break; // Big 5 cmap |
| case 5: three5 = offset; break; // Wansung |
| case 6: three6 = offset; break; // Johab |
| case 10: three10 = offset; break; // MS Unicode surrogates |
| } |
| } |
| } |
| |
| /* This defines the preference order for cmap subtables */ |
| if (threeStar) { |
| if (three10 != 0) { |
| cmap = createCMap(cmapBuffer, three10, null); |
| } |
| else if (three0 != 0) { |
| /* The special case treatment of these fonts leads to |
| * anomalies where a user can view "wingdings" and "wingdings2" |
| * and the latter shows all its code points in the unicode |
| * private use area at 0xF000->0XF0FF and the former shows |
| * a scattered subset of its glyphs that are known mappings to |
| * unicode code points. |
| * The primary purpose of these mappings was to facilitate |
| * display of symbol chars etc in composite fonts, however |
| * this is not needed as all these code points are covered |
| * by Lucida Sans Regular. |
| * Commenting this out reduces the role of these two files |
| * (assuming that they continue to be used in font.properties) |
| * to just one of contributing to the overall composite |
| * font metrics, and also AWT can still access the fonts. |
| * Clients which explicitly accessed these fonts as names |
| * "Symbol" and "Wingdings" (ie as physical fonts) and |
| * expected to see a scattering of these characters will |
| * see them now as missing. How much of a problem is this? |
| * Perhaps we could still support this mapping just for |
| * "Symbol.ttf" but I suspect some users would prefer it |
| * to be mapped in to the Latin range as that is how |
| * the "symbol" font is used in native apps. |
| */ |
| // String name = font.platName.toLowerCase(Locale.ENGLISH); |
| // if (name.endsWith("symbol.ttf")) { |
| // cmap = createSymbolCMap(cmapBuffer, three0, Symbols_b2c); |
| // } else if (name.endsWith("wingding.ttf")) { |
| // cmap = createSymbolCMap(cmapBuffer, three0, WingDings_b2c); |
| // } else { |
| cmap = createCMap(cmapBuffer, three0, null); |
| // } |
| } |
| else if (three1 != 0) { |
| cmap = createCMap(cmapBuffer, three1, null); |
| } |
| else if (three2 != 0) { |
| cmap = createCMap(cmapBuffer, three2, |
| getConverterMap(ShiftJISEncoding)); |
| } |
| else if (three3 != 0) { |
| cmap = createCMap(cmapBuffer, three3, |
| getConverterMap(GBKEncoding)); |
| } |
| else if (three4 != 0) { |
| /* GB2312 TrueType fonts on Solaris have wrong encoding ID for |
| * cmap table, these fonts have EncodingID 4 which is Big5 |
| * encoding according the TrueType spec, but actually the |
| * fonts are using gb2312 encoding, have to use this |
| * workaround to make Solaris zh_CN locale work. -sherman |
| */ |
| if (FontUtilities.isSolaris && font.platName != null && |
| (font.platName.startsWith( |
| "/usr/openwin/lib/locale/zh_CN.EUC/X11/fonts/TrueType") || |
| font.platName.startsWith( |
| "/usr/openwin/lib/locale/zh_CN/X11/fonts/TrueType") || |
| font.platName.startsWith( |
| "/usr/openwin/lib/locale/zh/X11/fonts/TrueType"))) { |
| cmap = createCMap(cmapBuffer, three4, |
| getConverterMap(GBKEncoding)); |
| } |
| else { |
| cmap = createCMap(cmapBuffer, three4, |
| getConverterMap(Big5Encoding)); |
| } |
| } |
| else if (three5 != 0) { |
| cmap = createCMap(cmapBuffer, three5, |
| getConverterMap(WansungEncoding)); |
| } |
| else if (three6 != 0) { |
| cmap = createCMap(cmapBuffer, three6, |
| getConverterMap(JohabEncoding)); |
| } |
| } else { |
| /* No 3,* subtable was found. Just use whatever is the first |
| * table listed. Not very useful but maybe better than |
| * rejecting the font entirely? |
| */ |
| cmap = createCMap(cmapBuffer, cmapBuffer.getInt(8), null); |
| } |
| return cmap; |
| } |
| |
| /* speed up the converting by setting the range for double |
| * byte characters; |
| */ |
| static char[] getConverter(short encodingID) { |
| int dBegin = 0x8000; |
| int dEnd = 0xffff; |
| String encoding; |
| |
| switch (encodingID) { |
| case ShiftJISEncoding: |
| dBegin = 0x8140; |
| dEnd = 0xfcfc; |
| encoding = "SJIS"; |
| break; |
| case GBKEncoding: |
| dBegin = 0x8140; |
| dEnd = 0xfea0; |
| encoding = "GBK"; |
| break; |
| case Big5Encoding: |
| dBegin = 0xa140; |
| dEnd = 0xfefe; |
| encoding = "Big5"; |
| break; |
| case WansungEncoding: |
| dBegin = 0xa1a1; |
| dEnd = 0xfede; |
| encoding = "EUC_KR"; |
| break; |
| case JohabEncoding: |
| dBegin = 0x8141; |
| dEnd = 0xfdfe; |
| encoding = "Johab"; |
| break; |
| default: |
| return null; |
| } |
| |
| try { |
| char[] convertedChars = new char[65536]; |
| for (int i=0; i<65536; i++) { |
| convertedChars[i] = noSuchChar; |
| } |
| |
| byte[] inputBytes = new byte[(dEnd-dBegin+1)*2]; |
| char[] outputChars = new char[(dEnd-dBegin+1)]; |
| |
| int j = 0; |
| int firstByte; |
| if (encodingID == ShiftJISEncoding) { |
| for (int i = dBegin; i <= dEnd; i++) { |
| firstByte = (i >> 8 & 0xff); |
| if (firstByte >= 0xa1 && firstByte <= 0xdf) { |
| //sjis halfwidth katakana |
| inputBytes[j++] = (byte)0xff; |
| inputBytes[j++] = (byte)0xff; |
| } else { |
| inputBytes[j++] = (byte)firstByte; |
| inputBytes[j++] = (byte)(i & 0xff); |
| } |
| } |
| } else { |
| for (int i = dBegin; i <= dEnd; i++) { |
| inputBytes[j++] = (byte)(i>>8 & 0xff); |
| inputBytes[j++] = (byte)(i & 0xff); |
| } |
| } |
| |
| Charset.forName(encoding).newDecoder() |
| .onMalformedInput(CodingErrorAction.REPLACE) |
| .onUnmappableCharacter(CodingErrorAction.REPLACE) |
| .replaceWith("\u0000") |
| .decode(ByteBuffer.wrap(inputBytes, 0, inputBytes.length), |
| CharBuffer.wrap(outputChars, 0, outputChars.length), |
| true); |
| |
| // ensure single byte ascii |
| for (int i = 0x20; i <= 0x7e; i++) { |
| convertedChars[i] = (char)i; |
| } |
| |
| //sjis halfwidth katakana |
| if (encodingID == ShiftJISEncoding) { |
| for (int i = 0xa1; i <= 0xdf; i++) { |
| convertedChars[i] = (char)(i - 0xa1 + 0xff61); |
| } |
| } |
| |
| /* It would save heap space (approx 60Kbytes for each of these |
| * converters) if stored only valid ranges (ie returned |
| * outputChars directly. But this is tricky since want to |
| * include the ASCII range too. |
| */ |
| // System.err.println("oc.len="+outputChars.length); |
| // System.err.println("cc.len="+convertedChars.length); |
| // System.err.println("dbegin="+dBegin); |
| System.arraycopy(outputChars, 0, convertedChars, dBegin, |
| outputChars.length); |
| |
| //return convertedChars; |
| /* invert this map as now want it to map from Unicode |
| * to other encoding. |
| */ |
| char [] invertedChars = new char[65536]; |
| for (int i=0;i<65536;i++) { |
| if (convertedChars[i] != noSuchChar) { |
| invertedChars[convertedChars[i]] = (char)i; |
| } |
| } |
| return invertedChars; |
| |
| } catch (Exception e) { |
| e.printStackTrace(); |
| } |
| return null; |
| } |
| |
| /* |
| * The returned array maps to unicode from some other 2 byte encoding |
| * eg for a 2byte index which represents a SJIS char, the indexed |
| * value is the corresponding unicode char. |
| */ |
| static char[] getConverterMap(short encodingID) { |
| if (converterMaps[encodingID] == null) { |
| converterMaps[encodingID] = getConverter(encodingID); |
| } |
| return converterMaps[encodingID]; |
| } |
| |
| |
| static CMap createCMap(ByteBuffer buffer, int offset, char[] xlat) { |
| /* First do a sanity check that this cmap subtable is contained |
| * within the cmap table. |
| */ |
| int subtableFormat = buffer.getChar(offset); |
| long subtableLength; |
| if (subtableFormat < 8) { |
| subtableLength = buffer.getChar(offset+2); |
| } else { |
| subtableLength = buffer.getInt(offset+4) & INTMASK; |
| } |
| if (offset+subtableLength > buffer.capacity()) { |
| if (FontUtilities.isLogging()) { |
| FontUtilities.getLogger().warning("Cmap subtable overflows buffer."); |
| } |
| } |
| switch (subtableFormat) { |
| case 0: return new CMapFormat0(buffer, offset); |
| case 2: return new CMapFormat2(buffer, offset, xlat); |
| case 4: return new CMapFormat4(buffer, offset, xlat); |
| case 6: return new CMapFormat6(buffer, offset, xlat); |
| case 8: return new CMapFormat8(buffer, offset, xlat); |
| case 10: return new CMapFormat10(buffer, offset, xlat); |
| case 12: return new CMapFormat12(buffer, offset, xlat); |
| default: throw new RuntimeException("Cmap format unimplemented: " + |
| (int)buffer.getChar(offset)); |
| } |
| } |
| |
| /* |
| final char charVal(byte[] cmap, int index) { |
| return (char)(((0xff & cmap[index]) << 8)+(0xff & cmap[index+1])); |
| } |
| |
| final short shortVal(byte[] cmap, int index) { |
| return (short)(((0xff & cmap[index]) << 8)+(0xff & cmap[index+1])); |
| } |
| */ |
| /* Maps a character code to a glyph code. The implementations in this |
| * file return 0 for codes they cannot map and |
| * CharToGlyphMapper.INVISIBLE_GLYPH_ID for the control codes they |
| * recognise. |
| */ abstract char getGlyph(int charCode); |
| |
| /* Format 4 Header is |
| * ushort format (off=0) |
| * ushort length (off=2) |
| * ushort language (off=4) |
| * ushort segCountX2 (off=6) |
| * ushort searchRange (off=8) |
| * ushort entrySelector (off=10) |
| * ushort rangeShift (off=12) |
| * ushort endCount[segCount] (off=14) |
| * ushort reservedPad |
| * ushort startCount[segCount] |
| * short idDelta[segCount] |
| * idRangeOFfset[segCount] |
| * ushort glyphIdArray[] |
| */ |
| static class CMapFormat4 extends CMap { |
| int segCount; |
| int entrySelector; |
| int rangeShift; |
| char[] endCount; |
| char[] startCount; |
| short[] idDelta; |
| char[] idRangeOffset; |
| char[] glyphIds; |
| |
| CMapFormat4(ByteBuffer bbuffer, int offset, char[] xlat) { |
| |
| this.xlat = xlat; |
| |
| bbuffer.position(offset); |
| CharBuffer buffer = bbuffer.asCharBuffer(); |
| buffer.get(); // skip, we already know format=4 |
| int subtableLength = buffer.get(); |
| /* Try to recover from some bad fonts which specify a subtable |
| * length that would overflow the byte buffer holding the whole |
| * cmap table. If this isn't a recoverable situation an exception |
| * may be thrown which is caught higher up the call stack. |
| * Whilst this may seem lenient, in practice, unless the "bad" |
| * subtable we are using is the last one in the cmap table we |
| * would have no way of knowing about this problem anyway. |
| */ |
| if (offset+subtableLength > bbuffer.capacity()) { |
| subtableLength = bbuffer.capacity() - offset; |
| } |
| buffer.get(); // skip language |
| segCount = buffer.get()/2; |
| int searchRange = buffer.get(); |
| entrySelector = buffer.get(); |
| rangeShift = buffer.get()/2; |
| startCount = new char[segCount]; |
| endCount = new char[segCount]; |
| idDelta = new short[segCount]; |
| idRangeOffset = new char[segCount]; |
| |
| for (int i=0; i<segCount; i++) { |
| endCount[i] = buffer.get(); |
| } |
| buffer.get(); // 2 bytes for reserved pad |
| for (int i=0; i<segCount; i++) { |
| startCount[i] = buffer.get(); |
| } |
| |
| for (int i=0; i<segCount; i++) { |
| idDelta[i] = (short)buffer.get(); |
| } |
| |
| for (int i=0; i<segCount; i++) { |
| char ctmp = buffer.get(); |
| idRangeOffset[i] = (char)((ctmp>>1)&0xffff); |
| } |
| /* Can calculate the number of glyph IDs by subtracting |
| * "pos" from the length of the cmap |
| */ |
| int pos = (segCount*8+16)/2; |
| buffer.position(pos); |
| int numGlyphIds = (subtableLength/2 - pos); |
| glyphIds = new char[numGlyphIds]; |
| for (int i=0;i<numGlyphIds;i++) { |
| glyphIds[i] = buffer.get(); |
| } |
| /* |
| System.err.println("segcount="+segCount); |
| System.err.println("entrySelector="+entrySelector); |
| System.err.println("rangeShift="+rangeShift); |
| for (int j=0;j<segCount;j++) { |
| System.err.println("j="+j+ " sc="+(int)(startCount[j]&0xffff)+ |
| " ec="+(int)(endCount[j]&0xffff)+ |
| " delta="+idDelta[j] + |
| " ro="+(int)idRangeOffset[j]); |
| } |
| |
| //System.err.println("numglyphs="+glyphIds.length); |
| for (int i=0;i<numGlyphIds;i++) { |
| System.err.println("gid["+i+"]="+(int)glyphIds[i]); |
| } |
| */ |
| } |
| |
| char getGlyph(int charCode) { |
| |
| int index = 0; |
| char glyphCode = 0; |
| |
| int controlGlyph = getControlCodeGlyph(charCode, true); |
| if (controlGlyph >= 0) { |
| return (char)controlGlyph; |
| } |
| |
| /* presence of translation array indicates that this |
| * cmap is in some other (non-unicode encoding). |
| * In order to look-up a char->glyph mapping we need to |
| * translate the unicode code point to the encoding of |
| * the cmap. |
| * REMIND: VALID CHARCODES?? |
| */ |
| if (xlat != null) { |
| charCode = xlat[charCode]; |
| } |
| |
| /* |
| * Citation from the TrueType (and OpenType) spec: |
| * The segments are sorted in order of increasing endCode |
| * values, and the segment values are specified in four parallel |
| * arrays. You search for the first endCode that is greater than |
| * or equal to the character code you want to map. If the |
| * corresponding startCode is less than or equal to the |
| * character code, then you use the corresponding idDelta and |
| * idRangeOffset to map the character code to a glyph index |
| * (otherwise, the missingGlyph is returned). |
| */ |
| |
| /* |
| * CMAP format4 defines several fields for optimized search of |
| * the segment list (entrySelector, searchRange, rangeShift). |
| * However, benefits are neglible and some fonts have incorrect |
| * data - so we use straightforward binary search (see bug 6247425) |
| */ |
| int left = 0, right = startCount.length; |
| index = startCount.length >> 1; |
| while (left < right) { |
| if (endCount[index] < charCode) { |
| left = index + 1; |
| } else { |
| right = index; |
| } |
| index = (left + right) >> 1; |
| } |
| |
| if (charCode >= startCount[index] && charCode <= endCount[index]) { |
| int rangeOffset = idRangeOffset[index]; |
| |
| if (rangeOffset == 0) { |
| glyphCode = (char)(charCode + idDelta[index]); |
| } else { |
| /* Calculate an index into the glyphIds array */ |
| |
| /* |
| System.err.println("rangeoffset="+rangeOffset+ |
| " charCode=" + charCode + |
| " scnt["+index+"]="+(int)startCount[index] + |
| " segCnt="+segCount); |
| */ |
| |
| int glyphIDIndex = rangeOffset - segCount + index |
| + (charCode - startCount[index]); |
| glyphCode = glyphIds[glyphIDIndex]; |
| if (glyphCode != 0) { |
| glyphCode = (char)(glyphCode + idDelta[index]); |
| } |
| } |
| } |
| if (glyphCode != 0) { |
| //System.err.println("cc="+Integer.toHexString((int)charCode) + " gc="+(int)glyphCode); |
| } |
| return glyphCode; |
| } |
| } |
| |
| // Format 0: Byte Encoding table |
| static class CMapFormat0 extends CMap { |
| byte [] cmap; |
| |
| CMapFormat0(ByteBuffer buffer, int offset) { |
| |
| /* skip 6 bytes of format, length, and version */ |
| int len = buffer.getChar(offset+2); |
| cmap = new byte[len-6]; |
| buffer.position(offset+6); |
| buffer.get(cmap); |
| } |
| |
| char getGlyph(int charCode) { |
| if (charCode < 256) { |
| if (charCode < 0x0010) { |
| switch (charCode) { |
| case 0x0009: |
| case 0x000a: |
| case 0x000d: return CharToGlyphMapper.INVISIBLE_GLYPH_ID; |
| } |
| } |
| return (char)(0xff & cmap[charCode]); |
| } else { |
| return 0; |
| } |
| } |
| } |
| |
| // static CMap createSymbolCMap(ByteBuffer buffer, int offset, char[] syms) { |
| |
| // CMap cmap = createCMap(buffer, offset, null); |
| // if (cmap == null) { |
| // return null; |
| // } else { |
| // return new CMapFormatSymbol(cmap, syms); |
| // } |
| // } |
| |
| // static class CMapFormatSymbol extends CMap { |
| |
| // CMap cmap; |
| // static final int NUM_BUCKETS = 128; |
| // Bucket[] buckets = new Bucket[NUM_BUCKETS]; |
| |
| // class Bucket { |
| // char unicode; |
| // char glyph; |
| // Bucket next; |
| |
| // Bucket(char u, char g) { |
| // unicode = u; |
| // glyph = g; |
| // } |
| // } |
| |
| // CMapFormatSymbol(CMap cmap, char[] syms) { |
| |
| // this.cmap = cmap; |
| |
| // for (int i=0;i<syms.length;i++) { |
| // char unicode = syms[i]; |
| // if (unicode != noSuchChar) { |
| // char glyph = cmap.getGlyph(i + 0xf000); |
| // int hash = unicode % NUM_BUCKETS; |
| // Bucket bucket = new Bucket(unicode, glyph); |
| // if (buckets[hash] == null) { |
| // buckets[hash] = bucket; |
| // } else { |
| // Bucket b = buckets[hash]; |
| // while (b.next != null) { |
| // b = b.next; |
| // } |
| // b.next = bucket; |
| // } |
| // } |
| // } |
| // } |
| |
| // char getGlyph(int unicode) { |
| // if (unicode >= 0x1000) { |
| // return 0; |
| // } |
| // else if (unicode >=0xf000 && unicode < 0xf100) { |
| // return cmap.getGlyph(unicode); |
| // } else { |
| // Bucket b = buckets[unicode % NUM_BUCKETS]; |
| // while (b != null) { |
| // if (b.unicode == unicode) { |
| // return b.glyph; |
| // } else { |
| // b = b.next; |
| // } |
| // } |
| // return 0; |
| // } |
| // } |
| // } |
| |
| // Format 2: High-byte mapping through table |
| static class CMapFormat2 extends CMap { |
| |
| char[] subHeaderKey = new char[256]; |
| /* Store subheaders in individual arrays |
| * A SubHeader entry theortically looks like { |
| * char firstCode; |
| * char entryCount; |
| * short idDelta; |
| * char idRangeOffset; |
| * } |
| */ |
| char[] firstCodeArray; |
| char[] entryCountArray; |
| short[] idDeltaArray; |
| char[] idRangeOffSetArray; |
| |
| char[] glyphIndexArray; |
| |
| CMapFormat2(ByteBuffer buffer, int offset, char[] xlat) { |
| |
| this.xlat = xlat; |
| |
| int tableLen = buffer.getChar(offset+2); |
| buffer.position(offset+6); |
| CharBuffer cBuffer = buffer.asCharBuffer(); |
| char maxSubHeader = 0; |
| for (int i=0;i<256;i++) { |
| subHeaderKey[i] = cBuffer.get(); |
| if (subHeaderKey[i] > maxSubHeader) { |
| maxSubHeader = subHeaderKey[i]; |
| } |
| } |
| /* The value of the subHeaderKey is 8 * the subHeader index, |
| * so the number of subHeaders can be obtained by dividing |
| * this value bv 8 and adding 1. |
| */ |
| int numSubHeaders = (maxSubHeader >> 3) +1; |
| firstCodeArray = new char[numSubHeaders]; |
| entryCountArray = new char[numSubHeaders]; |
| idDeltaArray = new short[numSubHeaders]; |
| idRangeOffSetArray = new char[numSubHeaders]; |
| for (int i=0; i<numSubHeaders; i++) { |
| firstCodeArray[i] = cBuffer.get(); |
| entryCountArray[i] = cBuffer.get(); |
| idDeltaArray[i] = (short)cBuffer.get(); |
| idRangeOffSetArray[i] = cBuffer.get(); |
| // System.out.println("sh["+i+"]:fc="+(int)firstCodeArray[i]+ |
| // " ec="+(int)entryCountArray[i]+ |
| // " delta="+(int)idDeltaArray[i]+ |
| // " offset="+(int)idRangeOffSetArray[i]); |
| } |
| |
| int glyphIndexArrSize = (tableLen-518-numSubHeaders*8)/2; |
| glyphIndexArray = new char[glyphIndexArrSize]; |
| for (int i=0; i<glyphIndexArrSize;i++) { |
| glyphIndexArray[i] = cBuffer.get(); |
| } |
| } |
| |
| char getGlyph(int charCode) { |
| int controlGlyph = getControlCodeGlyph(charCode, true); |
| if (controlGlyph >= 0) { |
| return (char)controlGlyph; |
| } |
| |
| if (xlat != null) { |
| charCode = xlat[charCode]; |
| } |
| |
| char highByte = (char)(charCode >> 8); |
| char lowByte = (char)(charCode & 0xff); |
| int key = subHeaderKey[highByte]>>3; // index into subHeaders |
| char mapMe; |
| |
| if (key != 0) { |
| mapMe = lowByte; |
| } else { |
| mapMe = highByte; |
| if (mapMe == 0) { |
| mapMe = lowByte; |
| } |
| } |
| |
| // System.err.println("charCode="+Integer.toHexString(charCode)+ |
| // " key="+key+ " mapMe="+Integer.toHexString(mapMe)); |
| char firstCode = firstCodeArray[key]; |
| if (mapMe < firstCode) { |
| return 0; |
| } else { |
| mapMe -= firstCode; |
| } |
| |
| if (mapMe < entryCountArray[key]) { |
| /* "address" arithmetic is needed to calculate the offset |
| * into glyphIndexArray. "idRangeOffSetArray[key]" specifies |
| * the number of bytes from that location in the table where |
| * the subarray of glyphIndexes starting at "firstCode" begins. |
| * Each entry in the subHeader table is 8 bytes, and the |
| * idRangeOffSetArray field is at offset 6 in the entry. |
| * The glyphIndexArray immediately follows the subHeaders. |
| * So if there are "N" entries then the number of bytes to the |
| * start of glyphIndexArray is (N-key)*8-6. |
| * Subtract this from the idRangeOffSetArray value to get |
| * the number of bytes into glyphIndexArray and divide by 2 to |
| * get the (char) array index. |
| */ |
| int glyphArrayOffset = ((idRangeOffSetArray.length-key)*8)-6; |
| int glyphSubArrayStart = |
| (idRangeOffSetArray[key] - glyphArrayOffset)/2; |
| char glyphCode = glyphIndexArray[glyphSubArrayStart+mapMe]; |
| if (glyphCode != 0) { |
| glyphCode += idDeltaArray[key]; //idDelta |
| return glyphCode; |
| } |
| } |
| return 0; |
| } |
| } |
| |
| // Format 6: Trimmed table mapping |
| static class CMapFormat6 extends CMap { |
| |
| char firstCode; |
| char entryCount; |
| char[] glyphIdArray; |
| |
| CMapFormat6(ByteBuffer bbuffer, int offset, char[] xlat) { |
| |
| bbuffer.position(offset+6); |
| CharBuffer buffer = bbuffer.asCharBuffer(); |
| firstCode = buffer.get(); |
| entryCount = buffer.get(); |
| glyphIdArray = new char[entryCount]; |
| for (int i=0; i< entryCount; i++) { |
| glyphIdArray[i] = buffer.get(); |
| } |
| } |
| |
| char getGlyph(int charCode) { |
| int controlGlyph = getControlCodeGlyph(charCode, true); |
| if (controlGlyph >= 0) { |
| return (char)controlGlyph; |
| } |
| |
| if (xlat != null) { |
| charCode = xlat[charCode]; |
| } |
| |
| charCode -= firstCode; |
| if (charCode < 0 || charCode >= entryCount) { |
| return 0; |
| } else { |
| return glyphIdArray[charCode]; |
| } |
| } |
| } |
| |
| // Format 8: mixed 16-bit and 32-bit coverage |
| // Seems unlikely this code will ever get tested as we look for |
| // MS platform Cmaps and MS states (in the Opentype spec on their website) |
| // that MS doesn't support this format |
| static class CMapFormat8 extends CMap { |
| byte[] is32 = new byte[8192]; |
| int nGroups; |
| int[] startCharCode; |
| int[] endCharCode; |
| int[] startGlyphID; |
| |
| CMapFormat8(ByteBuffer bbuffer, int offset, char[] xlat) { |
| |
| bbuffer.position(12); |
| bbuffer.get(is32); |
| nGroups = bbuffer.getInt(); |
| startCharCode = new int[nGroups]; |
| endCharCode = new int[nGroups]; |
| startGlyphID = new int[nGroups]; |
| } |
| |
| char getGlyph(int charCode) { |
| if (xlat != null) { |
| throw new RuntimeException("xlat array for cmap fmt=8"); |
| } |
| return 0; |
| } |
| |
| } |
| |
| |
| // Format 4-byte 10: Trimmed table mapping |
| // Seems unlikely this code will ever get tested as we look for |
| // MS platform Cmaps and MS states (in the Opentype spec on their website) |
| // that MS doesn't support this format |
| static class CMapFormat10 extends CMap { |
| |
| long firstCode; |
| int entryCount; |
| char[] glyphIdArray; |
| |
| CMapFormat10(ByteBuffer bbuffer, int offset, char[] xlat) { |
| |
| firstCode = bbuffer.getInt() & INTMASK; |
| entryCount = bbuffer.getInt() & INTMASK; |
| bbuffer.position(offset+20); |
| CharBuffer buffer = bbuffer.asCharBuffer(); |
| glyphIdArray = new char[entryCount]; |
| for (int i=0; i< entryCount; i++) { |
| glyphIdArray[i] = buffer.get(); |
| } |
| } |
| |
| char getGlyph(int charCode) { |
| |
| if (xlat != null) { |
| throw new RuntimeException("xlat array for cmap fmt=10"); |
| } |
| |
| int code = (int)(charCode - firstCode); |
| if (code < 0 || code >= entryCount) { |
| return 0; |
| } else { |
| return glyphIdArray[code]; |
| } |
| } |
| } |
| |
| // Format 12: Segmented coverage for UCS-4 (fonts supporting |
| // surrogate pairs) |
| static class CMapFormat12 extends CMap { |
| |
| int numGroups; |
| int highBit =0; |
| int power; |
| int extra; |
| long[] startCharCode; |
| long[] endCharCode; |
| int[] startGlyphID; |
| |
| CMapFormat12(ByteBuffer buffer, int offset, char[] xlat) { |
| if (xlat != null) { |
| throw new RuntimeException("xlat array for cmap fmt=12"); |
| } |
| |
| numGroups = buffer.getInt(offset+12); |
| startCharCode = new long[numGroups]; |
| endCharCode = new long[numGroups]; |
| startGlyphID = new int[numGroups]; |
| buffer.position(offset+16); |
| buffer = buffer.slice(); |
| IntBuffer ibuffer = buffer.asIntBuffer(); |
| for (int i=0; i<numGroups; i++) { |
| startCharCode[i] = ibuffer.get() & INTMASK; |
| endCharCode[i] = ibuffer.get() & INTMASK; |
| startGlyphID[i] = ibuffer.get() & INTMASK; |
| } |
| |
| /* Finds the high bit by binary searching through the bits */ |
| int value = numGroups; |
| |
| if (value >= 1 << 16) { |
| value >>= 16; |
| highBit += 16; |
| } |
| |
| if (value >= 1 << 8) { |
| value >>= 8; |
| highBit += 8; |
| } |
| |
| if (value >= 1 << 4) { |
| value >>= 4; |
| highBit += 4; |
| } |
| |
| if (value >= 1 << 2) { |
| value >>= 2; |
| highBit += 2; |
| } |
| |
| if (value >= 1 << 1) { |
| value >>= 1; |
| highBit += 1; |
| } |
| |
| power = 1 << highBit; |
| extra = numGroups - power; |
| } |
| |
| char getGlyph(int charCode) { |
| int controlGlyph = getControlCodeGlyph(charCode, false); |
| if (controlGlyph >= 0) { |
| return (char)controlGlyph; |
| } |
| int probe = power; |
| int range = 0; |
| |
| if (startCharCode[extra] <= charCode) { |
| range = extra; |
| } |
| |
| while (probe > 1) { |
| probe >>= 1; |
| |
| if (startCharCode[range+probe] <= charCode) { |
| range += probe; |
| } |
| } |
| |
| if (startCharCode[range] <= charCode && |
| endCharCode[range] >= charCode) { |
| return (char) |
| (startGlyphID[range] + (charCode - startCharCode[range])); |
| } |
| |
| return 0; |
| } |
| |
| } |
| |
| /* Used to substitute for bad Cmaps. */ |
| static class NullCMapClass extends CMap { |
| |
| char getGlyph(int charCode) { |
| return 0; |
| } |
| } |
| |
| public static final NullCMapClass theNullCmap = new NullCMapClass(); |
| |
| final int getControlCodeGlyph(int charCode, boolean noSurrogates) { |
| if (charCode < 0x0010) { |
| switch (charCode) { |
| case 0x0009: |
| case 0x000a: |
| case 0x000d: return CharToGlyphMapper.INVISIBLE_GLYPH_ID; |
| } |
| } else if (charCode >= 0x200c) { |
| if ((charCode <= 0x200f) || |
| (charCode >= 0x2028 && charCode <= 0x202e) || |
| (charCode >= 0x206a && charCode <= 0x206f)) { |
| return CharToGlyphMapper.INVISIBLE_GLYPH_ID; |
| } else if (noSurrogates && charCode >= 0xFFFF) { |
| return 0; |
| } |
| } |
| return -1; |
| } |
| } |