Behdad Esfahbod | 1ae2959 | 2014-01-14 15:07:50 +0800 | [diff] [blame^] | 1 | from __future__ import print_function, division, absolute_import |
Behdad Esfahbod | 30e691e | 2013-11-27 17:27:45 -0500 | [diff] [blame] | 2 | from fontTools.misc.py23 import * |
| 3 | from fontTools.misc.textTools import safeEval, readHex |
Behdad Esfahbod | 2b06aaa | 2013-11-27 02:34:11 -0500 | [diff] [blame] | 4 | from . import DefaultTable |
Behdad Esfahbod | 30e691e | 2013-11-27 17:27:45 -0500 | [diff] [blame] | 5 | import sys |
Just | 7842e56 | 1999-12-16 21:34:53 +0000 | [diff] [blame] | 6 | import struct |
Just | 7842e56 | 1999-12-16 21:34:53 +0000 | [diff] [blame] | 7 | import array |
jvr | d299b55 | 2006-10-21 13:54:30 +0000 | [diff] [blame] | 8 | import operator |
Just | 7842e56 | 1999-12-16 21:34:53 +0000 | [diff] [blame] | 9 | |
| 10 | |
class table__c_m_a_p(DefaultTable.DefaultTable):
    """Container for the subtables of a 'cmap' table.

    After decompile() or fromXML() has run, ``self.tableVersion`` holds the
    table version and ``self.tables`` the list of subtable objects. Each
    subtable carries ``platformID`` and ``platEncID`` attributes.
    """

    def getcmap(self, platformID, platEncID):
        """Return the first subtable with the given IDs, or None if absent."""
        for subtable in self.tables:
            if (subtable.platformID == platformID and
                    subtable.platEncID == platEncID):
                return subtable
        return None  # not found

    def decompile(self, data, ttFont):
        """Parse the table header and the encoding records found in ``data``.

        One subtable object is created per encoding record, except for
        records whose subtable reports a zero length; those are reported on
        stdout and skipped.  Records that point at the same offset share a
        single ``cmap`` dictionary.
        """
        tableVersion, numSubTables = struct.unpack(">HH", data[:4])
        self.tableVersion = int(tableVersion)
        self.tables = tables = []
        seenOffsets = {}  # subtable offset -> index into ``tables``
        for i in range(numSubTables):
            # The offset field of an encoding record is an unsigned 32-bit
            # value, hence "L" rather than the signed "l".
            platformID, platEncID, offset = struct.unpack(
                ">HHL", data[4 + i * 8:4 + (i + 1) * 8])
            platformID, platEncID = int(platformID), int(platEncID)
            format, length = struct.unpack(">HH", data[offset:offset + 4])
            if format in [8, 10, 12, 13]:
                # These formats have a reserved word and a 32-bit length.
                format, reserved, length = struct.unpack(
                    ">HHL", data[offset:offset + 8])
            elif format in [14]:
                # Format 14 has a 32-bit length right after the format.
                format, length = struct.unpack(">HL", data[offset:offset + 6])

            if not length:
                print("Error: cmap subtable is reported as having zero length: platformID %s, platEncID %s,  format %s offset %s. Skipping table." % (platformID, platEncID,format, offset))
                continue
            if format not in cmap_classes:
                table = cmap_format_unknown(format)
            else:
                table = cmap_classes[format](format)
            table.platformID = platformID
            table.platEncID = platEncID
            # Note that by default we decompile only the subtable header info;
            # any other data gets decompiled only when an attribute of the
            # subtable is referenced.
            table.decompileHeader(data[offset:offset + int(length)], ttFont)
            if offset in seenOffsets:
                table.cmap = tables[seenOffsets[offset]].cmap
            else:
                # Record the position the table is about to take in
                # ``tables``.  The record index ``i`` would be wrong as soon
                # as an earlier zero-length subtable has been skipped.
                seenOffsets[offset] = len(tables)
            tables.append(table)

    def compile(self, ttFont):
        """Return the binary 'cmap' table built from ``self.tables``.

        ``self.tables`` is sorted in place first.  Subtables that share the
        same ``cmap`` object, or that compile to identical bytes, are written
        once and referenced by several encoding records.
        """
        self.tables.sort()  # sort according to the spec; see CmapSubtable.__lt__()
        numSubTables = len(self.tables)
        totalOffset = 4 + 8 * numSubTables
        records = [struct.pack(">HH", self.tableVersion, numSubTables)]
        chunks = []
        chunksLength = 0
        seen = {}  # Some tables are the same object reference. Don't compile them twice.
        done = {}  # Some tables are different objects, but compile to the same data chunk
        for table in self.tables:
            try:
                offset = seen[id(table.cmap)]
            except KeyError:
                chunk = table.compile(ttFont)
                if chunk in done:
                    offset = done[chunk]
                else:
                    offset = seen[id(table.cmap)] = done[chunk] = totalOffset + chunksLength
                    chunks.append(chunk)
                    chunksLength += len(chunk)
            # Unsigned 32-bit offset, matching what decompile() reads.
            records.append(
                struct.pack(">HHL", table.platformID, table.platEncID, offset))
        return b"".join(records + chunks)

    def toXML(self, writer, ttFont):
        """Write the table version followed by every subtable to ``writer``."""
        writer.simpletag("tableVersion", version=self.tableVersion)
        writer.newline()
        for table in self.tables:
            table.toXML(writer, ttFont)

    def fromXML(self, name, attrs, content, ttFont):
        """Handle one child element of the <cmap> XML element.

        ``tableVersion`` sets the version; elements whose name starts with
        ``cmap_format_`` create and append a subtable; anything else is
        ignored.
        """
        if name == "tableVersion":
            self.tableVersion = safeEval(attrs["version"])
            return
        if not name.startswith("cmap_format_"):
            return
        if not hasattr(self, "tables"):
            self.tables = []
        format = safeEval(name[12:])
        if format not in cmap_classes:
            table = cmap_format_unknown(format)
        else:
            table = cmap_classes[format](format)
        table.platformID = safeEval(attrs["platformID"])
        table.platEncID = safeEval(attrs["platEncID"])
        table.fromXML(name, attrs, content, ttFont)
        self.tables.append(table)
| 98 | |
| 99 | |
class CmapSubtable(object):
    """Base class for cmap subtables with lazy decompilation.

    decompileHeader() stores the raw bytes following the header in
    ``self.data``.  The first access to an attribute that is not set yet
    (typically ``cmap``) triggers ``self.decompile(None, None)``, which
    subclasses implement.
    """

    def __init__(self, format):
        self.format = format
        self.data = None
        self.ttFont = None

    def __getattr__(self, attr):
        # allow lazy decompilation of subtables.
        if attr[:2] == '__':  # don't handle requests for member functions like '__lt__'
            raise AttributeError(attr)
        if self.data is None:
            raise AttributeError(attr)
        self.decompile(None, None)  # use saved data.
        # Once this table has been decompiled, make sure we don't just
        # return the original data. Also avoids recursion when called with
        # an attribute that the cmap subtable doesn't have.
        self.data = None
        return getattr(self, attr)

    def decompileHeader(self, data, ttFont):
        """Read the 6-byte (format, length, language) header from ``data``.

        The bytes after the header are kept in ``self.data`` and ``ttFont``
        in ``self.ttFont`` for a later lazy decompile().  An AssertionError
        is raised when ``len(data)`` differs from the length in the header.
        """
        format, length, language = struct.unpack(">HHH", data[:6])
        assert len(data) == length, "corrupt cmap table format %d (data length: %d, header length: %d)" % (format, len(data), length)
        self.format = int(format)
        self.length = int(length)
        self.language = int(language)
        self.data = data[6:]
        self.ttFont = ttFont

    def toXML(self, writer, ttFont):
        """Write this subtable as an XML element named after its class."""
        writer.begintag(self.__class__.__name__, [
            ("platformID", self.platformID),
            ("platEncID", self.platEncID),
            ("language", self.language),
        ])
        writer.newline()
        codes = sorted(self.cmap.items())
        self._writeCodes(codes, writer)
        writer.endtag(self.__class__.__name__)
        writer.newline()

    def _writeCodes(self, codes, writer):
        """Write one <map> element per (code, name) pair in ``codes``.

        For platform 0 and for platform 3 with encoding 1 or 10, a comment
        taken from ``fontTools.unicode.Unicode`` follows each element.
        """
        isUnicode = (self.platformID == 0 or
                     (self.platformID, self.platEncID) in ((3, 1), (3, 10)))
        if isUnicode:
            from fontTools.unicode import Unicode
        for code, name in codes:
            writer.simpletag("map", code=hex(code), name=name)
            if isUnicode:
                writer.comment(Unicode[code])
            writer.newline()

    def __lt__(self, other):
        """Order subtables by (platformID, platEncID, language).

        Implemented so that list.sort() sorts according to the spec.
        Missing attributes count as None.
        """
        if not isinstance(other, CmapSubtable):
            return NotImplemented

        selfKey = (
            getattr(self, "platformID", None),
            getattr(self, "platEncID", None),
            getattr(self, "language", None))
        otherKey = (
            getattr(other, "platformID", None),
            getattr(other, "platEncID", None),
            getattr(other, "language", None))
        if selfKey != otherKey:
            return selfKey < otherKey
        # Same sort key: fall back to the instance dictionaries as a
        # tie-breaker where the interpreter can order dicts (Python 2).
        # Python 3 raises TypeError for that comparison; treat the two
        # subtables as equivalent there so sorting does not crash and the
        # (stable) sort keeps their relative order.
        try:
            return self.__dict__ < other.__dict__
        except TypeError:
            return False
Just | 7842e56 | 1999-12-16 21:34:53 +0000 | [diff] [blame] | 168 | |
| 169 | |
class cmap_format_0(CmapSubtable):
    """Format 0 subtable: 256 one-byte glyph IDs indexed by character code."""

    def decompile(self, data, ttFont):
        """Build ``self.cmap`` (char code -> glyph name) from the byte array.

        We usually get here indirectly from the subtable __getattr__
        function, in which case both args must be None and the data saved by
        decompileHeader() is used.  If not, someone is calling decompile()
        directly, and must provide both args; ``data`` is then the complete
        subtable including its header.
        """
        if data is not None and ttFont is not None:
            # ``data`` already is exactly this subtable; there is no
            # enclosing table offset/length to slice with here.
            self.decompileHeader(data, ttFont)
        else:
            assert (data is None and ttFont is None), "Need both data and ttFont arguments"
        data = self.data  # decompileHeader assigns the data after the header to self.data
        assert 262 == self.length, "Format 0 cmap subtable not 262 bytes"
        # Passing the bytes as initializer works on Python 2 and 3 alike,
        # unlike array.fromstring() which no longer exists on Python 3.9+.
        glyphIdArray = array.array("B", data)
        self.cmap = cmap = {}
        getGlyphName = self.ttFont.getGlyphName
        for charCode, glyphID in enumerate(glyphIdArray):
            cmap[charCode] = getGlyphName(glyphID)

    def compile(self, ttFont):
        """Return the 262-byte binary form of this subtable.

        When the raw data from decompileHeader() is still present it is
        written back unchanged.  Otherwise ``self.cmap`` must map exactly the
        char codes 0..255.
        """
        if self.data:
            return struct.pack(">HHH", 0, 262, self.language) + self.data

        charCodeList = sorted(self.cmap.items())
        charCodes = [entry[0] for entry in charCodeList]
        valueList = [entry[1] for entry in charCodeList]
        assert charCodes == list(range(256))
        valueList = [ttFont.getGlyphID(name) for name in valueList]

        glyphIdArray = array.array("B", valueList)
        # array.tostring() was removed in Python 3.9; tobytes() does not
        # exist on Python 2.
        if hasattr(glyphIdArray, "tobytes"):
            packed = glyphIdArray.tobytes()
        else:
            packed = glyphIdArray.tostring()
        data = struct.pack(">HHH", 0, 262, self.language) + packed
        assert len(data) == 262
        return data

    def fromXML(self, name, attrs, content, ttFont):
        """Fill ``self.language`` and ``self.cmap`` from the XML element."""
        self.language = safeEval(attrs["language"])
        if not hasattr(self, "cmap"):
            self.cmap = {}
        cmap = self.cmap
        for element in content:
            if not isinstance(element, tuple):
                continue
            childName, childAttrs, childContent = element
            if childName != "map":
                continue
            cmap[safeEval(childAttrs["code"])] = childAttrs["name"]
Just | 7842e56 | 1999-12-16 21:34:53 +0000 | [diff] [blame] | 217 | |
| 218 | |
# struct layout of one format 2 sub-header: big-endian unsigned short,
# unsigned short, signed short, unsigned short (8 bytes in total).
subHeaderFormat = ">HHhH"


class SubHeader(object):
    """Plain record holding the fields of one format 2 sub-header."""

    def __init__(self):
        # The four scalar fields start out unset.
        for fieldName in ("firstCode", "entryCount", "idDelta", "idRangeOffset"):
            setattr(self, fieldName, None)
        # Each instance gets its own, initially empty, glyph index list.
        self.glyphIndexArray = []
| 227 | |
Just | 7842e56 | 1999-12-16 21:34:53 +0000 | [diff] [blame] | 228 | class cmap_format_2(CmapSubtable): |
| 229 | |
def setIDDelta(self, subHeader):
    """Choose ``subHeader.idDelta`` and rebase its glyph indices in place.

    The lowest non-zero glyph ID in ``subHeader.glyphIndexArray`` is made to
    become 1 after subtracting idDelta, so that glyph index subranges are
    more likely to be combinable.  Zero entries (notdef) are left untouched.
    When the array holds no non-zero entry, or the lowest one is already 1,
    idDelta stays 0 and nothing is rebased.

    A reader reconstructs the final glyph ID as
    ``(storedValue + idDelta) % 0x10000``.  idDelta is a signed short, so for
    a lowest glyph ID above 0x8000 the equivalent negative delta is used, and
    the stored values are reduced modulo 0x10000 so that they stay within an
    unsigned short.
    """
    subHeader.idDelta = 0
    glyphIndexArray = subHeader.glyphIndexArray
    # find the minGI which is not zero.
    nonZeroGIDs = [gid for gid in glyphIndexArray if gid != 0]
    if not nonZeroGIDs:
        return
    minGI = min(nonZeroGIDs)
    if minGI <= 1:
        return

    idDelta = minGI - 1
    if idDelta > 0x7FFF:
        # Does not fit a signed short; use the value that is congruent
        # modulo 0x10000, e.g. minGI 0xFFFF gives idDelta -2.
        idDelta -= 0x10000
    subHeader.idDelta = idDelta
    for i in range(subHeader.entryCount):
        gid = glyphIndexArray[i]
        if gid > 0:
            glyphIndexArray[i] = (gid - idDelta) % 0x10000
| 257 | |
| 258 | |
def decompile(self, data, ttFont):
    """Build ``self.cmap`` (char code -> glyph name) from format 2 data.

    We usually get here indirectly from the subtable __getattr__ function,
    in which case both args must be None and the data saved by
    decompileHeader() is used.  If not, someone is calling decompile()
    directly, and must provide both args; ``data`` is then the complete
    subtable including its header.

    Char codes that resolve to glyph index 0 (notdef) are left out of the
    resulting dictionary.  ``self.data`` is reset to ``b""`` once parsing
    of the sub-headers is done.
    """
    if data is not None and ttFont is not None:
        # ``data`` already is exactly this subtable; there is no enclosing
        # table offset/length to slice with here.
        self.decompileHeader(data, ttFont)
    else:
        assert (data is None and ttFont is None), "Need both data and ttFont arguments"

    data = self.data  # decompileHeader assigns the data after the header to self.data

    # Get the key array, and determine the number of subHeaders.
    # Passing the bytes as initializer works on Python 2 and 3 alike,
    # unlike array.fromstring() which no longer exists on Python 3.9+.
    allKeys = array.array("H", data[:512])
    data = data[512:]
    if sys.byteorder != "big":
        allKeys.byteswap()
    # Keys are byte offsets into the sub-header array; 8 bytes per entry.
    subHeaderKeys = [key // 8 for key in allKeys]
    maxSubHeaderindex = max(subHeaderKeys)

    # Load subHeaders
    subHeaderList = []
    pos = 0
    for i in range(maxSubHeaderindex + 1):
        subHeader = SubHeader()
        (subHeader.firstCode, subHeader.entryCount, subHeader.idDelta,
         subHeader.idRangeOffset) = struct.unpack(subHeaderFormat, data[pos:pos + 8])
        pos += 8
        # idRangeOffset is counted from the idRangeOffset word itself,
        # which is the last 2 bytes of the sub-header just read.
        giDataPos = pos + subHeader.idRangeOffset - 2
        giList = array.array(
            "H", data[giDataPos:giDataPos + subHeader.entryCount * 2])
        if sys.byteorder != "big":
            giList.byteswap()
        subHeader.glyphIndexArray = giList
        subHeaderList.append(subHeader)

    # How this gets processed.
    # Charcodes may be one or two bytes.
    # The first byte of a charcode is mapped through the subHeaderKeys, to select
    # a subHeader. For any subheader but 0, the next byte is then mapped through the
    # selected subheader. If subheader Index 0 is selected, then the byte itself is
    # mapped through the subheader, and there is no second byte.
    # Then assume that the subsequent byte is the first byte of the next charcode, and repeat.
    #
    # Each subheader references a range in the glyphIndexArray whose length is entryCount.
    # The range in glyphIndexArray referenced by a subheader may overlap with the range in
    # glyphIndexArray referenced by another subheader.
    # The only subheader that will be referenced by more than one first-byte value is the subheader
    # that maps the entire range of glyphID values to glyphIndex 0, e.g. notdef:
    #    {firstChar 0, EntryCount 0, idDelta 0, idRangeOffset xx}
    # A byte being mapped through a subheader is treated as an index into a mapping of array index
    # to font glyphIndex.
    # A subheader specifies a subrange within (0...256) by the
    # firstChar and EntryCount values. If the byte value is outside the subrange, then the
    # glyphIndex is zero (e.g. glyph not in font).
    # If the byte index is in the subrange, then an offset index is calculated as
    # (byteIndex - firstChar).
    # The index to glyphIndex mapping is a subrange of the glyphIndexArray. You find the start of
    # the subrange by counting idRangeOffset bytes from the idRangeOffset word. The first value in
    # this subrange is the glyphIndex for the index firstChar. The offset index should then be
    # used in this array to get the glyphIndex.
    # Example for Logocut-Medium
    # first byte of charcode = 129; selects subheader 1.
    # subheader 1 = {firstChar 64, EntryCount 108, idDelta 42, idRangeOffset 0252}
    # second byte of charCode = 66
    # the index offset = 66-64 = 2.
    # The subrange of the glyphIndexArray starting at 0x0252 bytes from the idRangeOffset word is:
    # [glyphIndexArray index], [subrange array index] = glyphIndex
    # [256], [0]=1   from charcode [129, 64]
    # [257], [1]=2   from charcode [129, 65]
    # [258], [2]=3   from charcode [129, 66]
    # [259], [3]=4   from charcode [129, 67]
    # So, the glyphIndex = 3 from the array. Then if idDelta is not zero and the glyph ID is not
    # zero, add it to the glyphID to get the final glyphIndex
    # value. In this case the final glyph index = 3 + 42 -> 45 for the final glyphIndex. Whew!

    self.data = b""
    self.cmap = cmap = {}
    for firstByte in range(256):
        subHeadindex = subHeaderKeys[firstByte]
        subHeader = subHeaderList[subHeadindex]
        if subHeadindex == 0:
            # One-byte char code: the byte itself is looked up.
            if (firstByte < subHeader.firstCode) or (firstByte >= subHeader.firstCode + subHeader.entryCount):
                continue  # gi is notdef.
            charCode = firstByte
            offsetIndex = firstByte - subHeader.firstCode
            gi = subHeader.glyphIndexArray[offsetIndex]
            if gi == 0:
                continue  # gi is notdef.
            cmap[charCode] = (gi + subHeader.idDelta) % 0x10000
        elif subHeader.entryCount:
            charCodeOffset = firstByte * 256 + subHeader.firstCode
            for offsetIndex in range(subHeader.entryCount):
                gi = subHeader.glyphIndexArray[offsetIndex]
                if gi == 0:
                    continue
                cmap[charCodeOffset + offsetIndex] = (gi + subHeader.idDelta) % 0x10000
        # If not subHeader.entryCount, then all char codes with this first byte are
        # mapped to .notdef. We can skip this subtable, and leave the glyphs un-encoded,
        # which is the same as mapping it to .notdef.

    # cmap values are GID's at this point; replace them with glyph names.
    glyphOrder = self.ttFont.getGlyphOrder()
    gids = list(cmap.values())
    charCodes = list(cmap.keys())
    try:
        names = [glyphOrder[gid] for gid in gids]
    except IndexError:
        # Some GID lies beyond the glyph order; let the font name them all.
        getGlyphName = self.ttFont.getGlyphName
        names = [getGlyphName(gid) for gid in gids]
    cmap.update(zip(charCodes, names))
jvr | d299b55 | 2006-10-21 13:54:30 +0000 | [diff] [blame] | 374 | |
jvr | bafa66e | 2003-08-28 18:04:23 +0000 | [diff] [blame] | 375 | |
Just | 7842e56 | 1999-12-16 21:34:53 +0000 | [diff] [blame] | 376 | def compile(self, ttFont): |
jvr | d299b55 | 2006-10-21 13:54:30 +0000 | [diff] [blame] | 377 | if self.data: |
| 378 | return struct.pack(">HHH", self.format, self.length, self.language) + self.data |
jvr | bafa66e | 2003-08-28 18:04:23 +0000 | [diff] [blame] | 379 | kEmptyTwoCharCodeRange = -1 |
jvr | d299b55 | 2006-10-21 13:54:30 +0000 | [diff] [blame] | 380 | notdefGI = 0 |
| 381 | |
Behdad Esfahbod | ac1b435 | 2013-11-27 04:15:34 -0500 | [diff] [blame] | 382 | items = sorted(self.cmap.items()) |
jvr | d299b55 | 2006-10-21 13:54:30 +0000 | [diff] [blame] | 383 | charCodes = [item[0] for item in items] |
| 384 | names = [item[1] for item in items] |
| 385 | nameMap = ttFont.getReverseGlyphMap() |
| 386 | lenCharCodes = len(charCodes) |
| 387 | try: |
Behdad Esfahbod | e5ca796 | 2013-11-27 04:38:16 -0500 | [diff] [blame] | 388 | gids = list(map(operator.getitem, [nameMap]*lenCharCodes, names)) |
jvr | d299b55 | 2006-10-21 13:54:30 +0000 | [diff] [blame] | 389 | except KeyError: |
Behdad Esfahbod | dc87372 | 2013-12-04 21:28:50 -0500 | [diff] [blame] | 390 | nameMap = ttFont.getReverseGlyphMap(rebuild=True) |
jvr | d299b55 | 2006-10-21 13:54:30 +0000 | [diff] [blame] | 391 | try: |
Behdad Esfahbod | e5ca796 | 2013-11-27 04:38:16 -0500 | [diff] [blame] | 392 | gids = list(map(operator.getitem, [nameMap]*lenCharCodes, names)) |
jvr | d299b55 | 2006-10-21 13:54:30 +0000 | [diff] [blame] | 393 | except KeyError: |
| 394 | # allow virtual GIDs in format 2 tables |
| 395 | gids = [] |
| 396 | for name in names: |
| 397 | try: |
| 398 | gid = nameMap[name] |
| 399 | except KeyError: |
| 400 | try: |
| 401 | if (name[:3] == 'gid'): |
| 402 | gid = eval(name[3:]) |
| 403 | else: |
| 404 | gid = ttFont.getGlyphID(name) |
| 405 | except: |
| 406 | raise KeyError(name) |
jvr | bafa66e | 2003-08-28 18:04:23 +0000 | [diff] [blame] | 407 | |
jvr | d299b55 | 2006-10-21 13:54:30 +0000 | [diff] [blame] | 408 | gids.append(gid) |
| 409 | |
| 410 | # Process the (char code to gid) item list in char code order. |
| 411 | # By definition, all one byte char codes map to subheader 0. |
| 412 | # For all the two byte char codes, we assume that the first byte maps maps to the empty subhead (with an entry count of 0, |
| 413 | # which defines all char codes in its range to map to notdef) unless proven otherwise. |
| 414 | # Note that since the char code items are processed in char code order, all the char codes with the |
| 415 | # same first byte are in sequential order. |
| 416 | |
| 417 | subHeaderKeys = [ kEmptyTwoCharCodeRange for x in range(256)] # list of indices into subHeaderList. |
jvr | bafa66e | 2003-08-28 18:04:23 +0000 | [diff] [blame] | 418 | subHeaderList = [] |
jvr | d299b55 | 2006-10-21 13:54:30 +0000 | [diff] [blame] | 419 | |
| 420 | # We force this subheader entry 0 to exist in the subHeaderList in the case where some one comes up |
| 421 | # with a cmap where all the one byte char codes map to notdef, |
| 422 | # with the result that the subhead 0 would not get created just by processing the item list. |
| 423 | charCode = charCodes[0] |
| 424 | if charCode > 255: |
| 425 | subHeader = SubHeader() |
| 426 | subHeader.firstCode = 0 |
| 427 | subHeader.entryCount = 0 |
| 428 | subHeader.idDelta = 0 |
| 429 | subHeader.idRangeOffset = 0 |
| 430 | subHeaderList.append(subHeader) |
| 431 | |
jvr | bafa66e | 2003-08-28 18:04:23 +0000 | [diff] [blame] | 432 | |
| 433 | lastFirstByte = -1 |
jvr | d299b55 | 2006-10-21 13:54:30 +0000 | [diff] [blame] | 434 | items = zip(charCodes, gids) |
| 435 | for charCode, gid in items: |
| 436 | if gid == 0: |
| 437 | continue |
jvr | bafa66e | 2003-08-28 18:04:23 +0000 | [diff] [blame] | 438 | firstbyte = charCode >> 8 |
| 439 | secondByte = charCode & 0x00FF |
jvr | d299b55 | 2006-10-21 13:54:30 +0000 | [diff] [blame] | 440 | |
| 441 | if firstbyte != lastFirstByte: # Need to update the current subhead, and start a new one. |
jvr | bafa66e | 2003-08-28 18:04:23 +0000 | [diff] [blame] | 442 | if lastFirstByte > -1: |
jvr | d299b55 | 2006-10-21 13:54:30 +0000 | [diff] [blame] | 443 | # fix GI's and iDelta of current subheader. |
| 444 | self.setIDDelta(subHeader) |
| 445 | |
| 446 | # If it was sunheader 0 for one-byte charCodes, then we need to set the subHeaderKeys value to zero |
| 447 | # for the indices matching the char codes. |
| 448 | if lastFirstByte == 0: |
| 449 | for index in range(subHeader.entryCount): |
| 450 | charCode = subHeader.firstCode + index |
| 451 | subHeaderKeys[charCode] = 0 |
| 452 | |
jvr | bafa66e | 2003-08-28 18:04:23 +0000 | [diff] [blame] | 453 | assert (subHeader.entryCount == len(subHeader.glyphIndexArray)), "Error - subhead entry count does not match len of glyphID subrange." |
| 454 | # init new subheader |
| 455 | subHeader = SubHeader() |
| 456 | subHeader.firstCode = secondByte |
jvr | d299b55 | 2006-10-21 13:54:30 +0000 | [diff] [blame] | 457 | subHeader.entryCount = 1 |
| 458 | subHeader.glyphIndexArray.append(gid) |
| 459 | subHeaderList.append(subHeader) |
| 460 | subHeaderKeys[firstbyte] = len(subHeaderList) -1 |
jvr | bafa66e | 2003-08-28 18:04:23 +0000 | [diff] [blame] | 461 | lastFirstByte = firstbyte |
| 462 | else: |
jvr | d299b55 | 2006-10-21 13:54:30 +0000 | [diff] [blame] | 463 | # need to fill in with notdefs all the code points between the last charCode and the current charCode. |
jvr | bafa66e | 2003-08-28 18:04:23 +0000 | [diff] [blame] | 464 | codeDiff = secondByte - (subHeader.firstCode + subHeader.entryCount) |
| 465 | for i in range(codeDiff): |
jvr | d299b55 | 2006-10-21 13:54:30 +0000 | [diff] [blame] | 466 | subHeader.glyphIndexArray.append(notdefGI) |
| 467 | subHeader.glyphIndexArray.append(gid) |
jvr | bafa66e | 2003-08-28 18:04:23 +0000 | [diff] [blame] | 468 | subHeader.entryCount = subHeader.entryCount + codeDiff + 1 |
jvr | d299b55 | 2006-10-21 13:54:30 +0000 | [diff] [blame] | 469 | |
| 470 | # fix GI's and iDelta of last subheader that we added to the subheader array. |
| 471 | self.setIDDelta(subHeader) |
jvr | bafa66e | 2003-08-28 18:04:23 +0000 | [diff] [blame] | 472 | |
jvr | d299b55 | 2006-10-21 13:54:30 +0000 | [diff] [blame] | 473 | # Now we add a final subheader for the subHeaderKeys which maps to empty two byte charcode ranges. |
jvr | bafa66e | 2003-08-28 18:04:23 +0000 | [diff] [blame] | 474 | subHeader = SubHeader() |
| 475 | subHeader.firstCode = 0 |
| 476 | subHeader.entryCount = 0 |
| 477 | subHeader.idDelta = 0 |
| 478 | subHeader.idRangeOffset = 2 |
| 479 | subHeaderList.append(subHeader) |
| 480 | emptySubheadIndex = len(subHeaderList) - 1 |
| 481 | for index in range(256): |
jvr | d299b55 | 2006-10-21 13:54:30 +0000 | [diff] [blame] | 482 | if subHeaderKeys[index] == kEmptyTwoCharCodeRange: |
jvr | bafa66e | 2003-08-28 18:04:23 +0000 | [diff] [blame] | 483 | subHeaderKeys[index] = emptySubheadIndex |
| 484 | # Since this is the last subheader, the GlyphIndex Array starts two bytes after the start of the |
jvr | d299b55 | 2006-10-21 13:54:30 +0000 | [diff] [blame] | 485 | # idRangeOffset word of this subHeader. We can safely point to the first entry in the GlyphIndexArray, |
jvr | bafa66e | 2003-08-28 18:04:23 +0000 | [diff] [blame] | 486 | # since the first subrange of the GlyphIndexArray is for subHeader 0, which always starts with |
| 487 | # charcode 0 and GID 0. |
| 488 | |
jvr | bafa66e | 2003-08-28 18:04:23 +0000 | [diff] [blame] | 489 | idRangeOffset = (len(subHeaderList)-1)*8 + 2 # offset to beginning of glyphIDArray from first subheader idRangeOffset. |
jvr | d299b55 | 2006-10-21 13:54:30 +0000 | [diff] [blame] | 490 | subheadRangeLen = len(subHeaderList) -1 # skip last special empty-set subheader; we've already hardocodes its idRangeOffset to 2. |
| 491 | for index in range(subheadRangeLen): |
| 492 | subHeader = subHeaderList[index] |
| 493 | subHeader.idRangeOffset = 0 |
| 494 | for j in range(index): |
| 495 | prevSubhead = subHeaderList[j] |
| 496 | if prevSubhead.glyphIndexArray == subHeader.glyphIndexArray: # use the glyphIndexArray subarray |
| 497 | subHeader.idRangeOffset = prevSubhead.idRangeOffset - (index-j)*8 |
| 498 | subHeader.glyphIndexArray = [] |
| 499 | break |
| 500 | if subHeader.idRangeOffset == 0: # didn't find one. |
| 501 | subHeader.idRangeOffset = idRangeOffset |
| 502 | idRangeOffset = (idRangeOffset - 8) + subHeader.entryCount*2 # one less subheader, one more subArray. |
| 503 | else: |
| 504 | idRangeOffset = idRangeOffset - 8 # one less subheader |
| 505 | |
jvr | bafa66e | 2003-08-28 18:04:23 +0000 | [diff] [blame] | 506 | # Now we can write out the data! |
| 507 | length = 6 + 512 + 8*len(subHeaderList) # header, 256 subHeaderKeys, and subheader array. |
| 508 | for subhead in subHeaderList[:-1]: |
jvr | d299b55 | 2006-10-21 13:54:30 +0000 | [diff] [blame] | 509 | length = length + len(subhead.glyphIndexArray)*2 # We can't use subhead.entryCount, as some of the subhead may share subArrays. |
| 510 | dataList = [struct.pack(">HHH", 2, length, self.language)] |
jvr | bafa66e | 2003-08-28 18:04:23 +0000 | [diff] [blame] | 511 | for index in subHeaderKeys: |
jvr | d299b55 | 2006-10-21 13:54:30 +0000 | [diff] [blame] | 512 | dataList.append(struct.pack(">H", index*8)) |
jvr | bafa66e | 2003-08-28 18:04:23 +0000 | [diff] [blame] | 513 | for subhead in subHeaderList: |
jvr | d299b55 | 2006-10-21 13:54:30 +0000 | [diff] [blame] | 514 | dataList.append(struct.pack(subHeaderFormat, subhead.firstCode, subhead.entryCount, subhead.idDelta, subhead.idRangeOffset)) |
jvr | bafa66e | 2003-08-28 18:04:23 +0000 | [diff] [blame] | 515 | for subhead in subHeaderList[:-1]: |
| 516 | for gi in subhead.glyphIndexArray: |
jvr | d299b55 | 2006-10-21 13:54:30 +0000 | [diff] [blame] | 517 | dataList.append(struct.pack(">H", gi)) |
Behdad Esfahbod | 18316aa | 2013-11-27 21:17:35 -0500 | [diff] [blame] | 518 | data = bytesjoin(dataList) |
jvr | bafa66e | 2003-08-28 18:04:23 +0000 | [diff] [blame] | 519 | assert (len(data) == length), "Error: cmap format 2 is not same length as calculated! actual: " + str(len(data))+ " calc : " + str(length) |
| 520 | return data |
jvr | d299b55 | 2006-10-21 13:54:30 +0000 | [diff] [blame] | 521 | |
| 522 | |
def fromXML(self, name, attrs, content, ttFont):
    """Fill this subtable from a parsed XML element.

    Sets ``self.language`` from the ``language`` attribute, then records
    each child ``("map", attrs, content)`` tuple as ``cmap[code] = name``.
    Entries are added to ``self.cmap`` if it already exists; otherwise a
    new dict is created. Children that are not tuples, or whose tag is not
    "map", are ignored.
    """
    self.language = safeEval(attrs["language"])
    if not hasattr(self, "cmap"):
        self.cmap = {}
    mapping = self.cmap

    for child in content:
        # Non-tuple children (e.g. whitespace strings) carry no mapping.
        if isinstance(child, tuple):
            tag, childAttrs, _childContent = child
            if tag == "map":
                mapping[safeEval(childAttrs["code"])] = childAttrs["name"]
Just | 7842e56 | 1999-12-16 21:34:53 +0000 | [diff] [blame] | 536 | |
| 537 | |
# struct format of the fixed format 4 header: seven big-endian uint16 values.
# cmap_format_4.compile() packs them as format, length, language, segCountX2,
# searchRange, entrySelector, rangeShift.
cmap_format_4_format = ">7H"

# The arrays below follow that header in a format 4 subtable:
#uint16  endCode[segCount]          # Ending character code for each segment, last = 0xFFFF.
#uint16  reservedPad                # This value should be zero
#uint16  startCode[segCount]        # Starting character code for each segment
#uint16  idDelta[segCount]          # Delta for all character codes in segment
#uint16  idRangeOffset[segCount]    # Offset in bytes to glyph indexArray, or 0
#uint16  glyphIndexArray[variable]  # Glyph index array
Just | 7842e56 | 1999-12-16 21:34:53 +0000 | [diff] [blame] | 546 | |
def splitRange(startCode, endCode, cmap):
    """Split one run of consecutive character codes into cmap4 segments.

    ``cmap`` maps every code in ``startCode..endCode`` (inclusive) to a
    glyph ID. The range is cut around sub-runs whose glyph IDs are also
    consecutive, but only where the extra segments are estimated to cost
    less than storing the glyph IDs explicitly. The heuristic is not
    proven optimal.

    Returns ``(start, end)``: ``end`` lists the end code of every resulting
    segment, and ``start`` lists the start codes of all segments except the
    first (the caller already knows that one), so
    ``len(start) + 1 == len(end)``.
    """
    if startCode == endCode:
        return [], [endCode]

    # Pass 1: collect the maximal runs in which glyph IDs increase by one.
    runs = []
    runStart = None  # first code of the run in progress, None when outside a run
    prevCode = startCode
    prevGid = cmap[startCode]
    for code in range(startCode + 1, endCode + 1):
        gid = cmap[code]
        if gid - 1 == prevGid:
            if runStart is None:
                runStart = prevCode
        elif runStart is not None:
            runs.append((runStart, prevCode))
            runStart = None
        prevCode, prevGid = code, gid
    if runStart is not None:
        runs.append((runStart, prevCode))
    assert prevCode == endCode

    # Pass 2: keep only the runs that pay for themselves. A new segment costs
    # 8 bytes; leaving a character in a non-consecutive segment costs 2 bytes.
    kept = []
    for first, last in runs:
        if first == startCode and last == endCode:
            break  # the run is the whole range: nothing to split
        # A run touching an edge adds one segment, an interior run adds two.
        threshold = 4 if (first == startCode or last == endCode) else 8
        if last - first + 1 > threshold:
            kept.append((first, last))

    if not kept:
        return [], [endCode]

    # Pass 3: lay the kept runs out in order, filling every gap before,
    # between and after them with a segment of non-consecutive glyph IDs.
    segments = []
    nextCode = startCode
    for first, last in kept:
        if first != nextCode:
            segments.append((nextCode, first - 1))
        segments.append((first, last))
        nextCode = last + 1
    if nextCode != endCode + 1:
        segments.append((nextCode, endCode))

    # The first start code is implied by the caller, so it is left out.
    start = [first for first, _ in segments[1:]]
    end = [last for _, last in segments]

    assert len(start) + 1 == len(end)
    return start, end
| 624 | |
| 625 | |
class cmap_format_4(CmapSubtable):
    """cmap subtable format 4: 16-bit character codes stored as segments.

    Each segment covers a run of consecutive character codes and maps them
    either by adding ``idDelta`` to the code or through ``glyphIndexArray``.
    """

    def decompile(self, data, ttFont):
        """Build ``self.cmap`` (character code -> glyph name).

        Normally called with ``data=None, ttFont=None`` after
        ``decompileHeader`` has already stored the post-header bytes in
        ``self.data`` and the font in ``self.ttFont``. A direct caller must
        pass both arguments; ``data`` is then handed to ``decompileHeader``
        (presumably the subtable bytes starting at its own header — confirm
        against CmapSubtable.decompileHeader).
        """
        if data is not None and ttFont is not None:
            # The previous code sliced self.data with undefined names
            # `offset` and `length`, so this path raised NameError.
            self.decompileHeader(data, ttFont)
        else:
            assert (data is None and ttFont is None), "Need both data and ttFont arguments"

        data = self.data  # decompileHeader assigns the data after the header to self.data
        (segCountX2, searchRange, entrySelector, rangeShift) = \
            struct.unpack(">4H", data[:8])
        data = data[8:]
        segCount = segCountX2 // 2

        allCodes = array.array("H")
        # array.fromstring() was removed in Python 3.9; frombytes() does not
        # exist on Python 2, so use whichever is available.
        if hasattr(allCodes, "frombytes"):
            allCodes.frombytes(data)
        else:
            allCodes.fromstring(data)
        self.data = data = None

        if sys.byteorder != "big":
            allCodes.byteswap()

        # divide the data
        endCode = allCodes[:segCount]
        allCodes = allCodes[segCount+1:]  # the +1 is skipping the reservedPad field
        startCode = allCodes[:segCount]
        allCodes = allCodes[segCount:]
        idDelta = allCodes[:segCount]
        allCodes = allCodes[segCount:]
        idRangeOffset = allCodes[:segCount]
        glyphIndexArray = allCodes[segCount:]
        lenGIArray = len(glyphIndexArray)

        # build 2-byte character mapping
        charCodes = []
        gids = []
        for i in range(len(startCode) - 1):  # don't do 0xffff!
            rangeCharCodes = list(range(startCode[i], endCode[i] + 1))
            charCodes.extend(rangeCharCodes)
            rangeOffset = idRangeOffset[i]
            for charCode in rangeCharCodes:
                if rangeOffset == 0:
                    glyphID = charCode + idDelta[i]
                else:
                    # idRangeOffset[i] is a byte offset counted from the
                    # idRangeOffset[i] entry itself. Halve it to get words,
                    # then subtract the (len(idRangeOffset) - i) words that
                    # separate that entry from the start of glyphIndexArray.
                    index = rangeOffset // 2 + (charCode - startCode[i]) + i - len(idRangeOffset)
                    assert (index < lenGIArray), "In format 4 cmap, range (%d), the calculated index (%d) into the glyph index array  is not less than the length of the array (%d) !" % (i, index, lenGIArray)
                    if glyphIndexArray[index] != 0:  # if not missing glyph
                        glyphID = glyphIndexArray[index] + idDelta[i]
                    else:
                        glyphID = 0  # missing glyph
                gids.append(glyphID % 0x10000)

        self.cmap = cmap = {}
        glyphOrder = self.ttFont.getGlyphOrder()
        try:
            names = [glyphOrder[gid] for gid in gids]
        except IndexError:
            # Some glyph ID lies outside the glyph order; let the font name it.
            getGlyphName = self.ttFont.getGlyphName
            names = [getGlyphName(gid) for gid in gids]
        cmap.update(zip(charCodes, names))

    def setIDDelta(self, idDelta):
        """Return ``idDelta`` wrapped into the signed 16-bit range.

        The delta between a start code and a glyph ID can lie anywhere
        between -(64K-1) and 64K-1, but it is stored as a signed short.
        Glyph IDs are reconstructed modulo 0x10000, so any value congruent
        to ``idDelta`` modulo 0x10000 works; this returns the one in
        [-0x8000, 0x7FFF].

        -0x8000 is returned unchanged: it already fits a signed short, and
        wrapping it to +0x8000 (as the previous code did) would overflow
        the ``array('h')`` built in ``compile``.
        """
        return ((idDelta + 0x8000) % 0x10000) - 0x8000

    def compile(self, ttFont):
        """Return the binary form of this subtable, header included.

        If the subtable was never decompiled (``self.data`` is still set),
        the stored bytes are written back behind a rebuilt header.
        Otherwise ``self.cmap`` is translated to glyph IDs through
        ``ttFont`` and encoded as segments.

        Raises KeyError with the glyph name if a name cannot be resolved.
        """
        if self.data:
            return struct.pack(">HHH", self.format, self.length, self.language) + self.data

        from fontTools.ttLib.sfnt import maxPowerOfTwo

        def toBytes(arr):
            # array.tostring() was removed in Python 3.9; tobytes() does not
            # exist on Python 2, so use whichever is available.
            return arr.tobytes() if hasattr(arr, "tobytes") else arr.tostring()

        sourceMap = self.cmap
        charCodes = sorted(sourceMap.keys())
        if not charCodes:
            startCode = [0xffff]
            endCode = [0xffff]
        else:
            names = [sourceMap[code] for code in charCodes]
            nameMap = ttFont.getReverseGlyphMap()
            try:
                gids = [nameMap[name] for name in names]
            except KeyError:
                nameMap = ttFont.getReverseGlyphMap(rebuild=True)
                try:
                    gids = [nameMap[name] for name in names]
                except KeyError:
                    # allow virtual GIDs in format 4 tables
                    gids = []
                    for name in names:
                        try:
                            gid = nameMap[name]
                        except KeyError:
                            try:
                                if name[:3] == 'gid':
                                    # NOTE(review): eval() on a glyph name is
                                    # unsafe if names can come from untrusted
                                    # input; int() would be the safer parse.
                                    # Confirm nothing relies on non-decimal
                                    # forms before switching.
                                    gid = eval(name[3:])
                                else:
                                    gid = ttFont.getGlyphID(name)
                            except Exception:
                                raise KeyError(name)

                        gids.append(gid)
            cmap = dict(zip(charCodes, gids))  # code:glyphID mapping

            # Build startCode and endCode lists.
            # Split the char codes in ranges of consecutive char codes, then split
            # each range in more ranges of consecutive/not consecutive glyph IDs.
            # See splitRange().
            lastCode = charCodes[0]
            endCode = []
            startCode = [lastCode]
            for charCode in charCodes[1:]:  # skip the first code, it's the first start code
                if charCode == lastCode + 1:
                    lastCode = charCode
                    continue
                start, end = splitRange(startCode[-1], lastCode, cmap)
                startCode.extend(start)
                endCode.extend(end)
                startCode.append(charCode)
                lastCode = charCode
            endCode.append(lastCode)
            startCode.append(0xffff)
            endCode.append(0xffff)

        # Build idDelta / idRangeOffset / glyphIndexArray for every real segment.
        idDelta = []
        idRangeOffset = []
        glyphIndexArray = []
        for i in range(len(endCode)-1):  # skip the closing codes (0xffff)
            indices = [cmap[charCode] for charCode in range(startCode[i], endCode[i] + 1)]
            if indices == list(range(indices[0], indices[0] + len(indices))):
                # Consecutive glyph IDs: a single delta describes the segment.
                idDelta.append(self.setIDDelta(indices[0] - startCode[i]))
                idRangeOffset.append(0)
            else:
                # Byte offset from this idRangeOffset entry to the segment's
                # first slot in glyphIndexArray: the (len(endCode) - i)
                # remaining idRangeOffset words plus the glyph IDs already
                # emitted.
                idDelta.append(0)
                idRangeOffset.append(2 * (len(endCode) + len(glyphIndexArray) - i))
                glyphIndexArray.extend(indices)
        idDelta.append(1)  # 0xffff + 1 == 0, so the closing end code maps to .notdef
        idRangeOffset.append(0)

        # Binary-search helper fields of the header.
        segCount = len(endCode)
        segCountX2 = segCount * 2
        maxExponent = maxPowerOfTwo(segCount)
        searchRange = 2 * (2 ** maxExponent)
        entrySelector = maxExponent
        rangeShift = 2 * segCount - searchRange

        charCodeArray = array.array("H", endCode + [0] + startCode)
        idDeltaArray = array.array("h", idDelta)
        restArray = array.array("H", idRangeOffset + glyphIndexArray)
        if sys.byteorder != "big":
            charCodeArray.byteswap()
            idDeltaArray.byteswap()
            restArray.byteswap()
        data = toBytes(charCodeArray) + toBytes(idDeltaArray) + toBytes(restArray)

        length = struct.calcsize(cmap_format_4_format) + len(data)
        header = struct.pack(cmap_format_4_format, self.format, length, self.language,
                             segCountX2, searchRange, entrySelector, rangeShift)
        return header + data

    def fromXML(self, name, attrs, content, ttFont):
        """Fill this subtable from a parsed XML element.

        Sets ``self.language`` and adds ``cmap[code] = name`` for every
        child ``("map", attrs, content)`` tuple. Non-tuple children are
        skipped; a tuple child with any other tag fails an assertion.
        """
        self.language = safeEval(attrs["language"])
        if not hasattr(self, "cmap"):
            self.cmap = {}
        cmap = self.cmap

        for element in content:
            if not isinstance(element, tuple):
                continue
            nameMap, attrsMap, dummyContent = element
            if nameMap != "map":
                assert 0, "Unrecognized keyword in cmap subtable"
            cmap[safeEval(attrsMap["code"])] = attrsMap["name"]
Just | 7842e56 | 1999-12-16 21:34:53 +0000 | [diff] [blame] | 828 | |
| 829 | |
class cmap_format_6(CmapSubtable):
    """cmap subtable format 6: one dense run of 16-bit character codes.

    The subtable stores ``firstCode``, ``entryCount`` and a glyph index for
    each code from ``firstCode`` onwards.
    """

    def decompile(self, data, ttFont):
        """Build ``self.cmap`` (character code -> glyph name).

        Normally called with ``data=None, ttFont=None`` after
        ``decompileHeader`` has already stored the post-header bytes in
        ``self.data`` and the font in ``self.ttFont``. A direct caller must
        pass both arguments; ``data`` is then handed to ``decompileHeader``
        (presumably the subtable bytes starting at its own header — confirm
        against CmapSubtable.decompileHeader).
        """
        if data is not None and ttFont is not None:
            # The previous code sliced `data` with undefined names `offset`
            # and `length`, so this path raised NameError.
            self.decompileHeader(data, ttFont)
        else:
            assert (data is None and ttFont is None), "Need both data and ttFont arguments"

        data = self.data  # decompileHeader assigns the data after the header to self.data
        firstCode, entryCount = struct.unpack(">HH", data[:4])
        firstCode = int(firstCode)
        data = data[4:]
        # The remaining data is not always exactly 2 * entryCount bytes long
        # (not true in Apple's Helvetica), so only the declared entries are read.
        glyphIndexArray = array.array("H")
        rawIndices = data[:2 * int(entryCount)]
        # array.fromstring() was removed in Python 3.9; frombytes() does not
        # exist on Python 2, so use whichever is available.
        if hasattr(glyphIndexArray, "frombytes"):
            glyphIndexArray.frombytes(rawIndices)
        else:
            glyphIndexArray.fromstring(rawIndices)
        if sys.byteorder != "big":
            glyphIndexArray.byteswap()
        self.data = data = None

        self.cmap = cmap = {}

        charCodes = range(firstCode, firstCode + len(glyphIndexArray))
        glyphOrder = self.ttFont.getGlyphOrder()
        try:
            names = [glyphOrder[gid] for gid in glyphIndexArray]
        except IndexError:
            # Some glyph ID lies outside the glyph order; let the font name it.
            getGlyphName = self.ttFont.getGlyphName
            names = [getGlyphName(gid) for gid in glyphIndexArray]
        cmap.update(zip(charCodes, names))

    def compile(self, ttFont):
        """Return the binary form of this subtable, header included.

        If the subtable was never decompiled (``self.data`` is still set),
        the stored bytes are written back behind a rebuilt header.
        Otherwise every code between the lowest and highest key of
        ``self.cmap`` is emitted, with unmapped codes in between pointing
        at the glyph ID of ".notdef".
        """
        if self.data:
            return struct.pack(">HHH", self.format, self.length, self.language) + self.data
        cmap = self.cmap
        # Keys must be sorted: the first and last entries define the code
        # range, and dict key order is not guaranteed to be ascending.
        codes = sorted(cmap.keys())
        if codes:  # yes, there are empty cmap tables.
            codes = list(range(codes[0], codes[-1] + 1))
            firstCode = codes[0]
            valueList = [ttFont.getGlyphID(cmap.get(code, ".notdef")) for code in codes]
            glyphIndexArray = array.array("H", valueList)
            if sys.byteorder != "big":
                glyphIndexArray.byteswap()
            # array.tostring() was removed in Python 3.9; tobytes() does not
            # exist on Python 2, so use whichever is available.
            if hasattr(glyphIndexArray, "tobytes"):
                data = glyphIndexArray.tobytes()
            else:
                data = glyphIndexArray.tostring()
        else:
            data = b""
            firstCode = 0
        # 10 = size of the five uint16 header fields packed below.
        header = struct.pack(">HHHHH",
                             6, len(data) + 10, self.language, firstCode, len(codes))
        return header + data

    def fromXML(self, name, attrs, content, ttFont):
        """Fill this subtable from a parsed XML element.

        Sets ``self.language`` and adds ``cmap[code] = name`` for every
        child ``("map", attrs, content)`` tuple. Children that are not
        tuples, or whose tag is not "map", are ignored.
        """
        self.language = safeEval(attrs["language"])
        if not hasattr(self, "cmap"):
            self.cmap = {}
        cmap = self.cmap

        for element in content:
            if not isinstance(element, tuple):
                continue
            childName, childAttrs, childContent = element
            if childName != "map":
                continue
            cmap[safeEval(childAttrs["code"])] = childAttrs["name"]
Just | 7842e56 | 1999-12-16 21:34:53 +0000 | [diff] [blame] | 897 | |
| 898 | |
Roozbeh Pournader | 51a1782 | 2013-10-09 15:55:07 -0700 | [diff] [blame] | 899 | class cmap_format_12_or_13(CmapSubtable): |
jvr | 924e4e2 | 2003-02-08 10:45:23 +0000 | [diff] [blame] | 900 | |
def __init__(self, format):
    """Create a subtable of the given format number with nothing loaded.

    ``data`` and ``ttFont`` start as None; ``decompileHeader`` fills them in.
    ``reserved`` starts at 0.
    """
    self.ttFont = None
    self.data = None
    self.reserved = 0
    self.format = format
| 906 | |
def decompileHeader(self, data, ttFont):
    """Parse the 16-byte header and keep the rest for a later decompile().

    ``data`` must be the complete subtable: the header followed by exactly
    ``nGroups`` records of 12 bytes, with a total size equal to the
    ``length`` field (checked with an assertion). The header fields are
    stored on ``self``, the bytes after the header in ``self.data``, and
    the font in ``self.ttFont``.
    """
    fmt, reserved, length, language, nGroups = struct.unpack(">HHLLL", data[:16])
    expectedLength = 16 + nGroups * 12
    assert len(data) == expectedLength == length, "corrupt cmap table format %d (data length: %d, header length: %d)" % (fmt, len(data), length)
    self.format, self.reserved = fmt, reserved
    self.length, self.language = length, language
    self.nGroups = nGroups
    self.data = data[16:]
    self.ttFont = ttFont
| 917 | |
def decompile(self, data, ttFont):
    """Build ``self.cmap`` (character code -> glyph name) from the groups.

    Normally called with ``data=None, ttFont=None`` after
    ``decompileHeader`` has already run. A direct caller must pass both
    arguments; ``data`` is then the complete subtable, header included,
    and is handed to ``decompileHeader`` first.

    Each 12-byte group holds (startCharCode, endCharCode, glyphID). The
    glyph IDs for the group's codes come from ``self._computeGIDs``, which
    is not defined in this method.
    """
    if data is not None and ttFont is not None:
        # The previous code sliced `data` with undefined names `offset` and
        # `length`, so this path raised NameError.
        self.decompileHeader(data, ttFont)
    else:
        assert (data is None and ttFont is None), "Need both data and ttFont arguments"

    data = self.data  # decompileHeader assigns the data after the header to self.data
    charCodes = []
    gids = []
    pos = 0
    for i in range(self.nGroups):
        startCharCode, endCharCode, glyphID = struct.unpack(">LLL", data[pos:pos+12])
        pos += 12
        lenGroup = 1 + endCharCode - startCharCode
        charCodes.extend(range(startCharCode, endCharCode + 1))
        gids += self._computeGIDs(glyphID, lenGroup)
    self.data = data = None
    self.cmap = cmap = {}
    glyphOrder = self.ttFont.getGlyphOrder()
    try:
        names = [glyphOrder[gid] for gid in gids]
    except IndexError:
        # Some glyph ID lies outside the glyph order; let the font name it.
        getGlyphName = self.ttFont.getGlyphName
        names = [getGlyphName(gid) for gid in gids]
    cmap.update(zip(charCodes, names))
jvr | 924e4e2 | 2003-02-08 10:45:23 +0000 | [diff] [blame] | 946 | |
| 947 | def compile(self, ttFont): |
jvr | d299b55 | 2006-10-21 13:54:30 +0000 | [diff] [blame] | 948 | if self.data: |
Roozbeh Pournader | 51a1782 | 2013-10-09 15:55:07 -0700 | [diff] [blame] | 949 | return struct.pack(">HHLLL", self.format, self.reserved, self.length, self.language, self.nGroups) + self.data |
Behdad Esfahbod | c2297cd | 2013-11-27 06:26:55 -0500 | [diff] [blame] | 950 | charCodes = list(self.cmap.keys()) |
jvr | d299b55 | 2006-10-21 13:54:30 +0000 | [diff] [blame] | 951 | lenCharCodes = len(charCodes) |
Behdad Esfahbod | c2297cd | 2013-11-27 06:26:55 -0500 | [diff] [blame] | 952 | names = list(self.cmap.values()) |
jvr | d299b55 | 2006-10-21 13:54:30 +0000 | [diff] [blame] | 953 | nameMap = ttFont.getReverseGlyphMap() |
| 954 | try: |
Behdad Esfahbod | e5ca796 | 2013-11-27 04:38:16 -0500 | [diff] [blame] | 955 | gids = list(map(operator.getitem, [nameMap]*lenCharCodes, names)) |
jvr | d299b55 | 2006-10-21 13:54:30 +0000 | [diff] [blame] | 956 | except KeyError: |
Behdad Esfahbod | dc87372 | 2013-12-04 21:28:50 -0500 | [diff] [blame] | 957 | nameMap = ttFont.getReverseGlyphMap(rebuild=True) |
jvr | d299b55 | 2006-10-21 13:54:30 +0000 | [diff] [blame] | 958 | try: |
Behdad Esfahbod | e5ca796 | 2013-11-27 04:38:16 -0500 | [diff] [blame] | 959 | gids = list(map(operator.getitem, [nameMap]*lenCharCodes, names)) |
jvr | d299b55 | 2006-10-21 13:54:30 +0000 | [diff] [blame] | 960 | except KeyError: |
| 961 | # allow virtual GIDs in format 12 tables |
| 962 | gids = [] |
| 963 | for name in names: |
| 964 | try: |
| 965 | gid = nameMap[name] |
| 966 | except KeyError: |
| 967 | try: |
| 968 | if (name[:3] == 'gid'): |
| 969 | gid = eval(name[3:]) |
| 970 | else: |
| 971 | gid = ttFont.getGlyphID(name) |
| 972 | except: |
| 973 | raise KeyError(name) |
| 974 | |
| 975 | gids.append(gid) |
| 976 | |
| 977 | cmap = {} # code:glyphID mapping |
Behdad Esfahbod | e5ca796 | 2013-11-27 04:38:16 -0500 | [diff] [blame] | 978 | list(map(operator.setitem, [cmap]*len(charCodes), charCodes, gids)) |
jvr | d299b55 | 2006-10-21 13:54:30 +0000 | [diff] [blame] | 979 | |
jvr | 924e4e2 | 2003-02-08 10:45:23 +0000 | [diff] [blame] | 980 | charCodes.sort() |
jvr | d299b55 | 2006-10-21 13:54:30 +0000 | [diff] [blame] | 981 | index = 0 |
jvr | 924e4e2 | 2003-02-08 10:45:23 +0000 | [diff] [blame] | 982 | startCharCode = charCodes[0] |
| 983 | startGlyphID = cmap[startCharCode] |
Roozbeh Pournader | 51a1782 | 2013-10-09 15:55:07 -0700 | [diff] [blame] | 984 | lastGlyphID = startGlyphID - self._format_step |
jvr | d299b55 | 2006-10-21 13:54:30 +0000 | [diff] [blame] | 985 | lastCharCode = startCharCode - 1 |
jvr | 0cd79a5 | 2004-09-25 07:30:47 +0000 | [diff] [blame] | 986 | nGroups = 0 |
jvr | d299b55 | 2006-10-21 13:54:30 +0000 | [diff] [blame] | 987 | dataList = [] |
| 988 | maxIndex = len(charCodes) |
| 989 | for index in range(maxIndex): |
| 990 | charCode = charCodes[index] |
jvr | 924e4e2 | 2003-02-08 10:45:23 +0000 | [diff] [blame] | 991 | glyphID = cmap[charCode] |
Roozbeh Pournader | 51a1782 | 2013-10-09 15:55:07 -0700 | [diff] [blame] | 992 | if not self._IsInSameRun(glyphID, lastGlyphID, charCode, lastCharCode): |
jvr | d299b55 | 2006-10-21 13:54:30 +0000 | [diff] [blame] | 993 | dataList.append(struct.pack(">LLL", startCharCode, lastCharCode, startGlyphID)) |
jvr | 924e4e2 | 2003-02-08 10:45:23 +0000 | [diff] [blame] | 994 | startCharCode = charCode |
jvr | d299b55 | 2006-10-21 13:54:30 +0000 | [diff] [blame] | 995 | startGlyphID = glyphID |
jvr | 924e4e2 | 2003-02-08 10:45:23 +0000 | [diff] [blame] | 996 | nGroups = nGroups + 1 |
jvr | d299b55 | 2006-10-21 13:54:30 +0000 | [diff] [blame] | 997 | lastGlyphID = glyphID |
| 998 | lastCharCode = charCode |
| 999 | dataList.append(struct.pack(">LLL", startCharCode, lastCharCode, startGlyphID)) |
jvr | 0cd79a5 | 2004-09-25 07:30:47 +0000 | [diff] [blame] | 1000 | nGroups = nGroups + 1 |
Behdad Esfahbod | 18316aa | 2013-11-27 21:17:35 -0500 | [diff] [blame] | 1001 | data = bytesjoin(dataList) |
jvr | d299b55 | 2006-10-21 13:54:30 +0000 | [diff] [blame] | 1002 | lengthSubtable = len(data) +16 |
| 1003 | assert len(data) == (nGroups*12) == (lengthSubtable-16) |
| 1004 | return struct.pack(">HHLLL", self.format, self.reserved , lengthSubtable, self.language, nGroups) + data |
jvr | 924e4e2 | 2003-02-08 10:45:23 +0000 | [diff] [blame] | 1005 | |
| 1006 | def toXML(self, writer, ttFont): |
| 1007 | writer.begintag(self.__class__.__name__, [ |
| 1008 | ("platformID", self.platformID), |
| 1009 | ("platEncID", self.platEncID), |
| 1010 | ("format", self.format), |
| 1011 | ("reserved", self.reserved), |
| 1012 | ("length", self.length), |
| 1013 | ("language", self.language), |
| 1014 | ("nGroups", self.nGroups), |
| 1015 | ]) |
| 1016 | writer.newline() |
Behdad Esfahbod | ac1b435 | 2013-11-27 04:15:34 -0500 | [diff] [blame] | 1017 | codes = sorted(self.cmap.items()) |
jvr | a84b28d | 2004-09-25 09:06:58 +0000 | [diff] [blame] | 1018 | self._writeCodes(codes, writer) |
jvr | 924e4e2 | 2003-02-08 10:45:23 +0000 | [diff] [blame] | 1019 | writer.endtag(self.__class__.__name__) |
| 1020 | writer.newline() |
| 1021 | |
Behdad Esfahbod | 3a9fd30 | 2013-11-27 03:19:32 -0500 | [diff] [blame] | 1022 | def fromXML(self, name, attrs, content, ttFont): |
jvr | d299b55 | 2006-10-21 13:54:30 +0000 | [diff] [blame] | 1023 | self.format = safeEval(attrs["format"]) |
| 1024 | self.reserved = safeEval(attrs["reserved"]) |
| 1025 | self.length = safeEval(attrs["length"]) |
jvr | 924e4e2 | 2003-02-08 10:45:23 +0000 | [diff] [blame] | 1026 | self.language = safeEval(attrs["language"]) |
jvr | d299b55 | 2006-10-21 13:54:30 +0000 | [diff] [blame] | 1027 | self.nGroups = safeEval(attrs["nGroups"]) |
| 1028 | if not hasattr(self, "cmap"): |
| 1029 | self.cmap = {} |
| 1030 | cmap = self.cmap |
| 1031 | |
jvr | 924e4e2 | 2003-02-08 10:45:23 +0000 | [diff] [blame] | 1032 | for element in content: |
Behdad Esfahbod | b774f9f | 2013-11-27 05:17:37 -0500 | [diff] [blame] | 1033 | if not isinstance(element, tuple): |
jvr | 924e4e2 | 2003-02-08 10:45:23 +0000 | [diff] [blame] | 1034 | continue |
| 1035 | name, attrs, content = element |
Behdad Esfahbod | 180ace6 | 2013-11-27 02:40:30 -0500 | [diff] [blame] | 1036 | if name != "map": |
jvr | 924e4e2 | 2003-02-08 10:45:23 +0000 | [diff] [blame] | 1037 | continue |
jvr | d299b55 | 2006-10-21 13:54:30 +0000 | [diff] [blame] | 1038 | cmap[safeEval(attrs["code"])] = attrs["name"] |
jvr | 924e4e2 | 2003-02-08 10:45:23 +0000 | [diff] [blame] | 1039 | |
| 1040 | |
class cmap_format_12(cmap_format_12_or_13):
    """Format 12 flavour: glyph IDs rise by one per character code in a group."""

    def __init__(self, format):
        cmap_format_12_or_13.__init__(self, format)
        # Glyph ID difference between two neighbouring codes of the same run.
        self._format_step = 1

    def _computeGIDs(self, startingGlyph, numberOfGlyphs):
        """Return ``numberOfGlyphs`` consecutive glyph IDs starting at ``startingGlyph``."""
        return [startingGlyph + delta for delta in range(numberOfGlyphs)]

    def _IsInSameRun(self, glyphID, lastGlyphID, charCode, lastCharCode):
        """Tell whether both the glyph ID and the char code follow the previous pair by one."""
        if glyphID != lastGlyphID + 1:
            return False
        return charCode == lastCharCode + 1
| 1051 | |
| 1052 | |
class cmap_format_13(cmap_format_12_or_13):
    """Format 13 flavour: every character code in a group maps to one glyph ID."""

    def __init__(self, format):
        cmap_format_12_or_13.__init__(self, format)
        # Glyph IDs do not advance inside a run.
        self._format_step = 0

    def _computeGIDs(self, startingGlyph, numberOfGlyphs):
        """Return ``startingGlyph`` repeated ``numberOfGlyphs`` times."""
        return [startingGlyph for _ in range(numberOfGlyphs)]

    def _IsInSameRun(self, glyphID, lastGlyphID, charCode, lastCharCode):
        """Tell whether the glyph ID is unchanged and the char code follows the previous one."""
        if glyphID != lastGlyphID:
            return False
        return charCode == lastCharCode + 1
| 1063 | |
| 1064 | |
def cvtToUVS(threeByteString):
    """Decode three big-endian bytes into an integer."""
    # Pad with a leading zero byte so the value unpacks as an unsigned 32-bit int.
    (value,) = struct.unpack(">L", b"\0" + threeByteString)
    return value
| 1069 | |
def cvtFromUVS(val):
    """Encode ``val`` as three big-endian bytes; ``val`` must fit in 24 bits."""
    assert 0 <= val < 0x1000000
    # Pack as 32 bits and drop the (always zero) most significant byte.
    return struct.pack(">L", val)[1:]
jvr | 0cb8a08 | 2008-05-16 15:07:09 +0000 | [diff] [blame] | 1074 | |
Behdad Esfahbod | b7fd2e1 | 2013-11-27 18:58:45 -0500 | [diff] [blame] | 1075 | |
class cmap_format_14(CmapSubtable):
    """Format 14 cmap subtable (variation selector records).

    The decoded form lives in ``self.uvsDict``, which maps a variation
    selector to a list of ``(uv, glyphName)`` pairs. Pairs read from the
    "default" list carry ``None`` as glyph name; pairs read from the
    "non-default" list carry the glyph name. ``self.cmap`` is kept as an
    empty dict so that clients expecting it on every subtable do not fail.
    """

    def decompileHeader(self, data, ttFont):
        """Parse the 10-byte header and keep the remaining bytes in ``self.data``."""
        format, length, numVarSelectorRecords = struct.unpack(">HLL", data[:10])
        self.data = data[10:]
        self.length = length
        self.numVarSelectorRecords = numVarSelectorRecords
        self.ttFont = ttFont
        self.language = 0xFF  # has no language.

    def decompile(self, data, ttFont):
        """Decode the variation selector records into ``self.uvsDict``.

        Pass ``None`` for both arguments to decode the data saved by
        ``decompileHeader()``, or pass both the complete subtable bytes and
        the font to parse from scratch.
        """
        # Explicit arguments are honoured regardless of ttFont.lazy; gating
        # on that flag sent valid direct calls into the assertion below.
        if data is not None and ttFont is not None:
            self.decompileHeader(data, ttFont)
        else:
            assert (data is None and ttFont is None), "Need both data and ttFont arguments"
        data = self.data

        self.cmap = {}  # so that clients that expect this to exist in a cmap table won't fail.
        uvsDict = {}
        recOffset = 0
        for n in range(self.numVarSelectorRecords):
            uvs, defOVSOffset, nonDefUVSOffset = struct.unpack(">3sLL", data[recOffset:recOffset+11])
            recOffset += 11
            varUVS = cvtToUVS(uvs)
            if defOVSOffset:
                # Offsets count from the subtable start, but self.data
                # begins after the 10-byte header.
                startOffset = defOVSOffset - 10
                numValues, = struct.unpack(">L", data[startOffset:startOffset+4])
                startOffset += 4
                for r in range(numValues):
                    uv, addtlCnt = struct.unpack(">3sB", data[startOffset:startOffset+4])
                    startOffset += 4
                    firstBaseUV = cvtToUVS(uv)
                    cnt = addtlCnt + 1
                    # Each record covers cnt consecutive values, all without a glyph name.
                    localUVList = [(baseUV, None) for baseUV in range(firstBaseUV, firstBaseUV + cnt)]
                    uvsDict.setdefault(varUVS, []).extend(localUVList)

            if nonDefUVSOffset:
                startOffset = nonDefUVSOffset - 10
                numRecs, = struct.unpack(">L", data[startOffset:startOffset+4])
                startOffset += 4
                localUVList = []
                for r in range(numRecs):
                    uv, gid = struct.unpack(">3sH", data[startOffset:startOffset+5])
                    startOffset += 5
                    uv = cvtToUVS(uv)
                    glyphName = self.ttFont.getGlyphName(gid)
                    localUVList.append([uv, glyphName])
                uvsDict.setdefault(varUVS, []).extend(localUVList)

        self.uvsDict = uvsDict

    def toXML(self, writer, ttFont):
        """Write one <map> element per (uvs, uv, name) triple, sorted by selector."""
        writer.begintag(self.__class__.__name__, [
            ("platformID", self.platformID),
            ("platEncID", self.platEncID),
            ("format", self.format),
            ("length", self.length),
            ("numVarSelectorRecords", self.numVarSelectorRecords),
        ])
        writer.newline()
        uvsDict = self.uvsDict
        uvsList = sorted(uvsDict.keys())
        for uvs in uvsList:
            uvList = uvsDict[uvs]
            # Entries without a glyph name sort first; the list is sorted in place.
            uvList.sort(key=lambda item: (item[1] is not None, item[0], item[1]))
            for uv, gname in uvList:
                if gname is None:
                    gname = "None"
                # I use the arg rather than the keyword syntax in order to preserve the attribute order.
                writer.simpletag("map", [("uvs", hex(uvs)), ("uv", hex(uv)), ("name", gname)])
                writer.newline()
        writer.endtag(self.__class__.__name__)
        writer.newline()

    def fromXML(self, name, attrs, content, ttFont):
        """Load header fields from ``attrs`` and append every <map> child to ``self.uvsDict``."""
        self.format = safeEval(attrs["format"])
        self.length = safeEval(attrs["length"])
        self.numVarSelectorRecords = safeEval(attrs["numVarSelectorRecords"])
        self.language = 0xFF  # provide a value so that CmapSubtable.__lt__() won't fail
        if not hasattr(self, "cmap"):
            self.cmap = {}  # so that clients that expect this to exist in a cmap table won't fail.
        if not hasattr(self, "uvsDict"):
            self.uvsDict = {}
        uvsDict = self.uvsDict

        for element in content:
            if not isinstance(element, tuple):
                continue
            childName, childAttrs, _ = element
            if childName != "map":
                continue
            uvs = safeEval(childAttrs["uvs"])
            uv = safeEval(childAttrs["uv"])
            gname = childAttrs["name"]
            if gname == "None":
                # toXML writes the literal text "None" for entries without a glyph name.
                gname = None
            uvsDict.setdefault(uvs, []).append([uv, gname])

    def compile(self, ttFont):
        """Return the binary subtable.

        If undecoded data saved by ``decompileHeader()`` is still present it
        is re-emitted behind a freshly packed header; otherwise the subtable
        is built from ``self.uvsDict``. ``self.length`` and
        ``self.numVarSelectorRecords`` are updated as a side effect.
        """
        if self.data:
            return struct.pack(">HLL", self.format, self.length, self.numVarSelectorRecords) + self.data

        uvsDict = self.uvsDict
        uvsList = sorted(uvsDict.keys())
        self.numVarSelectorRecords = len(uvsList)
        offset = 10 + self.numVarSelectorRecords*11  # current value is end of VarSelectorRecords block.
        data = []
        varSelectorRecords = []
        for uvs in uvsList:
            entryList = uvsDict[uvs]

            defList = [entry for entry in entryList if entry[1] is None]
            if defList:
                defList = [entry[0] for entry in defList]
                defOVSOffset = offset
                defList.sort()

                # Run-length encode consecutive values: each record stores the
                # first value plus the number of additional values in the run.
                lastUV = defList[0]
                cnt = -1
                defRecs = []
                for defEntry in defList:
                    cnt += 1
                    # The additional count is packed as one unsigned byte, so
                    # a run is also closed once it holds 256 values.
                    if (lastUV+cnt) != defEntry or cnt > 255:
                        rec = struct.pack(">3sB", cvtFromUVS(lastUV), cnt-1)
                        lastUV = defEntry
                        defRecs.append(rec)
                        cnt = 0

                rec = struct.pack(">3sB", cvtFromUVS(lastUV), cnt)
                defRecs.append(rec)

                numDefRecs = len(defRecs)
                data.append(struct.pack(">L", numDefRecs))
                data.extend(defRecs)
                offset += 4 + numDefRecs*4
            else:
                defOVSOffset = 0

            ndefList = [entry for entry in entryList if entry[1] is not None]
            if ndefList:
                nonDefUVSOffset = offset
                ndefList.sort()
                numNonDefRecs = len(ndefList)
                data.append(struct.pack(">L", numNonDefRecs))
                offset += 4 + numNonDefRecs*5

                for uv, gname in ndefList:
                    gid = ttFont.getGlyphID(gname)
                    ndrec = struct.pack(">3sH", cvtFromUVS(uv), gid)
                    data.append(ndrec)
            else:
                nonDefUVSOffset = 0

            vrec = struct.pack(">3sLL", cvtFromUVS(uvs), defOVSOffset, nonDefUVSOffset)
            varSelectorRecords.append(vrec)

        data = bytesjoin(varSelectorRecords) + bytesjoin(data)
        self.length = 10 + len(data)
        headerdata = struct.pack(">HLL", self.format, self.length, self.numVarSelectorRecords)
        # Do not store the result in self.data: that attribute holds
        # header-less bytes, and the fast path above would prepend a second
        # header on the next compile().
        return headerdata + data
| 1250 | |
| 1251 | |
class cmap_format_unknown(CmapSubtable):
    """Fallback for cmap subtable formats without a dedicated class.

    The raw subtable bytes are kept untouched in ``self.data`` and are
    round-tripped as a hex dump.
    """

    def toXML(self, writer, ttFont):
        """Dump the raw subtable bytes as hex inside a ``cmap_format_<N>`` element."""
        # The first 12 characters of the class name are "cmap_format_".
        cmapName = self.__class__.__name__[:12] + str(self.format)
        writer.begintag(cmapName, [
            ("platformID", self.platformID),
            ("platEncID", self.platEncID),
        ])
        writer.newline()
        writer.dumphex(self.data)
        writer.endtag(cmapName)
        writer.newline()

    def fromXML(self, name, attrs, content, ttFont):
        """Restore the raw bytes from the hex dump; no character mapping is available."""
        self.data = readHex(content)
        self.cmap = {}

    def decompileHeader(self, data, ttFont):
        """Store the subtable bytes as-is; nothing is parsed."""
        self.language = 0  # dummy value
        self.data = data

    def decompile(self, data, ttFont):
        """Keep the raw bytes; there is nothing further to decode.

        We usually get here indirectly from the subtable __getattr__ function,
        in which case both args must be None. If not, someone is calling the
        subtable decompile() directly, and must provide both args.
        """
        if data is not None and ttFont is not None:
            # 'data' is already this subtable's bytes; the previous slice
            # used undefined names 'offset' and 'length' (NameError).
            self.decompileHeader(data, ttFont)
        else:
            assert (data is None and ttFont is None), "Need both data and ttFont arguments"

    def compile(self, ttFont):
        """Return the stored raw bytes, or None when there are none."""
        if self.data:
            return self.data
        else:
            return None
Just | 7842e56 | 1999-12-16 21:34:53 +0000 | [diff] [blame] | 1286 | |
# Maps a cmap subtable format number to the class that implements it.
cmap_classes = {
    0: cmap_format_0,
    2: cmap_format_2,
    4: cmap_format_4,
    6: cmap_format_6,
    12: cmap_format_12,
    13: cmap_format_13,
    14: cmap_format_14,
}