Behdad Esfahbod | 1ae2959 | 2014-01-14 15:07:50 +0800 | [diff] [blame] | 1 | from __future__ import print_function, division, absolute_import |
Behdad Esfahbod | 30e691e | 2013-11-27 17:27:45 -0500 | [diff] [blame] | 2 | from fontTools.misc.py23 import * |
| 3 | from fontTools.misc.textTools import safeEval, readHex |
Behdad Esfahbod | 0f74e80 | 2014-03-12 12:17:33 -0700 | [diff] [blame] | 4 | from fontTools.unicode import Unicode |
Behdad Esfahbod | 2b06aaa | 2013-11-27 02:34:11 -0500 | [diff] [blame] | 5 | from . import DefaultTable |
Behdad Esfahbod | 30e691e | 2013-11-27 17:27:45 -0500 | [diff] [blame] | 6 | import sys |
Just | 7842e56 | 1999-12-16 21:34:53 +0000 | [diff] [blame] | 7 | import struct |
Just | 7842e56 | 1999-12-16 21:34:53 +0000 | [diff] [blame] | 8 | import array |
jvr | d299b55 | 2006-10-21 13:54:30 +0000 | [diff] [blame] | 9 | import operator |
Just | 7842e56 | 1999-12-16 21:34:53 +0000 | [diff] [blame] | 10 | |
| 11 | |
class table__c_m_a_p(DefaultTable.DefaultTable):
    """The 'cmap' table: a version number plus a list of encoding subtables.

    After decompile() or fromXML(), ``self.tableVersion`` holds the table
    version and ``self.tables`` holds one subtable object per encoding record.
    """

    def getcmap(self, platformID, platEncID):
        """Return the first subtable with the given IDs, or None if absent."""
        for subtable in self.tables:
            if (subtable.platformID == platformID and
                    subtable.platEncID == platEncID):
                return subtable
        return None  # not found

    def decompile(self, data, ttFont):
        """Parse the binary table in ``data`` into ``self.tables``.

        Only each subtable's header is decoded here; the mapping itself is
        decoded lazily when an attribute of the subtable is first referenced.
        Subtables reported with zero length are skipped with a printed message.
        """
        tableVersion, numSubTables = struct.unpack(">HH", data[:4])
        self.tableVersion = int(tableVersion)
        self.tables = tables = []
        # Maps a subtable offset to the first subtable object decoded from it.
        # The object itself is stored (not the record index): skipped
        # zero-length subtables make record indices and positions in
        # ``tables`` diverge.
        seenOffsets = {}
        for i in range(numSubTables):
            platformID, platEncID, offset = struct.unpack(
                ">HHl", data[4+i*8:4+(i+1)*8])
            platformID, platEncID = int(platformID), int(platEncID)
            subtableFormat, length = struct.unpack(">HH", data[offset:offset+4])
            if subtableFormat in [8, 10, 12, 13]:
                # these formats have a reserved word and a 32-bit length
                subtableFormat, reserved, length = struct.unpack(
                    ">HHL", data[offset:offset+8])
            elif subtableFormat in [14]:
                # format 14 has a 32-bit length directly after the format
                subtableFormat, length = struct.unpack(">HL", data[offset:offset+6])

            if not length:
                print("Error: cmap subtable is reported as having zero length: platformID %s, platEncID %s, format %s offset %s. Skipping table." % (platformID, platEncID, subtableFormat, offset))
                continue
            if subtableFormat not in cmap_classes:
                table = cmap_format_unknown(subtableFormat)
            else:
                table = cmap_classes[subtableFormat](subtableFormat)
            table.platformID = platformID
            table.platEncID = platEncID
            # Note that by default we decompile only the subtable header info;
            # any other data gets decompiled only when an attribute of the
            # subtable is referenced.
            table.decompileHeader(data[offset:offset+int(length)], ttFont)
            if offset in seenOffsets:
                # Several encoding records point at the same bytes: share the
                # mapping object with the first subtable decoded from them.
                table.cmap = seenOffsets[offset].cmap
            else:
                seenOffsets[offset] = table
            tables.append(table)

    def compile(self, ttFont):
        """Return the binary form of the table.

        Subtables sharing the same ``cmap`` object, or compiling to identical
        bytes, are written once and referenced by several encoding records.
        """
        self.tables.sort()  # sort according to the spec; see CmapSubtable.__lt__()
        numSubTables = len(self.tables)
        totalOffset = 4 + 8 * numSubTables
        records = [struct.pack(">HH", self.tableVersion, numSubTables)]
        chunks = []
        chunksLength = 0
        seen = {}  # Some tables are the same object reference. Don't compile them twice.
        done = {}  # Some tables are different objects, but compile to the same data chunk
        for table in self.tables:
            try:
                offset = seen[id(table.cmap)]
            except KeyError:
                chunk = table.compile(ttFont)
                if chunk in done:
                    offset = done[chunk]
                else:
                    offset = seen[id(table.cmap)] = done[chunk] = totalOffset + chunksLength
                    chunks.append(chunk)
                    chunksLength += len(chunk)
            records.append(struct.pack(">HHl", table.platformID, table.platEncID, offset))
        # join once instead of growing a bytes object inside the loop
        return b"".join(records + chunks)

    def toXML(self, writer, ttFont):
        """Write the table version followed by every subtable."""
        writer.simpletag("tableVersion", version=self.tableVersion)
        writer.newline()
        for table in self.tables:
            table.toXML(writer, ttFont)

    def fromXML(self, name, attrs, content, ttFont):
        """Handle one child element: 'tableVersion' or a 'cmap_format_N' subtable.

        Elements with any other name are ignored.
        """
        if name == "tableVersion":
            self.tableVersion = safeEval(attrs["version"])
            return
        if name[:12] != "cmap_format_":
            return
        if not hasattr(self, "tables"):
            self.tables = []
        subtableFormat = safeEval(name[12:])
        if subtableFormat not in cmap_classes:
            table = cmap_format_unknown(subtableFormat)
        else:
            table = cmap_classes[subtableFormat](subtableFormat)
        table.platformID = safeEval(attrs["platformID"])
        table.platEncID = safeEval(attrs["platEncID"])
        table.fromXML(name, attrs, content, ttFont)
        self.tables.append(table)
| 99 | |
| 100 | |
class CmapSubtable(object):
    """Base class for a single cmap subtable.

    decompileHeader() stores the undecoded body in ``self.data``. The first
    access to an attribute that is not set yet (typically ``cmap``) triggers
    the subclass's decompile() through __getattr__.
    """

    def __init__(self, format):
        self.format = format
        self.data = None
        self.ttFont = None

    def __getattr__(self, attr):
        # allow lazy decompilation of subtables.
        if attr[:2] == '__':  # don't handle requests for member functions like '__lt__'
            raise AttributeError(attr)
        # Read __dict__ directly: on an instance created without __init__,
        # a plain ``self.data`` lookup would re-enter __getattr__ endlessly.
        if self.__dict__.get("data") is None:
            raise AttributeError(attr)
        self.decompile(None, None)  # use saved data.
        self.data = None  # Once this table has been decompiled, make sure we don't
                          # just return the original data. Also avoids recursion when
                          # called with an attribute that the cmap subtable doesn't have.
        return getattr(self, attr)

    def decompileHeader(self, data, ttFont):
        """Decode the 6-byte format/length/language header of ``data``.

        ``data`` must be exactly the subtable (its length is checked against
        the header). The bytes after the header are kept in ``self.data``
        and ``ttFont`` in ``self.ttFont`` for later lazy decompilation.
        """
        format, length, language = struct.unpack(">HHH", data[:6])
        assert len(data) == length, "corrupt cmap table format %d (data length: %d, header length: %d)" % (format, len(data), length)
        self.format = int(format)
        self.length = int(length)
        self.language = int(language)
        self.data = data[6:]
        self.ttFont = ttFont

    def toXML(self, writer, ttFont):
        """Write the subtable as an element named after its class."""
        writer.begintag(self.__class__.__name__, [
            ("platformID", self.platformID),
            ("platEncID", self.platEncID),
            ("language", self.language),
        ])
        writer.newline()
        codes = sorted(self.cmap.items())
        self._writeCodes(codes, writer)
        writer.endtag(self.__class__.__name__)
        writer.newline()

    def isUnicode(self):
        """Return True for platform 0, or platform 3 with encoding 1 or 10."""
        return (self.platformID == 0 or
                (self.platformID == 3 and self.platEncID in [1, 10]))

    def isSymbol(self):
        """Return True for platform 3, encoding 0."""
        return self.platformID == 3 and self.platEncID == 0

    def _writeCodes(self, codes, writer):
        """Write one <map> element per (code, name) pair.

        For Unicode subtables each element is followed by a comment holding
        the ``Unicode[code]`` lookup result.
        """
        isUnicode = self.isUnicode()
        for code, name in codes:
            writer.simpletag("map", code=hex(code), name=name)
            if isUnicode:
                writer.comment(Unicode[code])
            writer.newline()

    def _sortKey(self):
        """Return an always-comparable (platformID, platEncID, language) key.

        Each component is paired with a "has a value" flag so that a missing
        attribute (None) sorts before any number instead of raising TypeError
        on Python 3.
        """
        key = []
        for name in ("platformID", "platEncID", "language"):
            value = getattr(self, name, None)
            key.append((value is not None, 0 if value is None else value))
        return tuple(key)

    def __lt__(self, other):
        """Order subtables by platformID, platEncID, then language.

        Implemented so that list.sort() sorts according to the spec.
        Subtables with equal keys are never "less than" each other, so the
        stable sort keeps their relative order. (The instance ``__dict__``
        used to be the final tie-breaker, but dicts cannot be ordered on
        Python 3.)
        """
        if not isinstance(other, CmapSubtable):
            return NotImplemented
        return self._sortKey() < other._sortKey()
Just | 7842e56 | 1999-12-16 21:34:53 +0000 | [diff] [blame] | 172 | |
| 173 | |
class cmap_format_0(CmapSubtable):
    """Format 0 subtable: one glyph index byte for each character code."""

    def decompile(self, data, ttFont):
        """Build ``self.cmap`` (character code -> glyph name).

        Either pass both ``data`` (the complete subtable, header included) and
        ``ttFont``, or pass None for both to decode the data saved earlier by
        decompileHeader().
        """
        # we usually get here indirectly from the subtable __getattr__ function, in which case both args must be None.
        # If not, someone is calling the subtable decompile() directly, and must provide both args.
        if data is not None and ttFont is not None:
            self.decompileHeader(data, ttFont)
        else:
            assert (data is None and ttFont is None), "Need both data and ttFont arguments"
        data = self.data  # decompileHeader assigns the data after the header to self.data
        assert 262 == self.length, "Format 0 cmap subtable not 262 bytes"
        # The array constructor accepts a byte string directly, which avoids
        # array.fromstring() (removed in Python 3.9).
        glyphIdArray = array.array("B", data)
        getGlyphName = self.ttFont.getGlyphName
        self.cmap = cmap = {}
        for charCode, glyphID in enumerate(glyphIdArray):
            cmap[charCode] = getGlyphName(glyphID)

    def compile(self, ttFont):
        """Return the 262-byte binary subtable.

        If undecoded data is still held it is written back unchanged;
        otherwise ``self.cmap`` must map exactly the codes 0..255.
        """
        if self.data:
            return struct.pack(">HHH", 0, 262, self.language) + self.data

        charCodeList = sorted(self.cmap.items())
        charCodes = [entry[0] for entry in charCodeList]
        assert charCodes == list(range(256))
        glyphIdArray = array.array(
            "B", [ttFont.getGlyphID(glyphName) for _, glyphName in charCodeList])
        try:
            packed = glyphIdArray.tobytes()
        except AttributeError:
            # Python 2 arrays only provide tostring()
            packed = glyphIdArray.tostring()
        data = struct.pack(">HHH", 0, 262, self.language) + packed
        assert len(data) == 262
        return data

    def fromXML(self, name, attrs, content, ttFont):
        """Read the language attribute and the <map> children into ``self.cmap``."""
        self.language = safeEval(attrs["language"])
        if not hasattr(self, "cmap"):
            self.cmap = {}
        cmap = self.cmap
        for element in content:
            if not isinstance(element, tuple):
                continue
            # separate names so the method's own parameters are not rebound
            childName, childAttrs, childContent = element
            if childName != "map":
                continue
            cmap[safeEval(childAttrs["code"])] = childAttrs["name"]
Just | 7842e56 | 1999-12-16 21:34:53 +0000 | [diff] [blame] | 221 | |
| 222 | |
# struct format of one format 2 subheader, in field order:
# firstCode, entryCount, idDelta (the only signed field), idRangeOffset.
subHeaderFormat = ">HHhH"


class SubHeader(object):
    """Plain record holding the fields of one cmap format 2 subheader."""

    def __init__(self):
        self.firstCode = None
        self.entryCount = None
        self.idDelta = None
        self.idRangeOffset = None
        # glyph indices of this subheader's range; a fresh list per instance
        self.glyphIndexArray = []
| 231 | |
Just | 7842e56 | 1999-12-16 21:34:53 +0000 | [diff] [blame] | 232 | class cmap_format_2(CmapSubtable): |
| 233 | |
def setIDDelta(self, subHeader):
    """Choose ``subHeader.idDelta`` and rebase its glyph index array in place.

    idDelta is picked so that the smallest non-zero glyph index is stored
    as 1. Zero entries (notdef) are left untouched. When the array has no
    non-zero entry, idDelta is simply set to 0.
    """
    subHeader.idDelta = 0
    # find the minGI which is not zero.
    nonZeroGids = [gid for gid in subHeader.glyphIndexArray if gid != 0]
    if not nonZeroGids:
        # empty or all-notdef range: nothing to rebase
        return
    minGI = min(nonZeroGids)
    # The lowest gid in glyphIndexArray, after subtracting idDelta, must be 1.
    # idDelta is a short, and must be between -32K and 32K. minGI can be between 1 and 64K.
    # We would like to pick an idDelta such that the first glyphArray GID is 1,
    # so that we are more likely to be able to combine glypharray GID subranges.
    # This means that we have a problem when minGI is > 32K.
    # Since the final gi is reconstructed from the glyphArray GID by:
    #    finalGID = (gid + idDelta) % 0x10000
    # we can get from a glypharray GID of 1 to a final GID of 65K by subtracting 2,
    # i.e. by using the negative idDelta that is congruent to minGI - 1.

    if minGI > 1:
        idDelta = minGI - 1
        if idDelta > 0x7FFF:
            # does not fit a signed short: use the equivalent negative value
            idDelta -= 0x10000
        subHeader.idDelta = idDelta
        for i in range(subHeader.entryCount):
            gid = subHeader.glyphIndexArray[i]
            if gid > 0:
                # wrap so the stored index stays an unsigned 16-bit value
                subHeader.glyphIndexArray[i] = (gid - idDelta) % 0x10000
| 261 | |
| 262 | |
def decompile(self, data, ttFont):
    """Build ``self.cmap`` (character code -> glyph name) for a format 2 subtable.

    Either pass both ``data`` (the complete subtable, header included) and
    ``ttFont``, or pass None for both to decode the data saved earlier by
    decompileHeader(). ``self.data`` is reset to an empty byte string.
    """
    # we usually get here indirectly from the subtable __getattr__ function, in which case both args must be None.
    # If not, someone is calling the subtable decompile() directly, and must provide both args.
    if data is not None and ttFont is not None:
        self.decompileHeader(data, ttFont)
    else:
        assert (data is None and ttFont is None), "Need both data and ttFont arguments"

    data = self.data  # decompileHeader assigns the data after the header to self.data

    # get the key array, and determine the number of subHeaders.
    # (The array constructor takes the byte string directly; array.fromstring()
    # no longer exists in Python 3.9+.)
    allKeys = array.array("H", data[:512])
    data = data[512:]
    if sys.byteorder != "big":
        allKeys.byteswap()
    subHeaderKeys = [key // 8 for key in allKeys]
    maxSubHeaderindex = max(subHeaderKeys)

    # Load subHeaders
    subHeaderList = []
    pos = 0
    for i in range(maxSubHeaderindex + 1):
        subHeader = SubHeader()
        (subHeader.firstCode, subHeader.entryCount, subHeader.idDelta,
         subHeader.idRangeOffset) = struct.unpack(subHeaderFormat, data[pos:pos + 8])
        pos += 8
        # idRangeOffset counts from its own word, which ends at ``pos``
        giDataPos = pos + subHeader.idRangeOffset - 2
        giList = array.array("H", data[giDataPos:giDataPos + subHeader.entryCount*2])
        if sys.byteorder != "big":
            giList.byteswap()
        subHeader.glyphIndexArray = giList
        subHeaderList.append(subHeader)
    # How this gets processed.
    # Charcodes may be one or two bytes.
    # The first byte of a charcode is mapped through the subHeaderKeys, to select
    # a subHeader. For any subheader but 0, the next byte is then mapped through the
    # selected subheader. If subheader Index 0 is selected, then the byte itself is
    # mapped through the subheader, and there is no second byte.
    # Then assume that the subsequent byte is the first byte of the next charcode, and repeat.
    #
    # Each subheader references a range in the glyphIndexArray whose length is entryCount.
    # The range in glyphIndexArray referenced by a subheader may overlap with the range in glyphIndexArray
    # referenced by another subheader.
    # The only subheader that will be referenced by more than one first-byte value is the subheader
    # that maps the entire range of glyphID values to glyphIndex 0, e.g. notdef:
    #    {firstChar 0, EntryCount 0, idDelta 0, idRangeOffset xx}
    # A byte being mapped through a subheader is treated as an index into a mapping of array index to font glyphIndex.
    # A subheader specifies a subrange within (0...256) by the
    # firstChar and EntryCount values. If the byte value is outside the subrange, then the glyphIndex is zero
    # (e.g. glyph not in font).
    # If the byte index is in the subrange, then an offset index is calculated as (byteIndex - firstChar).
    # The index to glyphIndex mapping is a subrange of the glyphIndexArray. You find the start of the subrange by
    # counting idRangeOffset bytes from the idRangeOffset word. The first value in this subrange is the
    # glyphIndex for the index firstChar. The offset index should then be used in this array to get the glyphIndex.
    # Example for Logocut-Medium
    # first byte of charcode = 129; selects subheader 1.
    # subheader 1 = {firstChar 64, EntryCount 108, idDelta 42, idRangeOffset 0252}
    # second byte of charCode = 66
    # the index offset = 66-64 = 2.
    # The subrange of the glyphIndexArray starting at 0x0252 bytes from the idRangeOffset word is:
    # [glyphIndexArray index], [subrange array index] = glyphIndex
    # [256], [0]=1    from charcode [129, 64]
    # [257], [1]=2    from charcode [129, 65]
    # [258], [2]=3    from charcode [129, 66]
    # [259], [3]=4    from charcode [129, 67]
    # So, the glyphIndex = 3 from the array. Then if idDelta is not zero and the glyph ID is not zero,
    # add it to the glyphID to get the final glyphIndex
    # value. In this case the final glyph index = 3 + 42 -> 45 for the final glyphIndex. Whew!

    self.data = b""
    self.cmap = cmap = {}
    for firstByte in range(256):
        subHeadindex = subHeaderKeys[firstByte]
        subHeader = subHeaderList[subHeadindex]
        if subHeadindex == 0:
            # one-byte character code: the byte itself indexes subheader 0
            if (firstByte < subHeader.firstCode) or (firstByte >= subHeader.firstCode + subHeader.entryCount):
                continue  # gi is notdef.
            gi = subHeader.glyphIndexArray[firstByte - subHeader.firstCode]
            if gi == 0:
                continue  # gi is notdef.
            cmap[firstByte] = (gi + subHeader.idDelta) % 0x10000
        else:
            # two-byte character codes sharing this first byte.
            # If entryCount is 0, then all char codes with this first byte are
            # mapped to .notdef. We can skip this subtable, and leave the glyphs un-encoded, which is the
            # same as mapping it to .notdef.
            charCodeOffset = firstByte * 256 + subHeader.firstCode
            for offsetIndex in range(subHeader.entryCount):
                gi = subHeader.glyphIndexArray[offsetIndex]
                if gi == 0:
                    continue
                cmap[charCodeOffset + offsetIndex] = (gi + subHeader.idDelta) % 0x10000

    # cmap values are GID's at this point; replace them with glyph names.
    glyphOrder = self.ttFont.getGlyphOrder()
    charCodes = list(cmap.keys())
    gids = list(cmap.values())
    try:
        names = [glyphOrder[gid] for gid in gids]
    except IndexError:
        # some gid lies beyond the glyph order: let the font name every glyph
        getGlyphName = self.ttFont.getGlyphName
        names = [getGlyphName(gid) for gid in gids]
    cmap.update(zip(charCodes, names))
jvr | d299b55 | 2006-10-21 13:54:30 +0000 | [diff] [blame] | 378 | |
jvr | bafa66e | 2003-08-28 18:04:23 +0000 | [diff] [blame] | 379 | |
Just | 7842e56 | 1999-12-16 21:34:53 +0000 | [diff] [blame] | 380 | def compile(self, ttFont): |
jvr | d299b55 | 2006-10-21 13:54:30 +0000 | [diff] [blame] | 381 | if self.data: |
| 382 | return struct.pack(">HHH", self.format, self.length, self.language) + self.data |
jvr | bafa66e | 2003-08-28 18:04:23 +0000 | [diff] [blame] | 383 | kEmptyTwoCharCodeRange = -1 |
jvr | d299b55 | 2006-10-21 13:54:30 +0000 | [diff] [blame] | 384 | notdefGI = 0 |
| 385 | |
Behdad Esfahbod | ac1b435 | 2013-11-27 04:15:34 -0500 | [diff] [blame] | 386 | items = sorted(self.cmap.items()) |
jvr | d299b55 | 2006-10-21 13:54:30 +0000 | [diff] [blame] | 387 | charCodes = [item[0] for item in items] |
| 388 | names = [item[1] for item in items] |
| 389 | nameMap = ttFont.getReverseGlyphMap() |
| 390 | lenCharCodes = len(charCodes) |
| 391 | try: |
Behdad Esfahbod | e5ca796 | 2013-11-27 04:38:16 -0500 | [diff] [blame] | 392 | gids = list(map(operator.getitem, [nameMap]*lenCharCodes, names)) |
jvr | d299b55 | 2006-10-21 13:54:30 +0000 | [diff] [blame] | 393 | except KeyError: |
Behdad Esfahbod | dc87372 | 2013-12-04 21:28:50 -0500 | [diff] [blame] | 394 | nameMap = ttFont.getReverseGlyphMap(rebuild=True) |
jvr | d299b55 | 2006-10-21 13:54:30 +0000 | [diff] [blame] | 395 | try: |
Behdad Esfahbod | e5ca796 | 2013-11-27 04:38:16 -0500 | [diff] [blame] | 396 | gids = list(map(operator.getitem, [nameMap]*lenCharCodes, names)) |
jvr | d299b55 | 2006-10-21 13:54:30 +0000 | [diff] [blame] | 397 | except KeyError: |
| 398 | # allow virtual GIDs in format 2 tables |
| 399 | gids = [] |
| 400 | for name in names: |
| 401 | try: |
| 402 | gid = nameMap[name] |
| 403 | except KeyError: |
| 404 | try: |
| 405 | if (name[:3] == 'gid'): |
| 406 | gid = eval(name[3:]) |
| 407 | else: |
| 408 | gid = ttFont.getGlyphID(name) |
| 409 | except: |
| 410 | raise KeyError(name) |
jvr | bafa66e | 2003-08-28 18:04:23 +0000 | [diff] [blame] | 411 | |
jvr | d299b55 | 2006-10-21 13:54:30 +0000 | [diff] [blame] | 412 | gids.append(gid) |
| 413 | |
| 414 | # Process the (char code to gid) item list in char code order. |
| 415 | # By definition, all one byte char codes map to subheader 0. |
| 416 | # For all the two byte char codes, we assume that the first byte maps maps to the empty subhead (with an entry count of 0, |
| 417 | # which defines all char codes in its range to map to notdef) unless proven otherwise. |
| 418 | # Note that since the char code items are processed in char code order, all the char codes with the |
| 419 | # same first byte are in sequential order. |
| 420 | |
| 421 | subHeaderKeys = [ kEmptyTwoCharCodeRange for x in range(256)] # list of indices into subHeaderList. |
jvr | bafa66e | 2003-08-28 18:04:23 +0000 | [diff] [blame] | 422 | subHeaderList = [] |
jvr | d299b55 | 2006-10-21 13:54:30 +0000 | [diff] [blame] | 423 | |
| 424 | # We force this subheader entry 0 to exist in the subHeaderList in the case where some one comes up |
| 425 | # with a cmap where all the one byte char codes map to notdef, |
| 426 | # with the result that the subhead 0 would not get created just by processing the item list. |
| 427 | charCode = charCodes[0] |
| 428 | if charCode > 255: |
| 429 | subHeader = SubHeader() |
| 430 | subHeader.firstCode = 0 |
| 431 | subHeader.entryCount = 0 |
| 432 | subHeader.idDelta = 0 |
| 433 | subHeader.idRangeOffset = 0 |
| 434 | subHeaderList.append(subHeader) |
| 435 | |
jvr | bafa66e | 2003-08-28 18:04:23 +0000 | [diff] [blame] | 436 | |
| 437 | lastFirstByte = -1 |
jvr | d299b55 | 2006-10-21 13:54:30 +0000 | [diff] [blame] | 438 | items = zip(charCodes, gids) |
| 439 | for charCode, gid in items: |
| 440 | if gid == 0: |
| 441 | continue |
jvr | bafa66e | 2003-08-28 18:04:23 +0000 | [diff] [blame] | 442 | firstbyte = charCode >> 8 |
| 443 | secondByte = charCode & 0x00FF |
jvr | d299b55 | 2006-10-21 13:54:30 +0000 | [diff] [blame] | 444 | |
| 445 | if firstbyte != lastFirstByte: # Need to update the current subhead, and start a new one. |
jvr | bafa66e | 2003-08-28 18:04:23 +0000 | [diff] [blame] | 446 | if lastFirstByte > -1: |
jvr | d299b55 | 2006-10-21 13:54:30 +0000 | [diff] [blame] | 447 | # fix GI's and iDelta of current subheader. |
| 448 | self.setIDDelta(subHeader) |
| 449 | |
| 450 | # If it was sunheader 0 for one-byte charCodes, then we need to set the subHeaderKeys value to zero |
| 451 | # for the indices matching the char codes. |
| 452 | if lastFirstByte == 0: |
| 453 | for index in range(subHeader.entryCount): |
| 454 | charCode = subHeader.firstCode + index |
| 455 | subHeaderKeys[charCode] = 0 |
| 456 | |
jvr | bafa66e | 2003-08-28 18:04:23 +0000 | [diff] [blame] | 457 | assert (subHeader.entryCount == len(subHeader.glyphIndexArray)), "Error - subhead entry count does not match len of glyphID subrange." |
| 458 | # init new subheader |
| 459 | subHeader = SubHeader() |
| 460 | subHeader.firstCode = secondByte |
jvr | d299b55 | 2006-10-21 13:54:30 +0000 | [diff] [blame] | 461 | subHeader.entryCount = 1 |
| 462 | subHeader.glyphIndexArray.append(gid) |
| 463 | subHeaderList.append(subHeader) |
| 464 | subHeaderKeys[firstbyte] = len(subHeaderList) -1 |
jvr | bafa66e | 2003-08-28 18:04:23 +0000 | [diff] [blame] | 465 | lastFirstByte = firstbyte |
| 466 | else: |
jvr | d299b55 | 2006-10-21 13:54:30 +0000 | [diff] [blame] | 467 | # need to fill in with notdefs all the code points between the last charCode and the current charCode. |
jvr | bafa66e | 2003-08-28 18:04:23 +0000 | [diff] [blame] | 468 | codeDiff = secondByte - (subHeader.firstCode + subHeader.entryCount) |
| 469 | for i in range(codeDiff): |
jvr | d299b55 | 2006-10-21 13:54:30 +0000 | [diff] [blame] | 470 | subHeader.glyphIndexArray.append(notdefGI) |
| 471 | subHeader.glyphIndexArray.append(gid) |
jvr | bafa66e | 2003-08-28 18:04:23 +0000 | [diff] [blame] | 472 | subHeader.entryCount = subHeader.entryCount + codeDiff + 1 |
jvr | d299b55 | 2006-10-21 13:54:30 +0000 | [diff] [blame] | 473 | |
| 474 | # fix GI's and iDelta of last subheader that we we added to the subheader array. |
| 475 | self.setIDDelta(subHeader) |
jvr | bafa66e | 2003-08-28 18:04:23 +0000 | [diff] [blame] | 476 | |
jvr | d299b55 | 2006-10-21 13:54:30 +0000 | [diff] [blame] | 477 | # Now we add a final subheader for the subHeaderKeys which maps to empty two byte charcode ranges. |
jvr | bafa66e | 2003-08-28 18:04:23 +0000 | [diff] [blame] | 478 | subHeader = SubHeader() |
| 479 | subHeader.firstCode = 0 |
| 480 | subHeader.entryCount = 0 |
| 481 | subHeader.idDelta = 0 |
| 482 | subHeader.idRangeOffset = 2 |
| 483 | subHeaderList.append(subHeader) |
| 484 | emptySubheadIndex = len(subHeaderList) - 1 |
| 485 | for index in range(256): |
jvr | d299b55 | 2006-10-21 13:54:30 +0000 | [diff] [blame] | 486 | if subHeaderKeys[index] == kEmptyTwoCharCodeRange: |
jvr | bafa66e | 2003-08-28 18:04:23 +0000 | [diff] [blame] | 487 | subHeaderKeys[index] = emptySubheadIndex |
| 488 | # Since this is the last subheader, the GlyphIndex Array starts two bytes after the start of the |
jvr | d299b55 | 2006-10-21 13:54:30 +0000 | [diff] [blame] | 489 | # idRangeOffset word of this subHeader. We can safely point to the first entry in the GlyphIndexArray, |
jvr | bafa66e | 2003-08-28 18:04:23 +0000 | [diff] [blame] | 490 | # since the first subrange of the GlyphIndexArray is for subHeader 0, which always starts with |
| 491 | # charcode 0 and GID 0. |
| 492 | |
jvr | bafa66e | 2003-08-28 18:04:23 +0000 | [diff] [blame] | 493 | idRangeOffset = (len(subHeaderList)-1)*8 + 2 # offset to beginning of glyphIDArray from first subheader idRangeOffset. |
jvr | d299b55 | 2006-10-21 13:54:30 +0000 | [diff] [blame] | 494 | subheadRangeLen = len(subHeaderList) -1 # skip last special empty-set subheader; we've already hardocodes its idRangeOffset to 2. |
| 495 | for index in range(subheadRangeLen): |
| 496 | subHeader = subHeaderList[index] |
| 497 | subHeader.idRangeOffset = 0 |
| 498 | for j in range(index): |
| 499 | prevSubhead = subHeaderList[j] |
| 500 | if prevSubhead.glyphIndexArray == subHeader.glyphIndexArray: # use the glyphIndexArray subarray |
| 501 | subHeader.idRangeOffset = prevSubhead.idRangeOffset - (index-j)*8 |
| 502 | subHeader.glyphIndexArray = [] |
| 503 | break |
| 504 | if subHeader.idRangeOffset == 0: # didn't find one. |
| 505 | subHeader.idRangeOffset = idRangeOffset |
| 506 | idRangeOffset = (idRangeOffset - 8) + subHeader.entryCount*2 # one less subheader, one more subArray. |
| 507 | else: |
| 508 | idRangeOffset = idRangeOffset - 8 # one less subheader |
| 509 | |
jvr | bafa66e | 2003-08-28 18:04:23 +0000 | [diff] [blame] | 510 | # Now we can write out the data! |
| 511 | length = 6 + 512 + 8*len(subHeaderList) # header, 256 subHeaderKeys, and subheader array. |
| 512 | for subhead in subHeaderList[:-1]: |
jvr | d299b55 | 2006-10-21 13:54:30 +0000 | [diff] [blame] | 513 | length = length + len(subhead.glyphIndexArray)*2 # We can't use subhead.entryCount, as some of the subhead may share subArrays. |
| 514 | dataList = [struct.pack(">HHH", 2, length, self.language)] |
jvr | bafa66e | 2003-08-28 18:04:23 +0000 | [diff] [blame] | 515 | for index in subHeaderKeys: |
jvr | d299b55 | 2006-10-21 13:54:30 +0000 | [diff] [blame] | 516 | dataList.append(struct.pack(">H", index*8)) |
jvr | bafa66e | 2003-08-28 18:04:23 +0000 | [diff] [blame] | 517 | for subhead in subHeaderList: |
jvr | d299b55 | 2006-10-21 13:54:30 +0000 | [diff] [blame] | 518 | dataList.append(struct.pack(subHeaderFormat, subhead.firstCode, subhead.entryCount, subhead.idDelta, subhead.idRangeOffset)) |
jvr | bafa66e | 2003-08-28 18:04:23 +0000 | [diff] [blame] | 519 | for subhead in subHeaderList[:-1]: |
| 520 | for gi in subhead.glyphIndexArray: |
jvr | d299b55 | 2006-10-21 13:54:30 +0000 | [diff] [blame] | 521 | dataList.append(struct.pack(">H", gi)) |
Behdad Esfahbod | 18316aa | 2013-11-27 21:17:35 -0500 | [diff] [blame] | 522 | data = bytesjoin(dataList) |
jvr | bafa66e | 2003-08-28 18:04:23 +0000 | [diff] [blame] | 523 | assert (len(data) == length), "Error: cmap format 2 is not same length as calculated! actual: " + str(len(data))+ " calc : " + str(length) |
| 524 | return data |
jvr | d299b55 | 2006-10-21 13:54:30 +0000 | [diff] [blame] | 525 | |
| 526 | |
def fromXML(self, name, attrs, content, ttFont):
    """Populate this subtable from a parsed XML element.

    Sets ``self.language`` from the ``language`` attribute and records one
    ``code -> glyph name`` entry in ``self.cmap`` for every ``<map>`` child
    found in *content*.  An existing ``self.cmap`` is extended rather than
    replaced.  *name* and *ttFont* are not used.
    """
    self.language = safeEval(attrs["language"])
    if not hasattr(self, "cmap"):
        self.cmap = {}
    mapping = self.cmap

    for child in content:
        # Only tuples are (name, attrs, content) child elements; anything
        # else in the content list is skipped.
        if isinstance(child, tuple):
            tag, childAttrs, _ = child
            if tag == "map":
                mapping[safeEval(childAttrs["code"])] = childAttrs["name"]
Just | 7842e56 | 1999-12-16 21:34:53 +0000 | [diff] [blame] | 540 | |
| 541 | |
# struct layout of the fixed-size part of a format 4 subtable: seven big-endian
# uint16 values.  cmap_format_4.compile() packs them in this order: format,
# length, language, segCountX2, searchRange, entrySelector, rangeShift.
cmap_format_4_format = ">7H"

# The fixed-size part is followed by these arrays, in this order:
#uint16 endCode[segCount]          # Ending character code for each segment, last = 0xFFFF.
#uint16 reservedPad                # This value should be zero
#uint16 startCode[segCount]        # Starting character code for each segment
#uint16 idDelta[segCount]          # Delta for all character codes in segment
#uint16 idRangeOffset[segCount]    # Offset in bytes to glyph indexArray, or 0
#uint16 glyphIndexArray[variable]  # Glyph index array
Just | 7842e56 | 1999-12-16 21:34:53 +0000 | [diff] [blame] | 550 | |
def splitRange(startCode, endCode, cmap):
    """Split a run of consecutive character codes into cheaper sub-ranges.

    The codes ``startCode..endCode`` (inclusive) must all be keys of *cmap*,
    which maps character code to glyph ID.  The range is cut into sub-ranges
    whose glyph IDs are consecutive and sub-ranges whose glyph IDs are not,
    keeping a cut only when it should make the format 4 subtable smaller.
    This is a heuristic; it is not claimed to be optimal.

    Returns ``(start, end)``: ``end`` holds the end code of every sub-range
    and ``start`` holds the start code of every sub-range except the first
    (the caller already knows that one), so ``len(start) + 1 == len(end)``.
    When no split is worthwhile the result is ``([], [endCode])``.
    """
    if startCode == endCode:
        return [], [endCode]

    # Pass 1: collect the maximal runs of codes whose glyph IDs are consecutive.
    runs = []
    runStart = None  # first code of the run being tracked, None when outside a run
    prevCode = startCode
    prevGID = cmap[startCode]
    for code in range(startCode + 1, endCode + 1):
        gid = cmap[code]
        if gid == prevGID + 1:
            if runStart is None:
                runStart = prevCode
        elif runStart is not None:
            runs.append((runStart, prevCode))
            runStart = None
        prevCode, prevGID = code, gid
    if runStart is not None:
        runs.append((runStart, prevCode))
    assert prevCode == endCode

    # Pass 2: drop runs that would only make the data bigger.  A new segment
    # costs 8 bytes; not using a new segment costs 2 bytes per character.
    kept = []
    for first, last in runs:
        if first == startCode and last == endCode:
            break  # the run is the whole range: nothing to split
        if first == startCode or last == endCode:
            threshold = 4  # touching an edge: the split adds one segment
        else:
            threshold = 8  # interior run: the split adds two segments
        if last - first + 1 > threshold:
            kept.append((first, last))

    if not kept:
        return [], [endCode]

    # Pass 3: lay out the kept runs and fill every gap before, between and
    # after them with a segment of non-consecutive glyph IDs.
    segments = []
    nextCode = startCode
    for first, last in kept:
        if first != nextCode:
            segments.append((nextCode, first - 1))
        segments.append((first, last))
        nextCode = last + 1
    if nextCode != endCode + 1:
        segments.append((nextCode, endCode))

    # Transform the segments into startCode/endCode lists; the first start
    # code is omitted from the result.
    start = [first for first, _ in segments[1:]]
    end = [last for _, last in segments]

    assert len(start) + 1 == len(end)
    return start, end
| 628 | |
| 629 | |
class cmap_format_4(CmapSubtable):
    """cmap subtable format 4: segment mapping to delta values.

    ``self.cmap`` maps 16-bit character codes to glyph names.
    """

    def decompile(self, data, ttFont):
        """Build ``self.cmap`` from the binary subtable.

        We usually get here indirectly from the subtable __getattr__
        function, in which case both args must be None and the body is read
        from ``self.data``.  If not, someone is calling decompile() directly
        and must provide both args; the header is then parsed first.
        """
        if data is not None and ttFont is not None:
            # Pass the caller's data straight through.  The previous code
            # sliced self.data with the undefined names `offset` and
            # `length`, so this path always raised NameError.
            self.decompileHeader(data, ttFont)
        else:
            assert (data is None and ttFont is None), "Need both data and ttFont arguments"

        data = self.data  # decompileHeader assigns the data after the header to self.data
        (segCountX2, searchRange, entrySelector, rangeShift) = \
            struct.unpack(">4H", data[:8])
        data = data[8:]
        segCount = segCountX2 // 2

        # NOTE(review): array.fromstring()/tostring() were removed in
        # Python 3.9; kept because the file targets Python 2 as well.
        allCodes = array.array("H")
        allCodes.fromstring(data)
        self.data = data = None

        if sys.byteorder != "big":
            allCodes.byteswap()

        # divide the data
        endCode = allCodes[:segCount]
        allCodes = allCodes[segCount+1:]  # the +1 is skipping the reservedPad field
        startCode = allCodes[:segCount]
        allCodes = allCodes[segCount:]
        idDelta = allCodes[:segCount]
        allCodes = allCodes[segCount:]
        idRangeOffset = allCodes[:segCount]
        glyphIndexArray = allCodes[segCount:]
        lenGIArray = len(glyphIndexArray)

        # build 2-byte character mapping
        charCodes = []
        gids = []
        for i in range(len(startCode) - 1):  # don't do 0xffff!
            start = startCode[i]
            delta = idDelta[i]
            rangeOffset = idRangeOffset[i]
            # idRangeOffset[i] is a byte offset counted from its own slot.
            # Adding a char code to `partial` turns it into an index into
            # glyphIndexArray, which starts right after idRangeOffset.
            partial = rangeOffset // 2 - start + i - len(idRangeOffset)

            rangeCharCodes = list(range(startCode[i], endCode[i] + 1))
            charCodes.extend(rangeCharCodes)
            if rangeOffset == 0:
                gids.extend([(charCode + delta) & 0xFFFF for charCode in rangeCharCodes])
            else:
                for charCode in rangeCharCodes:
                    index = charCode + partial
                    assert (index < lenGIArray), "In format 4 cmap, range (%d), the calculated index (%d) into the glyph index array is not less than the length of the array (%d) !" % (i, index, lenGIArray)
                    if glyphIndexArray[index] != 0:  # if not missing glyph
                        glyphID = glyphIndexArray[index] + delta
                    else:
                        glyphID = 0  # missing glyph
                    gids.append(glyphID & 0xFFFF)

        self.cmap = cmap = {}
        glyphOrder = self.ttFont.getGlyphOrder()
        try:
            # Fast path: plain list indexing.
            names = [glyphOrder[gid] for gid in gids]
        except IndexError:
            # Some glyph ID is beyond the glyph order; let the font name it.
            getGlyphName = self.ttFont.getGlyphName
            names = [getGlyphName(gid) for gid in gids]
        cmap.update(zip(charCodes, names))

    def compile(self, ttFont):
        """Return the binary form of this subtable, header included.

        If the subtable was never decompiled (``self.data`` is still set),
        the original bytes are written back unchanged.

        Raises KeyError (carrying the glyph name) when a glyph name in
        ``self.cmap`` cannot be turned into a glyph ID.
        """
        if self.data:
            return struct.pack(">HHH", self.format, self.length, self.language) + self.data

        from fontTools.ttLib.sfnt import maxPowerOfTwo

        charCodes = list(self.cmap.keys())
        lenCharCodes = len(charCodes)
        if lenCharCodes == 0:
            startCode = [0xffff]
            endCode = [0xffff]
        else:
            charCodes.sort()
            names = [self.cmap[charCode] for charCode in charCodes]
            nameMap = ttFont.getReverseGlyphMap()
            try:
                gids = [nameMap[name] for name in names]
            except KeyError:
                nameMap = ttFont.getReverseGlyphMap(rebuild=True)
                try:
                    gids = [nameMap[name] for name in names]
                except KeyError:
                    # allow virtual GIDs in format 4 tables
                    gids = []
                    for name in names:
                        try:
                            gid = nameMap[name]
                        except KeyError:
                            try:
                                if name[:3] == 'gid':
                                    # int(), not eval(): the suffix comes
                                    # from font/XML input and must only
                                    # ever be parsed as a number.
                                    gid = int(name[3:])
                                else:
                                    gid = ttFont.getGlyphID(name)
                            except Exception:
                                raise KeyError(name)

                        gids.append(gid)
            cmap = dict(zip(charCodes, gids))  # code:glyphID mapping

            # Build startCode and endCode lists.
            # Split the char codes in ranges of consecutive char codes, then split
            # each range in more ranges of consecutive/not consecutive glyph IDs.
            # See splitRange().
            lastCode = charCodes[0]
            endCode = []
            startCode = [lastCode]
            for charCode in charCodes[1:]:  # skip the first code, it's the first start code
                if charCode == lastCode + 1:
                    lastCode = charCode
                    continue
                start, end = splitRange(startCode[-1], lastCode, cmap)
                startCode.extend(start)
                endCode.extend(end)
                startCode.append(charCode)
                lastCode = charCode
            # Close the final range through splitRange() as well, so it gets
            # the same treatment as every earlier range.  When no split is
            # worthwhile this is the same as endCode.append(lastCode).
            start, end = splitRange(startCode[-1], lastCode, cmap)
            startCode.extend(start)
            endCode.extend(end)
            startCode.append(0xffff)
            endCode.append(0xffff)

        # build up rest of cruft
        idDelta = []
        idRangeOffset = []
        glyphIndexArray = []
        for i in range(len(endCode)-1):  # skip the closing codes (0xffff)
            indices = [cmap[charCode] for charCode in range(startCode[i], endCode[i] + 1)]
            if indices == list(range(indices[0], indices[0] + len(indices))):
                # Consecutive glyph IDs: a single delta describes the segment.
                idDelta.append((indices[0] - startCode[i]) % 0x10000)
                idRangeOffset.append(0)
            else:
                # Otherwise store the IDs in glyphIndexArray and record the
                # byte offset from this segment's idRangeOffset slot to them.
                idDelta.append(0)
                idRangeOffset.append(2 * (len(endCode) + len(glyphIndexArray) - i))
                glyphIndexArray.extend(indices)
        idDelta.append(1)  # 0xffff + 1 == (tadaa!) 0. So this end code maps to .notdef
        idRangeOffset.append(0)

        # Binary-search helper fields of the header.
        segCount = len(endCode)
        segCountX2 = segCount * 2
        maxExponent = maxPowerOfTwo(segCount)
        searchRange = 2 * (2 ** maxExponent)
        entrySelector = maxExponent
        rangeShift = max(0, 2 * segCount - searchRange)

        charCodeArray = array.array("H", endCode + [0] + startCode)
        idDeltaArray = array.array("H", idDelta)
        restArray = array.array("H", idRangeOffset + glyphIndexArray)
        if sys.byteorder != "big":
            charCodeArray.byteswap()
            idDeltaArray.byteswap()
            restArray.byteswap()
        data = charCodeArray.tostring() + idDeltaArray.tostring() + restArray.tostring()

        length = struct.calcsize(cmap_format_4_format) + len(data)
        header = struct.pack(cmap_format_4_format, self.format, length, self.language,
                segCountX2, searchRange, entrySelector, rangeShift)
        return header + data

    def fromXML(self, name, attrs, content, ttFont):
        """Populate this subtable from a parsed XML element.

        Sets ``self.language`` and adds one ``code -> glyph name`` entry to
        ``self.cmap`` per ``<map>`` child.  Any other child element trips an
        assertion.
        """
        self.language = safeEval(attrs["language"])
        if not hasattr(self, "cmap"):
            self.cmap = {}
        cmap = self.cmap

        for element in content:
            if not isinstance(element, tuple):
                continue
            nameMap, attrsMap, dummyContent = element
            if nameMap != "map":
                assert 0, "Unrecognized keyword in cmap subtable"
            cmap[safeEval(attrsMap["code"])] = attrsMap["name"]
Just | 7842e56 | 1999-12-16 21:34:53 +0000 | [diff] [blame] | 813 | |
| 814 | |
class cmap_format_6(CmapSubtable):
    """cmap subtable format 6: trimmed table mapping.

    One contiguous run of character codes, starting at ``firstCode``, is
    mapped through a glyph index array.  ``self.cmap`` maps character codes
    to glyph names.
    """

    def decompile(self, data, ttFont):
        """Build ``self.cmap`` from the binary subtable.

        We usually get here indirectly from the subtable __getattr__
        function, in which case both args must be None and the body is read
        from ``self.data``.  If not, someone is calling decompile() directly
        and must provide both args; the header is then parsed first.
        """
        if data is not None and ttFont is not None:
            # Pass the caller's data straight through.  The previous code
            # sliced it with the undefined names `offset` and `length`, so
            # this path always raised NameError.
            self.decompileHeader(data, ttFont)
        else:
            assert (data is None and ttFont is None), "Need both data and ttFont arguments"

        data = self.data  # decompileHeader assigns the data after the header to self.data
        firstCode, entryCount = struct.unpack(">HH", data[:4])
        firstCode = int(firstCode)
        data = data[4:]
        # len(data) == 2 * entryCount is deliberately not asserted: it is not
        # true in Apple's Helvetica.  Read at most entryCount entries instead.
        # NOTE(review): array.fromstring()/tostring() were removed in
        # Python 3.9; kept because the file targets Python 2 as well.
        glyphIndexArray = array.array("H")
        glyphIndexArray.fromstring(data[:2 * int(entryCount)])
        if sys.byteorder != "big":
            glyphIndexArray.byteswap()
        self.data = data = None

        self.cmap = cmap = {}

        lenArray = len(glyphIndexArray)
        charCodes = list(range(firstCode, firstCode + lenArray))
        glyphOrder = self.ttFont.getGlyphOrder()
        try:
            # Fast path: plain list indexing.
            names = [glyphOrder[gid] for gid in glyphIndexArray]
        except IndexError:
            # Some glyph ID is beyond the glyph order; let the font name it.
            getGlyphName = self.ttFont.getGlyphName
            names = [getGlyphName(gid) for gid in glyphIndexArray]
        cmap.update(zip(charCodes, names))

    def compile(self, ttFont):
        """Return the binary form of this subtable, header included.

        If the subtable was never decompiled (``self.data`` is still set),
        the original bytes are written back unchanged.  Codes inside the
        covered range that have no entry in ``self.cmap`` are written as the
        glyph ID of ".notdef".
        """
        if self.data:
            return struct.pack(">HHH", self.format, self.length, self.language) + self.data
        cmap = self.cmap
        # Sort first: the covered range runs from the smallest to the
        # largest code, and dict key order does not guarantee that the
        # first/last keys are those.
        codes = sorted(cmap.keys())
        if codes:  # yes, there are empty cmap tables.
            codes = list(range(codes[0], codes[-1] + 1))
            firstCode = codes[0]
            valueList = [ttFont.getGlyphID(cmap.get(code, ".notdef")) for code in codes]
            glyphIndexArray = array.array("H", valueList)
            if sys.byteorder != "big":
                glyphIndexArray.byteswap()
            data = glyphIndexArray.tostring()
        else:
            data = b""
            firstCode = 0
        # 10 == size of the five uint16 header fields packed below.
        header = struct.pack(">HHHHH",
                6, len(data) + 10, self.language, firstCode, len(codes))
        return header + data

    def fromXML(self, name, attrs, content, ttFont):
        """Populate this subtable from a parsed XML element.

        Sets ``self.language`` and adds one ``code -> glyph name`` entry to
        ``self.cmap`` per ``<map>`` child; other children are ignored.
        """
        self.language = safeEval(attrs["language"])
        if not hasattr(self, "cmap"):
            self.cmap = {}
        cmap = self.cmap

        for element in content:
            if not isinstance(element, tuple):
                continue
            name, attrs, content = element
            if name != "map":
                continue
            cmap[safeEval(attrs["code"])] = attrs["name"]
Just | 7842e56 | 1999-12-16 21:34:53 +0000 | [diff] [blame] | 882 | |
| 883 | |
Roozbeh Pournader | 51a1782 | 2013-10-09 15:55:07 -0700 | [diff] [blame] | 884 | class cmap_format_12_or_13(CmapSubtable): |
jvr | 924e4e2 | 2003-02-08 10:45:23 +0000 | [diff] [blame] | 885 | |
def __init__(self, format):
    """Create an empty subtable of the given *format* number.

    No binary data and no font are attached yet, and the header's
    ``reserved`` field starts at zero.
    """
    self.format = format
    self.reserved = 0
    # Neither raw bytes nor an owning font until a header is decompiled.
    self.data = self.ttFont = None
| 891 | |
def decompileHeader(self, data, ttFont):
    """Parse the 16-byte header at the start of *data*.

    Stores ``format``, ``reserved``, ``length``, ``language`` and
    ``nGroups`` on the instance, keeps the bytes after the header in
    ``self.data`` and remembers *ttFont* in ``self.ttFont``.

    Asserts that the size of *data*, the size implied by ``nGroups``
    (12 bytes per group) and the ``length`` field all agree.
    """
    headerSize = 16
    header = struct.unpack(">HHLLL", data[:headerSize])
    fmt, _, declaredLength, _, groupCount = header
    actualLength = len(data)
    impliedLength = headerSize + groupCount * 12
    assert actualLength == impliedLength and impliedLength == declaredLength, "corrupt cmap table format %d (data length: %d, header length: %d)" % (fmt, actualLength, declaredLength)
    (self.format, self.reserved, self.length,
     self.language, self.nGroups) = header
    self.data = data[headerSize:]
    self.ttFont = ttFont
| 902 | |
def decompile(self, data, ttFont):
    """Build ``self.cmap`` (character code -> glyph name) from the groups.

    We usually get here indirectly from the subtable __getattr__ function,
    in which case both args must be None and the group records are read
    from ``self.data``.  If not, someone is calling decompile() directly and
    must provide both args; the header is then parsed first.

    Each 12-byte group holds a start char code, an end char code and a glyph
    ID.  The glyph IDs for the group's codes come from
    ``self._computeGIDs(glyphID, groupLength)``.
    """
    if data is not None and ttFont is not None:
        # Pass the caller's data straight through.  The previous code sliced
        # it with the undefined names `offset` and `length`, so this path
        # always raised NameError.
        self.decompileHeader(data, ttFont)
    else:
        assert (data is None and ttFont is None), "Need both data and ttFont arguments"

    data = self.data  # decompileHeader assigns the data after the header to self.data
    charCodes = []
    gids = []
    pos = 0
    for i in range(self.nGroups):
        startCharCode, endCharCode, glyphID = struct.unpack(">LLL", data[pos:pos+12])
        pos += 12
        lenGroup = 1 + endCharCode - startCharCode
        charCodes.extend(range(startCharCode, endCharCode + 1))
        gids.extend(self._computeGIDs(glyphID, lenGroup))
    self.data = data = None
    self.cmap = cmap = {}
    glyphOrder = self.ttFont.getGlyphOrder()
    try:
        # Fast path: plain list indexing.
        names = [glyphOrder[gid] for gid in gids]
    except IndexError:
        # Some glyph ID is beyond the glyph order; let the font name it.
        getGlyphName = self.ttFont.getGlyphName
        names = [getGlyphName(gid) for gid in gids]
    cmap.update(zip(charCodes, names))
jvr | 924e4e2 | 2003-02-08 10:45:23 +0000 | [diff] [blame] | 931 | |
| 932 | def compile(self, ttFont): |
jvr | d299b55 | 2006-10-21 13:54:30 +0000 | [diff] [blame] | 933 | if self.data: |
Roozbeh Pournader | 51a1782 | 2013-10-09 15:55:07 -0700 | [diff] [blame] | 934 | return struct.pack(">HHLLL", self.format, self.reserved, self.length, self.language, self.nGroups) + self.data |
Behdad Esfahbod | c2297cd | 2013-11-27 06:26:55 -0500 | [diff] [blame] | 935 | charCodes = list(self.cmap.keys()) |
jvr | d299b55 | 2006-10-21 13:54:30 +0000 | [diff] [blame] | 936 | lenCharCodes = len(charCodes) |
Behdad Esfahbod | c2297cd | 2013-11-27 06:26:55 -0500 | [diff] [blame] | 937 | names = list(self.cmap.values()) |
jvr | d299b55 | 2006-10-21 13:54:30 +0000 | [diff] [blame] | 938 | nameMap = ttFont.getReverseGlyphMap() |
| 939 | try: |
Behdad Esfahbod | e5ca796 | 2013-11-27 04:38:16 -0500 | [diff] [blame] | 940 | gids = list(map(operator.getitem, [nameMap]*lenCharCodes, names)) |
jvr | d299b55 | 2006-10-21 13:54:30 +0000 | [diff] [blame] | 941 | except KeyError: |
Behdad Esfahbod | dc87372 | 2013-12-04 21:28:50 -0500 | [diff] [blame] | 942 | nameMap = ttFont.getReverseGlyphMap(rebuild=True) |
jvr | d299b55 | 2006-10-21 13:54:30 +0000 | [diff] [blame] | 943 | try: |
Behdad Esfahbod | e5ca796 | 2013-11-27 04:38:16 -0500 | [diff] [blame] | 944 | gids = list(map(operator.getitem, [nameMap]*lenCharCodes, names)) |
jvr | d299b55 | 2006-10-21 13:54:30 +0000 | [diff] [blame] | 945 | except KeyError: |
| 946 | # allow virtual GIDs in format 12 tables |
| 947 | gids = [] |
| 948 | for name in names: |
| 949 | try: |
| 950 | gid = nameMap[name] |
| 951 | except KeyError: |
| 952 | try: |
| 953 | if (name[:3] == 'gid'): |
| 954 | gid = eval(name[3:]) |
| 955 | else: |
| 956 | gid = ttFont.getGlyphID(name) |
| 957 | except: |
| 958 | raise KeyError(name) |
| 959 | |
| 960 | gids.append(gid) |
| 961 | |
| 962 | cmap = {} # code:glyphID mapping |
Behdad Esfahbod | e5ca796 | 2013-11-27 04:38:16 -0500 | [diff] [blame] | 963 | list(map(operator.setitem, [cmap]*len(charCodes), charCodes, gids)) |
jvr | d299b55 | 2006-10-21 13:54:30 +0000 | [diff] [blame] | 964 | |
jvr | 924e4e2 | 2003-02-08 10:45:23 +0000 | [diff] [blame] | 965 | charCodes.sort() |
jvr | d299b55 | 2006-10-21 13:54:30 +0000 | [diff] [blame] | 966 | index = 0 |
jvr | 924e4e2 | 2003-02-08 10:45:23 +0000 | [diff] [blame] | 967 | startCharCode = charCodes[0] |
| 968 | startGlyphID = cmap[startCharCode] |
Roozbeh Pournader | 51a1782 | 2013-10-09 15:55:07 -0700 | [diff] [blame] | 969 | lastGlyphID = startGlyphID - self._format_step |
jvr | d299b55 | 2006-10-21 13:54:30 +0000 | [diff] [blame] | 970 | lastCharCode = startCharCode - 1 |
jvr | 0cd79a5 | 2004-09-25 07:30:47 +0000 | [diff] [blame] | 971 | nGroups = 0 |
jvr | d299b55 | 2006-10-21 13:54:30 +0000 | [diff] [blame] | 972 | dataList = [] |
| 973 | maxIndex = len(charCodes) |
| 974 | for index in range(maxIndex): |
| 975 | charCode = charCodes[index] |
jvr | 924e4e2 | 2003-02-08 10:45:23 +0000 | [diff] [blame] | 976 | glyphID = cmap[charCode] |
Roozbeh Pournader | 51a1782 | 2013-10-09 15:55:07 -0700 | [diff] [blame] | 977 | if not self._IsInSameRun(glyphID, lastGlyphID, charCode, lastCharCode): |
jvr | d299b55 | 2006-10-21 13:54:30 +0000 | [diff] [blame] | 978 | dataList.append(struct.pack(">LLL", startCharCode, lastCharCode, startGlyphID)) |
jvr | 924e4e2 | 2003-02-08 10:45:23 +0000 | [diff] [blame] | 979 | startCharCode = charCode |
jvr | d299b55 | 2006-10-21 13:54:30 +0000 | [diff] [blame] | 980 | startGlyphID = glyphID |
jvr | 924e4e2 | 2003-02-08 10:45:23 +0000 | [diff] [blame] | 981 | nGroups = nGroups + 1 |
jvr | d299b55 | 2006-10-21 13:54:30 +0000 | [diff] [blame] | 982 | lastGlyphID = glyphID |
| 983 | lastCharCode = charCode |
| 984 | dataList.append(struct.pack(">LLL", startCharCode, lastCharCode, startGlyphID)) |
jvr | 0cd79a5 | 2004-09-25 07:30:47 +0000 | [diff] [blame] | 985 | nGroups = nGroups + 1 |
Behdad Esfahbod | 18316aa | 2013-11-27 21:17:35 -0500 | [diff] [blame] | 986 | data = bytesjoin(dataList) |
jvr | d299b55 | 2006-10-21 13:54:30 +0000 | [diff] [blame] | 987 | lengthSubtable = len(data) +16 |
| 988 | assert len(data) == (nGroups*12) == (lengthSubtable-16) |
| 989 | return struct.pack(">HHLLL", self.format, self.reserved , lengthSubtable, self.language, nGroups) + data |
jvr | 924e4e2 | 2003-02-08 10:45:23 +0000 | [diff] [blame] | 990 | |
| 991 | def toXML(self, writer, ttFont): |
| 992 | writer.begintag(self.__class__.__name__, [ |
| 993 | ("platformID", self.platformID), |
| 994 | ("platEncID", self.platEncID), |
| 995 | ("format", self.format), |
| 996 | ("reserved", self.reserved), |
| 997 | ("length", self.length), |
| 998 | ("language", self.language), |
| 999 | ("nGroups", self.nGroups), |
| 1000 | ]) |
| 1001 | writer.newline() |
Behdad Esfahbod | ac1b435 | 2013-11-27 04:15:34 -0500 | [diff] [blame] | 1002 | codes = sorted(self.cmap.items()) |
jvr | a84b28d | 2004-09-25 09:06:58 +0000 | [diff] [blame] | 1003 | self._writeCodes(codes, writer) |
jvr | 924e4e2 | 2003-02-08 10:45:23 +0000 | [diff] [blame] | 1004 | writer.endtag(self.__class__.__name__) |
| 1005 | writer.newline() |
| 1006 | |
Behdad Esfahbod | 3a9fd30 | 2013-11-27 03:19:32 -0500 | [diff] [blame] | 1007 | def fromXML(self, name, attrs, content, ttFont): |
jvr | d299b55 | 2006-10-21 13:54:30 +0000 | [diff] [blame] | 1008 | self.format = safeEval(attrs["format"]) |
| 1009 | self.reserved = safeEval(attrs["reserved"]) |
| 1010 | self.length = safeEval(attrs["length"]) |
jvr | 924e4e2 | 2003-02-08 10:45:23 +0000 | [diff] [blame] | 1011 | self.language = safeEval(attrs["language"]) |
jvr | d299b55 | 2006-10-21 13:54:30 +0000 | [diff] [blame] | 1012 | self.nGroups = safeEval(attrs["nGroups"]) |
| 1013 | if not hasattr(self, "cmap"): |
| 1014 | self.cmap = {} |
| 1015 | cmap = self.cmap |
| 1016 | |
jvr | 924e4e2 | 2003-02-08 10:45:23 +0000 | [diff] [blame] | 1017 | for element in content: |
Behdad Esfahbod | b774f9f | 2013-11-27 05:17:37 -0500 | [diff] [blame] | 1018 | if not isinstance(element, tuple): |
jvr | 924e4e2 | 2003-02-08 10:45:23 +0000 | [diff] [blame] | 1019 | continue |
| 1020 | name, attrs, content = element |
Behdad Esfahbod | 180ace6 | 2013-11-27 02:40:30 -0500 | [diff] [blame] | 1021 | if name != "map": |
jvr | 924e4e2 | 2003-02-08 10:45:23 +0000 | [diff] [blame] | 1022 | continue |
jvr | d299b55 | 2006-10-21 13:54:30 +0000 | [diff] [blame] | 1023 | cmap[safeEval(attrs["code"])] = attrs["name"] |
jvr | 924e4e2 | 2003-02-08 10:45:23 +0000 | [diff] [blame] | 1024 | |
| 1025 | |
class cmap_format_12(cmap_format_12_or_13):
    """Format 12: glyph IDs increase by one along each group."""

    def __init__(self, format):
        cmap_format_12_or_13.__init__(self, format)
        # Glyph ID increment between consecutive char codes of a group.
        self._format_step = 1

    def _computeGIDs(self, startingGlyph, numberOfGlyphs):
        """Return the consecutive glyph IDs covered by one group."""
        return [startingGlyph + i for i in range(numberOfGlyphs)]

    def _IsInSameRun(self, glyphID, lastGlyphID, charCode, lastCharCode):
        """True when both the glyph ID and the char code advance by exactly one."""
        glyphAdvances = (glyphID - lastGlyphID == 1)
        codeAdvances = (charCode - lastCharCode == 1)
        return glyphAdvances and codeAdvances
| 1036 | |
| 1037 | |
class cmap_format_13(cmap_format_12_or_13):
    """Format 13: every char code of a group maps to the same glyph ID."""

    def __init__(self, format):
        cmap_format_12_or_13.__init__(self, format)
        # Glyph ID increment between consecutive char codes of a group.
        self._format_step = 0

    def _computeGIDs(self, startingGlyph, numberOfGlyphs):
        """Return the group's single glyph ID repeated once per char code."""
        return [startingGlyph for _ in range(numberOfGlyphs)]

    def _IsInSameRun(self, glyphID, lastGlyphID, charCode, lastCharCode):
        """True when the glyph ID is unchanged and the char code advances by one."""
        sameGlyph = (glyphID == lastGlyphID)
        codeAdvances = (charCode - lastCharCode == 1)
        return sameGlyph and codeAdvances
| 1048 | |
| 1049 | |
def cvtToUVS(threeByteString):
    """Decode a 3-byte big-endian unsigned value into an integer."""
    # Pad with a leading zero byte and read the result as a 32-bit
    # unsigned integer.
    padded = b"\0" + threeByteString
    return struct.unpack(">L", padded)[0]
| 1054 | |
def cvtFromUVS(val):
    """Encode *val* (0 <= val < 0x1000000) as 3 big-endian bytes."""
    assert 0 <= val < 0x1000000
    # Pack as a 32-bit value and drop the (always zero) high byte.
    return struct.pack(">L", val)[1:]
jvr | 0cb8a08 | 2008-05-16 15:07:09 +0000 | [diff] [blame] | 1059 | |
Behdad Esfahbod | b7fd2e1 | 2013-11-27 18:58:45 -0500 | [diff] [blame] | 1060 | |
class cmap_format_14(CmapSubtable):
    """cmap subtable format 14 (Unicode variation sequences).

    The decoded form is ``self.uvsDict``: a dict keyed by variation
    selector (int) whose values are lists of ``(uv, glyphName)`` entries.
    An entry whose glyph name is None belongs to the default UVS table;
    an entry with a glyph name belongs to the non-default UVS table.
    ``self.cmap`` is always an empty dict, present only so that clients
    expecting it on every cmap subtable do not fail.
    """

    def decompileHeader(self, data, ttFont):
        """Parse the 10-byte header and keep the remaining bytes in ``self.data``."""
        format, length, numVarSelectorRecords = struct.unpack(">HLL", data[:10])
        self.data = data[10:]
        self.length = length
        self.numVarSelectorRecords = numVarSelectorRecords
        self.ttFont = ttFont
        self.language = 0xFF  # has no language.

    def decompile(self, data, ttFont):
        """Decode the variation selector records into ``self.uvsDict``.

        Either both arguments are None (the header was already parsed by
        decompileHeader()) or both are given, in which case *data* is the
        complete subtable including its header.
        """
        if data is not None and ttFont is not None:
            # Do not additionally require ttFont.lazy: a direct call on a
            # non-lazy font used to trip the assertion below even though
            # both arguments had been supplied.
            self.decompileHeader(data, ttFont)
        else:
            assert (data is None and ttFont is None), "Need both data and ttFont arguments"
        data = self.data

        self.cmap = {}  # so that clients that expect this to exist in a cmap table won't fail.
        uvsDict = {}
        recOffset = 0
        for n in range(self.numVarSelectorRecords):
            uvs, defOVSOffset, nonDefUVSOffset = struct.unpack(">3sLL", data[recOffset:recOffset+11])
            recOffset += 11
            varUVS = cvtToUVS(uvs)
            if defOVSOffset:
                # Stored offsets count from the subtable start; self.data
                # begins after the 10-byte header.
                startOffset = defOVSOffset - 10
                numValues, = struct.unpack(">L", data[startOffset:startOffset+4])
                startOffset += 4
                for r in range(numValues):
                    uv, addtlCnt = struct.unpack(">3sB", data[startOffset:startOffset+4])
                    startOffset += 4
                    firstBaseUV = cvtToUVS(uv)
                    cnt = addtlCnt + 1
                    # Default entries carry no glyph name.
                    localUVList = [(baseUV, None) for baseUV in range(firstBaseUV, firstBaseUV + cnt)]
                    uvsDict.setdefault(varUVS, []).extend(localUVList)

            if nonDefUVSOffset:
                startOffset = nonDefUVSOffset - 10
                numRecs, = struct.unpack(">L", data[startOffset:startOffset+4])
                startOffset += 4
                localUVList = []
                for r in range(numRecs):
                    uv, gid = struct.unpack(">3sH", data[startOffset:startOffset+5])
                    startOffset += 5
                    uv = cvtToUVS(uv)
                    glyphName = self.ttFont.getGlyphName(gid)
                    localUVList.append([uv, glyphName])
                uvsDict.setdefault(varUVS, []).extend(localUVList)

        self.uvsDict = uvsDict

    def toXML(self, writer, ttFont):
        """Write one ``<map>`` element per (variation selector, base character) entry."""
        writer.begintag(self.__class__.__name__, [
                ("platformID", self.platformID),
                ("platEncID", self.platEncID),
                ("format", self.format),
                ("length", self.length),
                ("numVarSelectorRecords", self.numVarSelectorRecords),
                ])
        writer.newline()
        uvsDict = self.uvsDict
        uvsList = sorted(uvsDict.keys())
        for uvs in uvsList:
            uvList = uvsDict[uvs]
            # Default entries (no glyph name) first, then by code point.
            uvList.sort(key=lambda item: (item[1] is not None, item[0], item[1]))
            for uv, gname in uvList:
                if gname is None:
                    gname = "None"
                # I use the arg rather than the keyword syntax in order to preserve the attribute order.
                writer.simpletag("map", [("uvs", hex(uvs)), ("uv", hex(uv)), ("name", gname)])
                writer.newline()
        writer.endtag(self.__class__.__name__)
        writer.newline()

    def fromXML(self, name, attrs, content, ttFont):
        """Load header attributes and ``<map>`` children into ``self.uvsDict``."""
        self.format = safeEval(attrs["format"])
        self.length = safeEval(attrs["length"])
        self.numVarSelectorRecords = safeEval(attrs["numVarSelectorRecords"])
        self.language = 0xFF  # provide a value so that CmapSubtable.__lt__() won't fail
        if not hasattr(self, "cmap"):
            self.cmap = {}  # so that clients that expect this to exist in a cmap table won't fail.
        if not hasattr(self, "uvsDict"):
            self.uvsDict = {}
        uvsDict = self.uvsDict

        for element in content:
            if not isinstance(element, tuple):
                continue
            elementName, elementAttrs, _ = element
            if elementName != "map":
                continue
            uvs = safeEval(elementAttrs["uvs"])
            uv = safeEval(elementAttrs["uv"])
            gname = elementAttrs["name"]
            if gname == "None":
                gname = None
            uvsDict.setdefault(uvs, []).append([uv, gname])

    def compile(self, ttFont):
        """Return the binary subtable.

        If ``self.data`` holds raw bytes (header already stripped by
        decompileHeader()), they are re-emitted behind the stored header
        values. Otherwise the subtable is built from ``self.uvsDict``;
        ``self.length`` and ``self.numVarSelectorRecords`` are updated.
        """
        if self.data:
            return struct.pack(">HLL", self.format, self.length, self.numVarSelectorRecords) + self.data

        uvsDict = self.uvsDict
        uvsList = sorted(uvsDict.keys())
        self.numVarSelectorRecords = len(uvsList)
        offset = 10 + self.numVarSelectorRecords*11  # current value is end of VarSelectorRecords block.
        data = []
        varSelectorRecords = []
        for uvs in uvsList:
            entryList = uvsDict[uvs]

            defList = sorted(entry[0] for entry in entryList if entry[1] is None)
            if defList:
                defOVSOffset = offset

                # Collapse consecutive code points into (start, additionalCount)
                # ranges. additionalCount is packed as one unsigned byte, so a
                # range is closed once it reaches 255 additional values.
                defRecs = []
                rangeStart = defList[0]
                additional = 0
                for defEntry in defList[1:]:
                    if defEntry == rangeStart + additional + 1 and additional < 255:
                        additional += 1
                    else:
                        defRecs.append(struct.pack(">3sB", cvtFromUVS(rangeStart), additional))
                        rangeStart = defEntry
                        additional = 0
                defRecs.append(struct.pack(">3sB", cvtFromUVS(rangeStart), additional))

                numDefRecs = len(defRecs)
                data.append(struct.pack(">L", numDefRecs))
                data.extend(defRecs)
                offset += 4 + numDefRecs*4
            else:
                defOVSOffset = 0

            ndefList = [entry for entry in entryList if entry[1] is not None]
            if ndefList:
                nonDefUVSOffset = offset
                ndefList.sort()
                numNonDefRecs = len(ndefList)
                data.append(struct.pack(">L", numNonDefRecs))
                offset += 4 + numNonDefRecs*5

                for uv, gname in ndefList:
                    gid = ttFont.getGlyphID(gname)
                    ndrec = struct.pack(">3sH", cvtFromUVS(uv), gid)
                    data.append(ndrec)
            else:
                nonDefUVSOffset = 0

            vrec = struct.pack(">3sLL", cvtFromUVS(uvs), defOVSOffset, nonDefUVSOffset)
            varSelectorRecords.append(vrec)

        data = bytesjoin(varSelectorRecords) + bytesjoin(data)
        self.length = 10 + len(data)
        headerdata = struct.pack(">HLL", self.format, self.length, self.numVarSelectorRecords)

        # Do not cache the result in self.data: the shortcut at the top
        # expects header-less bytes and would prepend a second header on
        # the next compile() call.
        return headerdata + data
| 1235 | |
| 1236 | |
class cmap_format_unknown(CmapSubtable):
    """Fallback for cmap subtable formats without a dedicated class.

    The subtable bytes are kept verbatim in ``self.data`` and are dumped
    to / read from XML as hex.
    """

    def toXML(self, writer, ttFont):
        """Write the raw subtable bytes as a hex dump."""
        # The first 12 characters of the class name are "cmap_format_";
        # the actual format number is appended to build the tag name.
        cmapName = self.__class__.__name__[:12] + str(self.format)
        writer.begintag(cmapName, [
                ("platformID", self.platformID),
                ("platEncID", self.platEncID),
                ])
        writer.newline()
        writer.dumphex(self.data)
        writer.endtag(cmapName)
        writer.newline()

    def fromXML(self, name, attrs, content, ttFont):
        """Read the raw subtable bytes back from a hex dump."""
        self.data = readHex(content)
        self.cmap = {}

    def decompileHeader(self, data, ttFont):
        """Store *data* unparsed."""
        self.language = 0  # dummy value
        self.data = data

    def decompile(self, data, ttFont):
        """Keep the raw bytes; an unknown format cannot be decoded.

        We usually get here indirectly from the subtable __getattr__
        function, in which case both args must be None. If not, someone is
        calling the subtable decompile() directly, and must provide both
        args; *data* is then the complete subtable.
        """
        if data is not None and ttFont is not None:
            # The whole subtable is handed in; there is no offset/length to
            # slice by at this level.
            self.decompileHeader(data, ttFont)
        else:
            assert (data is None and ttFont is None), "Need both data and ttFont arguments"

    def compile(self, ttFont):
        """Return the stored raw bytes, or None when there are none."""
        if self.data:
            return self.data
        else:
            return None
Just | 7842e56 | 1999-12-16 21:34:53 +0000 | [diff] [blame] | 1271 | |
# Maps a cmap subtable format number to the class implementing it.
cmap_classes = dict((
    (0, cmap_format_0),
    (2, cmap_format_2),
    (4, cmap_format_4),
    (6, cmap_format_6),
    (12, cmap_format_12),
    (13, cmap_format_13),
    (14, cmap_format_14),
))