jvr | 9be387c | 2008-03-01 11:43:01 +0000 | [diff] [blame] | 1 | import sys |
Just | 7842e56 | 1999-12-16 21:34:53 +0000 | [diff] [blame] | 2 | import DefaultTable |
| 3 | import struct |
Just | 7842e56 | 1999-12-16 21:34:53 +0000 | [diff] [blame] | 4 | import array |
jvr | d299b55 | 2006-10-21 13:54:30 +0000 | [diff] [blame] | 5 | import operator |
Just | 7842e56 | 1999-12-16 21:34:53 +0000 | [diff] [blame] | 6 | from fontTools import ttLib |
| 7 | from fontTools.misc.textTools import safeEval, readHex |
jvr | 22dcb9e | 2002-05-10 19:03:34 +0000 | [diff] [blame] | 8 | from types import TupleType |
Just | 7842e56 | 1999-12-16 21:34:53 +0000 | [diff] [blame] | 9 | |
| 10 | |
class table__c_m_a_p(DefaultTable.DefaultTable):
    """The 'cmap' table: a version number plus a list of encoding subtables.

    After decompile() or fromXML(), ``self.tableVersion`` holds the table
    version and ``self.tables`` the list of subtable objects, each carrying
    ``platformID`` and ``platEncID`` attributes.
    """

    def getcmap(self, platformID, platEncID):
        """Return the first subtable with the given IDs, or None if absent."""
        for subtable in self.tables:
            if (subtable.platformID == platformID and
                    subtable.platEncID == platEncID):
                return subtable
        return None  # not found

    def _newSubtable(self, subtableFormat):
        """Instantiate the subtable class registered for *subtableFormat*.

        Falls back to cmap_format_unknown when the format has no entry in
        cmap_classes.
        """
        subtableClass = cmap_classes.get(subtableFormat, cmap_format_unknown)
        return subtableClass(subtableFormat)

    def decompile(self, data, ttFont):
        """Parse the binary table *data* into ``self.tables``.

        Only each subtable's header is decompiled here; the rest is
        decompiled lazily when an attribute of the subtable is referenced.
        Subtables reported with zero length are skipped with a message on
        stdout. Encoding records that point at the same offset share one
        ``cmap`` dictionary.
        """
        tableVersion, numSubTables = struct.unpack(">HH", data[:4])
        self.tableVersion = int(tableVersion)
        self.tables = tables = []
        seenOffsets = {}  # subtable offset -> index into 'tables'
        for i in range(numSubTables):
            platformID, platEncID, offset = struct.unpack(
                ">HHl", data[4 + i * 8:4 + (i + 1) * 8])
            platformID, platEncID = int(platformID), int(platEncID)
            subtableFormat, length = struct.unpack(">HH", data[offset:offset + 4])
            if subtableFormat in [8, 10, 12]:
                # these formats have a reserved field and a 32-bit length
                subtableFormat, reserved, length = struct.unpack(
                    ">HHL", data[offset:offset + 8])
            elif subtableFormat in [14]:
                # format 14 has a 32-bit length right after the format
                subtableFormat, length = struct.unpack(">HL", data[offset:offset + 6])

            if not length:
                sys.stdout.write("Error: cmap subtable is reported as having zero length: platformID %s, platEncID %s, format %s offset %s. Skipping table." % (platformID, platEncID, subtableFormat, offset) + "\n")
                continue
            table = self._newSubtable(subtableFormat)
            table.platformID = platformID
            table.platEncID = platEncID
            # Note that by default we decompile only the subtable header info;
            # any other data gets decompiled only when an attribute of the
            # subtable is referenced.
            table.decompileHeader(data[offset:offset + int(length)], ttFont)
            if offset in seenOffsets:
                table.cmap = tables[seenOffsets[offset]].cmap
            else:
                # Record the position in 'tables', not the loop index: the two
                # differ as soon as a zero-length subtable has been skipped.
                seenOffsets[offset] = len(tables)
            tables.append(table)

    def compile(self, ttFont):
        """Return the binary table built from ``self.tables``.

        The subtables are sorted in place first. Subtables that share a cmap
        object, or that compile to identical data, are written only once and
        referenced by several encoding records.
        """
        self.tables.sort()  # sort according to the spec; see CmapSubtable.__cmp__()
        numSubTables = len(self.tables)
        totalOffset = 4 + 8 * numSubTables
        data = struct.pack(">HH", self.tableVersion, numSubTables)
        tableData = ""
        seen = {}  # Some tables are the same object reference. Don't compile them twice.
        done = {}  # Some tables are different objects, but compile to the same data chunk
        for table in self.tables:
            try:
                offset = seen[id(table.cmap)]
            except KeyError:
                chunk = table.compile(ttFont)
                if chunk in done:
                    offset = done[chunk]
                else:
                    offset = seen[id(table.cmap)] = done[chunk] = totalOffset + len(tableData)
                    tableData = tableData + chunk
            data = data + struct.pack(">HHl", table.platformID, table.platEncID, offset)
        return data + tableData

    def toXML(self, writer, ttFont):
        """Write the table version and then every subtable to *writer*."""
        writer.simpletag("tableVersion", version=self.tableVersion)
        writer.newline()
        for table in self.tables:
            table.toXML(writer, ttFont)

    def fromXML(self, element, ttFont):
        """Consume one XML child element, given as (name, attrs, content).

        A "tableVersion" element sets the version; a "cmap_format_N" element
        creates a subtable of format N and appends it to ``self.tables``.
        Any other element is ignored.
        """
        name, attrs, content = element
        if name == "tableVersion":
            self.tableVersion = safeEval(attrs["version"])
            return
        if name[:12] != "cmap_format_":
            return
        if not hasattr(self, "tables"):
            self.tables = []
        subtableFormat = safeEval(name[12:])
        table = self._newSubtable(subtableFormat)
        table.platformID = safeEval(attrs["platformID"])
        table.platEncID = safeEval(attrs["platEncID"])
        table.fromXML((name, attrs, content), ttFont)
        self.tables.append(table)
| 98 | |
| 99 | |
class CmapSubtable:
    """Base class for cmap subtables with lazy decompilation.

    decompileHeader() stores the bytes that follow the 6-byte header in
    ``self.data``. The first access to an attribute that is not set yet
    (typically ``cmap``) triggers ``self.decompile(None, None)``, which
    subclasses implement to parse that saved data.
    """

    def __init__(self, format):
        self.format = format
        self.data = None
        self.ttFont = None

    def __getattr__(self, attr):
        """Decompile the saved data on demand, then retry the lookup.

        Raises AttributeError for dunder names, and when there is no saved
        data left to decompile.
        """
        # allow lazy decompilation of subtables.
        if attr[:2] == '__':  # don't handle requests for member functions like '__lt__'
            raise AttributeError(attr)
        # Read 'data' from the instance dict rather than via self.data: if the
        # attribute is missing (instance created without __init__), self.data
        # would re-enter __getattr__ and recurse without end.
        if self.__dict__.get("data") is None:
            raise AttributeError(attr)
        self.decompile(None, None)  # use saved data.
        self.data = None  # Once this table has been decompiled, make sure we don't
                          # just return the original data. Also avoids recursion when
                          # called with an attribute that the cmap subtable doesn't have.
        return getattr(self, attr)

    def decompileHeader(self, data, ttFont):
        """Parse the format/length/language header of *data*.

        Sets ``format``, ``length`` and ``language``, and keeps the remaining
        bytes and *ttFont* for the later lazy decompile. Asserts that the
        length field equals ``len(data)``.
        """
        format, length, language = struct.unpack(">HHH", data[:6])
        assert len(data) == length, "corrupt cmap table format %d (data length: %d, header length: %d)" % (format, len(data), length)
        self.format = int(format)
        self.length = int(length)
        self.language = int(language)
        self.data = data[6:]
        self.ttFont = ttFont

    def toXML(self, writer, ttFont):
        """Write this subtable as an element named after its class.

        The mappings are written in ascending character code order.
        """
        tagName = self.__class__.__name__
        writer.begintag(tagName, [
            ("platformID", self.platformID),
            ("platEncID", self.platEncID),
            ("language", self.language),
        ])
        writer.newline()
        codes = sorted(self.cmap.items())
        self._writeCodes(codes, writer)
        writer.endtag(tagName)
        writer.newline()

    def _writeCodes(self, codes, writer):
        """Write one <map> element per (code, name) pair in *codes*.

        For platform 0 and for platform 3 with encoding 1 or 10, each entry
        is followed by a comment holding ``Unicode[code]``.
        """
        if (self.platformID, self.platEncID) == (3, 1) or (self.platformID, self.platEncID) == (3, 10) or self.platformID == 0:
            from fontTools.unicode import Unicode
            isUnicode = 1
        else:
            isUnicode = 0
        for code, name in codes:
            writer.simpletag("map", code=hex(code), name=name)
            if isUnicode:
                writer.comment(Unicode[code])
            writer.newline()

    def __cmp__(self, other):
        # implemented so that list.sort() sorts according to the cmap spec.
        selfTuple = (
            self.platformID,
            self.platEncID,
            self.language,
            self.__dict__)
        otherTuple = (
            other.platformID,
            other.platEncID,
            other.language,
            other.__dict__)
        return cmp(selfTuple, otherTuple)
| 166 | |
| 167 | |
class cmap_format_0(CmapSubtable):
    """Format 0 subtable: a byte array of glyph IDs indexed by character code."""

    def decompile(self, data, ttFont):
        """Build ``self.cmap`` (character code -> glyph name).

        Usually called from CmapSubtable.__getattr__ with both arguments
        None, in which case the data saved by decompileHeader() is used.
        A direct caller must supply both *data* (the complete subtable,
        header included) and *ttFont*.
        """
        # we usually get here indirectly from the subtable __getattr__ function, in which case both args must be None.
        # If not, someone is calling the subtable decompile() directly, and must provide both args.
        if data is not None and ttFont is not None:
            # 'data' is already the complete subtable; hand it over as is.
            self.decompileHeader(data, ttFont)
        else:
            assert (data is None and ttFont is None), "Need both data and ttFont arguments"
        # decompileHeader assigns the data after the header to self.data
        assert 262 == self.length, "Format 0 cmap subtable not 262 bytes"
        glyphIdArray = array.array("B", self.data)
        getGlyphName = self.ttFont.getGlyphName
        names = [getGlyphName(glyphID) for glyphID in glyphIdArray]
        self.cmap = cmap = {}
        for charCode, glyphName in enumerate(names):
            cmap[charCode] = glyphName

    def compile(self, ttFont):
        """Return the 262-byte binary subtable.

        If the raw data saved by decompileHeader() is still present it is
        written back unchanged; otherwise the table is built from
        ``self.cmap``, which must map exactly the codes 0..255.
        """
        if self.data:
            return struct.pack(">HHH", 0, 262, self.language) + self.data

        charCodeList = sorted(self.cmap.items())
        charCodes = [entry[0] for entry in charCodeList]
        glyphNames = [entry[1] for entry in charCodeList]
        assert charCodes == list(range(256))
        valueList = [ttFont.getGlyphID(glyphName) for glyphName in glyphNames]

        glyphIdArray = array.array("B", valueList)
        data = struct.pack(">HHH", 0, 262, self.language) + glyphIdArray.tostring()
        assert len(data) == 262
        return data

    def fromXML(self, element, ttFont):
        """Load language and mappings from an XML element (name, attrs, content).

        Each <map> child adds one character code -> glyph name entry to
        ``self.cmap``; other children are ignored.
        """
        name, attrs, content = element
        self.language = safeEval(attrs["language"])
        if not hasattr(self, "cmap"):
            self.cmap = {}
        cmap = self.cmap
        for child in content:
            if not isinstance(child, tuple):
                continue
            childName, childAttrs, childContent = child
            if childName != "map":
                continue
            cmap[safeEval(childAttrs["code"])] = childAttrs["name"]
Just | 7842e56 | 1999-12-16 21:34:53 +0000 | [diff] [blame] | 216 | |
| 217 | |
# struct layout of one format 2 subheader, in field order:
# firstCode, entryCount, idDelta, idRangeOffset.
subHeaderFormat = ">HHhH"


class SubHeader:
    """Plain record for one cmap format 2 subheader.

    The four scalar fields start out as None; glyphIndexArray starts out as
    a fresh empty list owned by the instance.
    """

    def __init__(self):
        self.firstCode = self.entryCount = None
        self.idDelta = self.idRangeOffset = None
        self.glyphIndexArray = list()
| 226 | |
class cmap_format_2(CmapSubtable):
    """Format 2 subtable: mixed one-byte / two-byte character codes.

    The binary layout is a 256-entry subHeaderKeys array, a list of
    subheaders and a glyph index array; see the long comment in decompile()
    for how a character code is mapped.
    """

    def setIDDelta(self, subHeader):
        """Choose idDelta for *subHeader* and rebase its glyphIndexArray.

        idDelta is picked so that the lowest non-zero glyph index stored in
        the array becomes 1. Zero entries (notdef) are left untouched.
        """
        subHeader.idDelta = 0
        # find the minGI which is not zero.
        minGI = subHeader.glyphIndexArray[0]
        for gid in subHeader.glyphIndexArray:
            if (gid != 0) and (gid < minGI):
                minGI = gid
        # The lowest gid in glyphIndexArray, after subtracting idDelta, must be 1.
        # idDelta is a short, and must be between -32K and 32K. minGI can be between 1 and 64K.
        # We would like to pick an idDelta such that the first glyphArray GID is 1,
        # so that we are more likely to be able to combine glypharray GID subranges.
        # This means that we have a problem when minGI is > 32K
        # Since the final gi is reconstructed from the glyphArray GID by:
        #    (short)finalGID = (gid + idDelta) % 0x10000),
        # we can get from a glypharray GID of 1 to a final GID of 65K by subtracting 2, and casting the
        # negative number to an unsigned short.

        if minGI > 1:
            if minGI > 0x7FFF:
                subHeader.idDelta = -(0x10000 - minGI) - 1
            else:
                subHeader.idDelta = minGI - 1
            idDelta = subHeader.idDelta
            for i in range(subHeader.entryCount):
                gid = subHeader.glyphIndexArray[i]
                if gid > 0:
                    # Wrap to 16 bits: with a negative idDelta the plain
                    # difference exceeds 0xFFFF and could not be packed as an
                    # unsigned short. decompile() applies the same modulus.
                    subHeader.glyphIndexArray[i] = (gid - idDelta) % 0x10000

    def decompile(self, data, ttFont):
        """Build ``self.cmap`` (character code -> glyph name).

        Usually called from CmapSubtable.__getattr__ with both arguments
        None, in which case the data saved by decompileHeader() is used.
        A direct caller must supply both *data* (the complete subtable,
        header included) and *ttFont*.
        """
        # we usually get here indirectly from the subtable __getattr__ function, in which case both args must be None.
        # If not, someone is calling the subtable decompile() directly, and must provide both args.
        if data is not None and ttFont is not None:
            # 'data' is already the complete subtable; hand it over as is.
            self.decompileHeader(data, ttFont)
        else:
            assert (data is None and ttFont is None), "Need both data and ttFont arguments"

        data = self.data  # decompileHeader assigns the data after the header to self.data
        # get the key array, and determine the number of subHeaders.
        allKeys = array.array("H", data[:512])
        data = data[512:]
        if sys.byteorder != "big":
            allKeys.byteswap()
        subHeaderKeys = [key // 8 for key in allKeys]
        maxSubHeaderindex = max(subHeaderKeys)

        # Load subHeaders
        subHeaderList = []
        pos = 0
        for i in range(maxSubHeaderindex + 1):
            subHeader = SubHeader()
            (subHeader.firstCode, subHeader.entryCount, subHeader.idDelta,
                subHeader.idRangeOffset) = struct.unpack(subHeaderFormat, data[pos:pos + 8])
            pos += 8
            # idRangeOffset counts from the idRangeOffset word itself, which
            # is the last two bytes of the subheader just read.
            giDataPos = pos + subHeader.idRangeOffset - 2
            giList = array.array("H", data[giDataPos:giDataPos + subHeader.entryCount * 2])
            if sys.byteorder != "big":
                giList.byteswap()
            subHeader.glyphIndexArray = giList
            subHeaderList.append(subHeader)
        # How this gets processed.
        # Charcodes may be one or two bytes.
        # The first byte of a charcode is mapped through the subHeaderKeys, to select
        # a subHeader. For any subheader but 0, the next byte is then mapped through the
        # selected subheader. If subheader Index 0 is selected, then the byte itself is
        # mapped through the subheader, and there is no second byte.
        # Then assume that the subsequent byte is the first byte of the next charcode, and repeat.
        #
        # Each subheader references a range in the glyphIndexArray whose length is entryCount.
        # The range in glyphIndexArray referenced by a subheader may overlap with the range in glyphIndexArray
        # referenced by another subheader.
        # The only subheader that will be referenced by more than one first-byte value is the subheader
        # that maps the entire range of glyphID values to glyphIndex 0, e.g notdef:
        #    {firstChar 0, EntryCount 0,idDelta 0,idRangeOffset xx}
        # A byte being mapped though a subheader is treated as in index into a mapping of array index to font glyphIndex.
        # A subheader specifies a subrange within (0...256) by the
        # firstChar and EntryCount values. If the byte value is outside the subrange, then the glyphIndex is zero
        # (e.g. glyph not in font).
        # If the byte index is in the subrange, then an offset index is calculated as (byteIndex - firstChar).
        # The index to glyphIndex mapping is a subrange of the glyphIndexArray. You find the start of the subrange by
        # counting idRangeOffset bytes from the idRangeOffset word. The first value in this subrange is the
        # glyphIndex for the index firstChar. The offset index should then be used in this array to get the glyphIndex.
        # Example for Logocut-Medium
        # first byte of charcode = 129; selects subheader 1.
        # subheader 1 = {firstChar 64, EntryCount 108,idDelta 42,idRangeOffset 0252}
        # second byte of charCode = 66
        # the index offset = 66-64 = 2.
        # The subrange of the glyphIndexArray starting at 0x0252 bytes from the idRangeOffset word is:
        # [glyphIndexArray index], [subrange array index] = glyphIndex
        # [256], [0]=1  from charcode [129, 64]
        # [257], [1]=2  from charcode [129, 65]
        # [258], [2]=3  from charcode [129, 66]
        # [259], [3]=4  from charcode [129, 67]
        # So, the glyphIndex = 3 from the array. Then if idDelta is not zero and the glyph ID is not zero,
        # add it to the glyphID to get the final glyphIndex
        # value. In this case the final glyph index = 3+ 42 -> 45 for the final glyphIndex. Whew!

        self.data = ""
        self.cmap = cmap = {}
        for firstByte in range(256):
            subHeadindex = subHeaderKeys[firstByte]
            subHeader = subHeaderList[subHeadindex]
            if subHeadindex == 0:
                if (firstByte < subHeader.firstCode) or (firstByte >= subHeader.firstCode + subHeader.entryCount):
                    continue  # gi is notdef.
                else:
                    charCode = firstByte
                    offsetIndex = firstByte - subHeader.firstCode
                    gi = subHeader.glyphIndexArray[offsetIndex]
                    if gi != 0:
                        gi = (gi + subHeader.idDelta) % 0x10000
                    else:
                        continue  # gi is notdef.
                cmap[charCode] = gi
            else:
                if subHeader.entryCount:
                    charCodeOffset = firstByte * 256 + subHeader.firstCode
                    for offsetIndex in range(subHeader.entryCount):
                        charCode = charCodeOffset + offsetIndex
                        gi = subHeader.glyphIndexArray[offsetIndex]
                        if gi != 0:
                            gi = (gi + subHeader.idDelta) % 0x10000
                        else:
                            continue
                        cmap[charCode] = gi
                # If not subHeader.entryCount, then all char codes with this first byte are
                # mapped to .notdef. We can skip this subtable, and leave the glyphs un-encoded, which is the
                # same as mapping it to .notdef.
        # cmap values are GID's; replace them with glyph names.
        glyphOrder = self.ttFont.getGlyphOrder()
        charCodes = list(cmap.keys())
        gids = [cmap[charCode] for charCode in charCodes]
        try:
            names = [glyphOrder[gid] for gid in gids]
        except IndexError:
            # some GID is beyond the glyph order; let the font name them.
            getGlyphName = self.ttFont.getGlyphName
            names = [getGlyphName(gid) for gid in gids]
        for charCode, glyphName in zip(charCodes, names):
            cmap[charCode] = glyphName

    def compile(self, ttFont):
        """Return the binary subtable.

        If the raw data saved by decompileHeader() is still present it is
        written back unchanged; otherwise the table is built from
        ``self.cmap``. Raises KeyError for a glyph name that cannot be
        resolved to a glyph ID.
        """
        if self.data:
            return struct.pack(">HHH", self.format, self.length, self.language) + self.data
        kEmptyTwoCharCodeRange = -1
        notdefGI = 0

        items = sorted(self.cmap.items())
        charCodes = [item[0] for item in items]
        names = [item[1] for item in items]
        nameMap = ttFont.getReverseGlyphMap()
        try:
            gids = [nameMap[name] for name in names]
        except KeyError:
            nameMap = ttFont.getReverseGlyphMap(rebuild=1)
            try:
                gids = [nameMap[name] for name in names]
            except KeyError:
                # allow virtual GIDs in format 2 tables
                gids = []
                for name in names:
                    try:
                        gid = nameMap[name]
                    except KeyError:
                        try:
                            if name[:3] == 'gid':
                                # int(), not eval(): the suffix is a decimal
                                # number, and eval() would read a zero-padded
                                # one as octal and run arbitrary text.
                                gid = int(name[3:])
                            else:
                                gid = ttFont.getGlyphID(name)
                        except Exception:
                            raise KeyError(name)

                    gids.append(gid)

        # Process the (char code to gid) item list in char code order.
        # By definition, all one byte char codes map to subheader 0.
        # For all the two byte char codes, we assume that the first byte maps to the empty subhead (with an entry count of 0,
        # which defines all char codes in its range to map to notdef) unless proven otherwise.
        # Note that since the char code items are processed in char code order, all the char codes with the
        # same first byte are in sequential order.

        subHeaderKeys = [kEmptyTwoCharCodeRange for x in range(256)]  # list of indices into subHeaderList.
        subHeaderList = []

        # We force this subheader entry 0 to exist in the subHeaderList in the case where some one comes up
        # with a cmap where all the one byte char codes map to notdef,
        # with the result that the subhead 0 would not get created just by processing the item list.
        charCode = charCodes[0]
        if charCode > 255:
            subHeader = SubHeader()
            subHeader.firstCode = 0
            subHeader.entryCount = 0
            subHeader.idDelta = 0
            subHeader.idRangeOffset = 0
            subHeaderList.append(subHeader)

        lastFirstByte = -1
        items = zip(charCodes, gids)
        for charCode, gid in items:
            if gid == 0:
                continue
            firstbyte = charCode >> 8
            secondByte = charCode & 0x00FF

            if firstbyte != lastFirstByte:  # Need to update the current subhead, and start a new one.
                if lastFirstByte > -1:
                    # fix GI's and iDelta of current subheader.
                    self.setIDDelta(subHeader)

                    # If it was subheader 0 for one-byte charCodes, then we need to set the subHeaderKeys value to zero
                    # for the indices matching the char codes.
                    if lastFirstByte == 0:
                        for index in range(subHeader.entryCount):
                            charCode = subHeader.firstCode + index
                            subHeaderKeys[charCode] = 0

                    assert (subHeader.entryCount == len(subHeader.glyphIndexArray)), "Error - subhead entry count does not match len of glyphID subrange."
                # init new subheader
                subHeader = SubHeader()
                subHeader.firstCode = secondByte
                subHeader.entryCount = 1
                subHeader.glyphIndexArray.append(gid)
                subHeaderList.append(subHeader)
                subHeaderKeys[firstbyte] = len(subHeaderList) - 1
                lastFirstByte = firstbyte
            else:
                # need to fill in with notdefs all the code points between the last charCode and the current charCode.
                codeDiff = secondByte - (subHeader.firstCode + subHeader.entryCount)
                for i in range(codeDiff):
                    subHeader.glyphIndexArray.append(notdefGI)
                subHeader.glyphIndexArray.append(gid)
                subHeader.entryCount = subHeader.entryCount + codeDiff + 1

        # fix GI's and iDelta of last subheader that we added to the subheader array.
        self.setIDDelta(subHeader)

        # Now we add a final subheader for the subHeaderKeys which maps to empty two byte charcode ranges.
        subHeader = SubHeader()
        subHeader.firstCode = 0
        subHeader.entryCount = 0
        subHeader.idDelta = 0
        subHeader.idRangeOffset = 2
        subHeaderList.append(subHeader)
        emptySubheadIndex = len(subHeaderList) - 1
        for index in range(256):
            if subHeaderKeys[index] == kEmptyTwoCharCodeRange:
                subHeaderKeys[index] = emptySubheadIndex
        # Since this is the last subheader, the GlyphIndex Array starts two bytes after the start of the
        # idRangeOffset word of this subHeader. We can safely point to the first entry in the GlyphIndexArray,
        # since the first subrange of the GlyphIndexArray is for subHeader 0, which always starts with
        # charcode 0 and GID 0.

        idRangeOffset = (len(subHeaderList) - 1) * 8 + 2  # offset to beginning of glyphIDArray from first subheader idRangeOffset.
        subheadRangeLen = len(subHeaderList) - 1  # skip last special empty-set subheader; we've already hardcoded its idRangeOffset to 2.
        for index in range(subheadRangeLen):
            subHeader = subHeaderList[index]
            subHeader.idRangeOffset = 0
            for j in range(index):
                prevSubhead = subHeaderList[j]
                if prevSubhead.glyphIndexArray == subHeader.glyphIndexArray:  # use the glyphIndexArray subarray
                    subHeader.idRangeOffset = prevSubhead.idRangeOffset - (index - j) * 8
                    subHeader.glyphIndexArray = []
                    break
            if subHeader.idRangeOffset == 0:  # didn't find one.
                subHeader.idRangeOffset = idRangeOffset
                idRangeOffset = (idRangeOffset - 8) + subHeader.entryCount * 2  # one less subheader, one more subArray.
            else:
                idRangeOffset = idRangeOffset - 8  # one less subheader

        # Now we can write out the data!
        length = 6 + 512 + 8 * len(subHeaderList)  # header, 256 subHeaderKeys, and subheader array.
        for subhead in subHeaderList[:-1]:
            length = length + len(subhead.glyphIndexArray) * 2  # We can't use subhead.entryCount, as some of the subhead may share subArrays.
        dataList = [struct.pack(">HHH", 2, length, self.language)]
        for index in subHeaderKeys:
            dataList.append(struct.pack(">H", index * 8))
        for subhead in subHeaderList:
            dataList.append(struct.pack(subHeaderFormat, subhead.firstCode, subhead.entryCount, subhead.idDelta, subhead.idRangeOffset))
        for subhead in subHeaderList[:-1]:
            for gi in subhead.glyphIndexArray:
                dataList.append(struct.pack(">H", gi))
        data = "".join(dataList)
        assert (len(data) == length), "Error: cmap format 2 is not same length as calculated! actual: " + str(len(data)) + " calc : " + str(length)
        return data

    def fromXML(self, element, ttFont):
        """Load language and mappings from an XML element (name, attrs, content).

        Each <map> child adds one character code -> glyph name entry to
        ``self.cmap``; other children are ignored.
        """
        name, attrs, content = element
        self.language = safeEval(attrs["language"])
        if not hasattr(self, "cmap"):
            self.cmap = {}
        cmap = self.cmap

        for child in content:
            if not isinstance(child, tuple):
                continue
            childName, childAttrs, childContent = child
            if childName != "map":
                continue
            cmap[safeEval(childAttrs["code"])] = childAttrs["name"]
Just | 7842e56 | 1999-12-16 21:34:53 +0000 | [diff] [blame] | 536 | |
| 537 | |
# struct format of the fixed-size part of a format 4 subtable, as packed by
# cmap_format_4.compile(): format, length, language, segCountX2, searchRange,
# entrySelector, rangeShift -- seven big-endian unsigned 16-bit values.
cmap_format_4_format = ">7H"

# The variable-size arrays that follow that fixed part:
#uint16 endCode[segCount] # Ending character code for each segment, last = 0xFFFF.
#uint16 reservedPad # This value should be zero
#uint16 startCode[segCount] # Starting character code for each segment
#uint16 idDelta[segCount] # Delta for all character codes in segment
#uint16 idRangeOffset[segCount] # Offset in bytes to glyph indexArray, or 0
#uint16 glyphIndexArray[variable] # Glyph index array
Just | 7842e56 | 1999-12-16 21:34:53 +0000 | [diff] [blame] | 546 | |
def splitRange(startCode, endCode, cmap):
    """Split one run of consecutive character codes into cheaper segments.

    cmap maps every code in startCode..endCode (inclusive) to a glyph ID.
    The result is a pair (start, end): `end` lists the end code of every
    resulting segment, `start` lists the start code of every segment except
    the first (whose start is startCode, already known to the caller), so
    len(start) + 1 == len(end).

    The heuristic is not proven optimal: sub-runs with consecutive glyph IDs
    are split off only when they are long enough to pay for the extra
    segment(s) they cost.
    """
    if startCode == endCode:
        return [], [endCode]

    # Pass 1: collect the maximal sub-runs whose glyph IDs are consecutive.
    runs = []
    runStart = None  # first code of the run being built, None if not in one
    prevCode = startCode
    prevGID = cmap[startCode]
    for code in range(startCode + 1, endCode + 1):
        gid = cmap[code]
        if gid == prevGID + 1:
            if runStart is None:
                runStart = prevCode
        elif runStart is not None:
            runs.append((runStart, prevCode))
            runStart = None
        prevGID = gid
        prevCode = code
    if runStart is not None:
        runs.append((runStart, prevCode))
    assert prevCode == endCode

    # Pass 2: drop runs that would only make the data bigger.  A new segment
    # costs 8 bytes; not using a new segment costs 2 bytes per character.
    kept = []
    for first, last in runs:
        touchesStart = (first == startCode)
        touchesEnd = (last == endCode)
        if touchesStart and touchesEnd:
            break  # the run is the whole range: nothing to split
        if touchesStart or touchesEnd:
            extraSegments = 1  # splitting here adds one segment
        else:
            extraSegments = 2  # splitting here adds two segments
        if (last - first + 1) > 4 * extraSegments:
            kept.append((first, last))

    if not kept:
        return [], [endCode]

    # Make sure the list of ranges reaches both ends of the input range.
    if kept[0][0] != startCode:
        kept.insert(0, (startCode, kept[0][0] - 1))
    if kept[-1][1] != endCode:
        kept.append((kept[-1][1] + 1, endCode))

    # Pass 3: fill the gaps between kept ranges; those gaps are the segments
    # whose glyph IDs are _not_ consecutive.
    filled = [kept[0]]
    for rng in kept[1:]:
        gapStart = filled[-1][1] + 1
        if gapStart != rng[0]:
            filled.append((gapStart, rng[0] - 1))
        filled.append(rng)

    # Pass 4: turn the ranges into start/end lists, leaving out the first
    # start code.
    start = [first for first, last in filled[1:]]
    end = [last for first, last in filled]

    assert len(start) + 1 == len(end)
    return start, end
| 624 | |
| 625 | |
class cmap_format_4(CmapSubtable):
    """cmap subtable, format 4: 16-bit character codes stored as segments.

    After decompile(), self.cmap maps character codes to glyph names.
    compile() turns self.cmap back into binary data.
    """

    def decompile(self, data, ttFont):
        """Build self.cmap (character code -> glyph name) from binary data.

        We usually get here indirectly from the subtable __getattr__
        function, in which case both args must be None and the header has
        already been read.  If not, someone is calling decompile() directly
        and must provide both args.
        """
        if data is not None and ttFont is not None:
            # Direct call: `data` is the subtable itself, so hand it to
            # decompileHeader() unchanged.  (This used to slice with the
            # undefined names `offset` and `length`, raising NameError.)
            self.decompileHeader(data, ttFont)
        else:
            assert (data is None and ttFont is None), "Need both data and ttFont arguments"

        data = self.data  # decompileHeader assigns the data after the header to self.data
        (segCountX2, searchRange, entrySelector, rangeShift) = \
            struct.unpack(">4H", data[:8])
        data = data[8:]
        segCount = segCountX2 // 2

        allCodes = array.array("H")
        allCodes.fromstring(data)
        self.data = data = None  # the raw bytes are no longer needed

        # The file data is big-endian; array uses the host byte order.
        if sys.byteorder != "big":
            allCodes.byteswap()

        # divide the data
        endCode = allCodes[:segCount]
        allCodes = allCodes[segCount+1:]  # the +1 is skipping the reservedPad field
        startCode = allCodes[:segCount]
        allCodes = allCodes[segCount:]
        idDelta = allCodes[:segCount]
        allCodes = allCodes[segCount:]
        idRangeOffset = allCodes[:segCount]
        glyphIndexArray = allCodes[segCount:]
        lenGIArray = len(glyphIndexArray)

        # build 2-byte character mapping
        charCodes = []
        gids = []
        for i in range(len(startCode) - 1):  # don't do 0xffff!
            rangeCharCodes = range(startCode[i], endCode[i] + 1)
            charCodes.extend(rangeCharCodes)
            rangeOffset = idRangeOffset[i]
            for charCode in rangeCharCodes:
                if rangeOffset == 0:
                    glyphID = charCode + idDelta[i]
                else:
                    # Turn the byte offset stored for segment i into an index
                    # into glyphIndexArray.  This is the inverse of the
                    # idRangeOffset computation in compile().
                    index = rangeOffset // 2 + (charCode - startCode[i]) + i - len(idRangeOffset)
                    assert (index < lenGIArray), "In format 4 cmap, range (%d), the calculated index (%d) into the glyph index array  is not less than the length of the array (%d) !" % (i, index, lenGIArray)
                    if glyphIndexArray[index] != 0:  # if not missing glyph
                        glyphID = glyphIndexArray[index] + idDelta[i]
                    else:
                        glyphID = 0  # missing glyph
                # idDelta arithmetic is modulo 64K.
                gids.append(glyphID % 0x10000)

        self.cmap = cmap = {}
        glyphOrder = self.ttFont.getGlyphOrder()
        try:
            names = [glyphOrder[gid] for gid in gids]
        except IndexError:
            # At least one glyph ID is beyond the glyph order: ask the font
            # for every name instead.
            getGlyphName = self.ttFont.getGlyphName
            names = [getGlyphName(gid) for gid in gids]
        for charCode, glyphName in zip(charCodes, names):
            cmap[charCode] = glyphName

    def setIDDelta(self, idDelta):
        """Return idDelta wrapped into the signed 16-bit range.

        The lowest gid in glyphIndexArray, after subtracting idDelta, must
        be 1.  idDelta is a short, but startCode can be between 0 and 64K-1
        and the first glyph index between 1 and 64K-1, so the raw difference
        can lie between -(64K-2) and 64K-1.  Since the final glyph ID is
        reconstructed modulo 0x10000, adding or subtracting 0x10000 gives an
        equivalent delta that fits in a short.
        """
        if idDelta > 0x7FFF:
            idDelta = idDelta - 0x10000
        elif idDelta < -0x8000:
            # -0x8000 itself is a valid short and must stay untouched; it
            # used to be wrapped to +0x8000, which does not fit the "h"
            # array built in compile().
            idDelta = idDelta + 0x10000

        return idDelta

    def compile(self, ttFont):
        """Return the binary form of this subtable.

        If the subtable was never decompiled (self.data still holds the raw
        bytes after the header), the original data is written back as is.
        Otherwise the segments are rebuilt from self.cmap, resolving glyph
        names through ttFont.
        """
        if self.data:
            return struct.pack(">HHH", self.format, self.length, self.language) + self.data

        from fontTools.ttLib.sfnt import maxPowerOfTwo

        charCodes = sorted(self.cmap)
        lenCharCodes = len(charCodes)
        if lenCharCodes == 0:
            startCode = [0xffff]
            endCode = [0xffff]
        else:
            names = [self.cmap[charCode] for charCode in charCodes]
            nameMap = ttFont.getReverseGlyphMap()
            try:
                gids = [nameMap[name] for name in names]
            except KeyError:
                nameMap = ttFont.getReverseGlyphMap(rebuild=1)
                try:
                    gids = [nameMap[name] for name in names]
                except KeyError:
                    # allow virtual GIDs in format 4 tables
                    gids = []
                    for name in names:
                        try:
                            gid = nameMap[name]
                        except KeyError:
                            try:
                                if (name[:3] == 'gid'):
                                    # NOTE(review): eval() on a glyph name;
                                    # unsafe if the names can come from an
                                    # untrusted source -- consider int().
                                    gid = eval(name[3:])
                                else:
                                    gid = ttFont.getGlyphID(name)
                            except:
                                # Any failure is reported as an unknown
                                # glyph name.
                                raise KeyError(name)

                        gids.append(gid)
            cmap = dict(zip(charCodes, gids))  # code:glyphID mapping

            # Build startCode and endCode lists.
            # Split the char codes in ranges of consecutive char codes, then split
            # each range in more ranges of consecutive/not consecutive glyph IDs.
            # See splitRange().
            lastCode = charCodes[0]
            endCode = []
            startCode = [lastCode]
            for charCode in charCodes[1:]:  # skip the first code, it's the first start code
                if charCode == lastCode + 1:
                    lastCode = charCode
                    continue
                start, end = splitRange(startCode[-1], lastCode, cmap)
                startCode.extend(start)
                endCode.extend(end)
                startCode.append(charCode)
                lastCode = charCode
            endCode.append(lastCode)
            startCode.append(0xffff)
            endCode.append(0xffff)

        # build up rest of cruft
        idDelta = []
        idRangeOffset = []
        glyphIndexArray = []
        for i in range(len(endCode) - 1):  # skip the closing codes (0xffff)
            indices = [cmap[charCode] for charCode in range(startCode[i], endCode[i] + 1)]
            if indices == list(range(indices[0], indices[0] + len(indices))):
                # Consecutive glyph IDs: a single delta describes the segment.
                idDelta.append(self.setIDDelta(indices[0] - startCode[i]))
                idRangeOffset.append(0)
            else:
                # Otherwise the glyph IDs go into glyphIndexArray, and the
                # offset is the distance in bytes from this segment's
                # idRangeOffset entry to its first glyph index.
                idDelta.append(0)
                idRangeOffset.append(2 * (len(endCode) + len(glyphIndexArray) - i))
                glyphIndexArray.extend(indices)
        idDelta.append(1)  # 0xffff + 1 == (tadaa!) 0. So this end code maps to .notdef
        idRangeOffset.append(0)

        # Binary-search helper fields of the header.
        segCount = len(endCode)
        segCountX2 = segCount * 2
        maxExponent = maxPowerOfTwo(segCount)
        searchRange = 2 * (2 ** maxExponent)
        entrySelector = maxExponent
        rangeShift = 2 * segCount - searchRange

        charCodeArray = array.array("H", endCode + [0] + startCode)
        idDeltaeArray = array.array("h", idDelta)
        restArray = array.array("H", idRangeOffset + glyphIndexArray)
        # The file data is big-endian; array uses the host byte order.
        if sys.byteorder != "big":
            charCodeArray.byteswap()
            idDeltaeArray.byteswap()
            restArray.byteswap()
        data = charCodeArray.tostring() + idDeltaeArray.tostring() + restArray.tostring()

        length = struct.calcsize(cmap_format_4_format) + len(data)
        header = struct.pack(cmap_format_4_format, self.format, length, self.language,
                segCountX2, searchRange, entrySelector, rangeShift)
        return header + data

    def fromXML(self, element, ttFont):
        """Populate this subtable from a parsed XML element.

        element is the (name, attrs, content) triple, passed as a single
        positional argument exactly as before.  Every tuple child must be a
        <map> element; anything else trips the assertion below.  ttFont is
        not used here.
        """
        name, attrs, content = element
        self.language = safeEval(attrs["language"])
        if not hasattr(self, "cmap"):
            self.cmap = {}
        cmap = self.cmap

        for child in content:
            # Non-tuple items in content (e.g. text between tags) are skipped.
            if not isinstance(child, tuple):
                continue
            nameMap, attrsMap, dummyContent = child
            if nameMap != "map":
                assert 0, "Unrecognized keyword in cmap subtable"
            cmap[safeEval(attrsMap["code"])] = attrsMap["name"]
Just | 7842e56 | 1999-12-16 21:34:53 +0000 | [diff] [blame] | 828 | |
| 829 | |
class cmap_format_6(CmapSubtable):
    """cmap subtable, format 6: one contiguous run of 16-bit character codes.

    After decompile(), self.cmap maps character codes to glyph names.
    compile() requires the codes in self.cmap to be consecutive.
    """

    def decompile(self, data, ttFont):
        """Build self.cmap (character code -> glyph name) from binary data.

        We usually get here indirectly from the subtable __getattr__
        function, in which case both args must be None and the header has
        already been read.  If not, someone is calling decompile() directly
        and must provide both args.
        """
        if data is not None and ttFont is not None:
            # Direct call: `data` is the subtable itself, so hand it to
            # decompileHeader() unchanged.  (This used to slice with the
            # undefined names `offset` and `length`, raising NameError.)
            self.decompileHeader(data, ttFont)
        else:
            assert (data is None and ttFont is None), "Need both data and ttFont arguments"

        data = self.data  # decompileHeader assigns the data after the header to self.data
        firstCode, entryCount = struct.unpack(">HH", data[:4])
        firstCode = int(firstCode)
        data = data[4:]
        #assert len(data) == 2 * entryCount  # XXX not true in Apple's Helvetica!!!
        glyphIndexArray = array.array("H")
        # Only the first entryCount values are read; any extra trailing
        # bytes are ignored.
        glyphIndexArray.fromstring(data[:2 * int(entryCount)])
        # The file data is big-endian; array uses the host byte order.
        if sys.byteorder != "big":
            glyphIndexArray.byteswap()
        self.data = data = None  # the raw bytes are no longer needed

        self.cmap = cmap = {}

        lenArray = len(glyphIndexArray)
        charCodes = range(firstCode, firstCode + lenArray)
        glyphOrder = self.ttFont.getGlyphOrder()
        try:
            names = [glyphOrder[gid] for gid in glyphIndexArray]
        except IndexError:
            # At least one glyph ID is beyond the glyph order: ask the font
            # for every name instead.
            getGlyphName = self.ttFont.getGlyphName
            names = [getGlyphName(gid) for gid in glyphIndexArray]
        for charCode, glyphName in zip(charCodes, names):
            cmap[charCode] = glyphName

    def compile(self, ttFont):
        """Return the binary form of this subtable.

        If the subtable was never decompiled (self.data still holds the raw
        bytes after the header), the original data is written back as is.
        Otherwise the glyph index array is rebuilt from self.cmap, whose
        codes must form one consecutive run (asserted).
        """
        if self.data:
            return struct.pack(">HHH", self.format, self.length, self.language) + self.data
        cmap = self.cmap
        codes = sorted(cmap)
        if codes:  # yes, there are empty cmap tables.
            lenCodes = len(codes)
            assert codes == list(range(codes[0], codes[0] + lenCodes))
            firstCode = codes[0]
            valueList = [ttFont.getGlyphID(cmap[code]) for code in codes]
            glyphIndexArray = array.array("H", valueList)
            # The file data is big-endian; array uses the host byte order.
            if sys.byteorder != "big":
                glyphIndexArray.byteswap()
            data = glyphIndexArray.tostring()
        else:
            data = ""
            firstCode = 0
        # 10 is the size of the five 16-bit header fields packed here.
        header = struct.pack(">HHHHH",
                6, len(data) + 10, self.language, firstCode, len(codes))
        return header + data

    def fromXML(self, element, ttFont):
        """Populate this subtable from a parsed XML element.

        element is the (name, attrs, content) triple, passed as a single
        positional argument exactly as before.  Every <map> child adds one
        entry to self.cmap; other children are ignored.  ttFont is not used
        here.
        """
        name, attrs, content = element
        self.language = safeEval(attrs["language"])
        if not hasattr(self, "cmap"):
            self.cmap = {}
        cmap = self.cmap

        for child in content:
            # Non-tuple items in content (e.g. text between tags) are skipped.
            if not isinstance(child, tuple):
                continue
            childName, childAttrs, childContent = child
            if childName != "map":
                continue
            cmap[safeEval(childAttrs["code"])] = childAttrs["name"]
Just | 7842e56 | 1999-12-16 21:34:53 +0000 | [diff] [blame] | 899 | |
| 900 | |
class cmap_format_12(CmapSubtable):
    """cmap subtable, format 12: 32-bit character codes stored as groups.

    Each group is a (startCharCode, endCharCode, startGlyphID) triple that
    maps a run of consecutive codes to a run of consecutive glyph IDs.
    After decompile(), self.cmap maps character codes to glyph names.
    """

    def __init__(self, format):
        self.format = format
        self.reserved = 0
        self.data = None
        self.ttFont = None

    def decompileHeader(self, data, ttFont):
        """Read the 16-byte header and keep the rest of data in self.data.

        data must be exactly the subtable: its size has to agree with both
        the length field and the number of groups (asserted).
        """
        format, reserved, length, language, nGroups = struct.unpack(">HHLLL", data[:16])
        assert len(data) == (16 + nGroups*12) == (length), "corrupt cmap table format 12 (data length: %d, header length: %d)" % (len(data), length)
        self.format = format
        self.reserved = reserved
        self.length = length
        self.language = language
        self.nGroups = nGroups
        self.data = data[16:]
        self.ttFont = ttFont

    def decompile(self, data, ttFont):
        """Build self.cmap (character code -> glyph name) from binary data.

        We usually get here indirectly from the subtable __getattr__
        function, in which case both args must be None and the header has
        already been read.  If not, someone is calling decompile() directly
        and must provide both args.
        """
        if data is not None and ttFont is not None:
            # Direct call: `data` is the subtable itself, so hand it to
            # decompileHeader() unchanged.  (This used to slice with the
            # undefined names `offset` and `length`, raising NameError.)
            self.decompileHeader(data, ttFont)
        else:
            assert (data is None and ttFont is None), "Need both data and ttFont arguments"

        data = self.data  # decompileHeader assigns the data after the header to self.data
        charCodes = []
        gids = []
        pos = 0
        for i in range(self.nGroups):
            startCharCode, endCharCode, glyphID = struct.unpack(">LLL", data[pos:pos+12])
            pos += 12
            lenGroup = 1 + endCharCode - startCharCode
            charCodes.extend(range(startCharCode, endCharCode + 1))
            gids.extend(range(glyphID, glyphID + lenGroup))
        self.data = data = None  # the raw bytes are no longer needed
        self.cmap = cmap = {}
        glyphOrder = self.ttFont.getGlyphOrder()
        try:
            names = [glyphOrder[gid] for gid in gids]
        except IndexError:
            # At least one glyph ID is beyond the glyph order: ask the font
            # for every name instead.
            getGlyphName = self.ttFont.getGlyphName
            names = [getGlyphName(gid) for gid in gids]
        for charCode, glyphName in zip(charCodes, names):
            cmap[charCode] = glyphName

    def compile(self, ttFont):
        """Return the binary form of this subtable.

        If the subtable was never decompiled (self.data still holds the raw
        bytes after the header), the original data is written back as is.
        Otherwise the groups are rebuilt from self.cmap, resolving glyph
        names through ttFont.  An empty self.cmap yields a header-only
        subtable with zero groups (it used to raise IndexError).
        """
        if self.data:
            return struct.pack(">HHLLL", self.format, self.reserved, self.length, self.language, self.nGroups) + self.data
        charCodes = sorted(self.cmap)
        if not charCodes:
            # No mappings: 16 header bytes, no groups.
            return struct.pack(">HHLLL", self.format, self.reserved, 16, self.language, 0)
        names = [self.cmap[charCode] for charCode in charCodes]
        nameMap = ttFont.getReverseGlyphMap()
        try:
            gids = [nameMap[name] for name in names]
        except KeyError:
            nameMap = ttFont.getReverseGlyphMap(rebuild=1)
            try:
                gids = [nameMap[name] for name in names]
            except KeyError:
                # allow virtual GIDs in format 12 tables
                gids = []
                for name in names:
                    try:
                        gid = nameMap[name]
                    except KeyError:
                        try:
                            if (name[:3] == 'gid'):
                                # NOTE(review): eval() on a glyph name;
                                # unsafe if the names can come from an
                                # untrusted source -- consider int().
                                gid = eval(name[3:])
                            else:
                                gid = ttFont.getGlyphID(name)
                        except:
                            # Any failure is reported as an unknown glyph
                            # name.
                            raise KeyError(name)

                    gids.append(gid)

        cmap = dict(zip(charCodes, gids))  # code:glyphID mapping

        # Walk the sorted codes and start a new group whenever either the
        # character code or the glyph ID stops being consecutive.
        startCharCode = charCodes[0]
        startGlyphID = cmap[startCharCode]
        # Primed so that the first code never closes a (nonexistent) group.
        lastGlyphID = startGlyphID - 1
        lastCharCode = startCharCode - 1
        nGroups = 0
        dataList = []
        for charCode in charCodes:
            glyphID = cmap[charCode]
            if (glyphID != 1 + lastGlyphID) or (charCode != 1 + lastCharCode):
                dataList.append(struct.pack(">LLL", startCharCode, lastCharCode, startGlyphID))
                startCharCode = charCode
                startGlyphID = glyphID
                nGroups = nGroups + 1
            lastGlyphID = glyphID
            lastCharCode = charCode
        # Close the group that was still open when the codes ran out.
        dataList.append(struct.pack(">LLL", startCharCode, lastCharCode, startGlyphID))
        nGroups = nGroups + 1
        data = "".join(dataList)
        lengthSubtable = len(data) + 16
        assert len(data) == (nGroups*12) == (lengthSubtable-16)
        return struct.pack(">HHLLL", self.format, self.reserved, lengthSubtable, self.language, nGroups) + data

    def toXML(self, writer, ttFont):
        """Write this subtable, header fields included, to writer."""
        writer.begintag(self.__class__.__name__, [
                ("platformID", self.platformID),
                ("platEncID", self.platEncID),
                ("format", self.format),
                ("reserved", self.reserved),
                ("length", self.length),
                ("language", self.language),
                ("nGroups", self.nGroups),
                ])
        writer.newline()
        codes = sorted(self.cmap.items())
        self._writeCodes(codes, writer)
        writer.endtag(self.__class__.__name__)
        writer.newline()

    def fromXML(self, element, ttFont):
        """Populate this subtable from a parsed XML element.

        element is the (name, attrs, content) triple, passed as a single
        positional argument exactly as before.  The header attributes are
        evaluated with safeEval; every <map> child adds one entry to
        self.cmap and other children are ignored.  ttFont is not used here.
        """
        name, attrs, content = element
        self.format = safeEval(attrs["format"])
        self.reserved = safeEval(attrs["reserved"])
        self.length = safeEval(attrs["length"])
        self.language = safeEval(attrs["language"])
        self.nGroups = safeEval(attrs["nGroups"])
        if not hasattr(self, "cmap"):
            self.cmap = {}
        cmap = self.cmap

        for child in content:
            # Non-tuple items in content (e.g. text between tags) are skipped.
            if not isinstance(child, tuple):
                continue
            childName, childAttrs, childContent = child
            if childName != "map":
                continue
            cmap[safeEval(childAttrs["code"])] = childAttrs["name"]
jvr | 924e4e2 | 2003-02-08 10:45:23 +0000 | [diff] [blame] | 1042 | |
| 1043 | |
def cvtToUVS(threeByteString):
    """Return the integer stored in a 3-byte big-endian string.

    The bytes are always most-significant first, so the host byte order is
    irrelevant.  (The pad byte used to be placed according to
    sys.byteorder even though the value was unpacked with the explicitly
    big-endian ">L", which gave a result shifted by 8 bits on big-endian
    hosts.)

    Raises struct.error if threeByteString is not exactly 3 bytes long.
    """
    high, low = struct.unpack(">BH", threeByteString)
    return (high << 16) | low
| 1051 | |
def cvtFromUVS(val):
    """Return val as a 3-byte big-endian string.

    val is packed as a big-endian 32-bit integer and the most significant
    byte is dropped.  ">L" fixes the byte order regardless of the host, so
    the byte to drop is always the first one.  (On big-endian hosts the
    last byte used to be dropped instead, losing the low 8 bits.)
    """
    return struct.pack(">L", val)[1:]
| 1058 | |
def cmpUVSListEntry(first, second):
    """Three-way comparison of two (unicode value, glyph name) pairs.

    Returns a negative number, zero or a positive number, like cmp().
    An entry whose glyph name is None sorts before any entry that has a
    name; otherwise entries are ordered by unicode value, then by glyph
    name.

    The comparison is spelled out with < and > so it does not depend on the
    cmp() builtin.
    """
    uv1, glyphName1 = first
    uv2, glyphName2 = second

    if glyphName1 is None and glyphName2 is not None:
        return -1
    if glyphName2 is None and glyphName1 is not None:
        return 1

    if uv1 != uv2:
        return (uv1 > uv2) - (uv1 < uv2)
    if glyphName1 == glyphName2:
        # Also covers the case where both names are None.
        return 0
    return (glyphName1 > glyphName2) - (glyphName1 < glyphName2)
| 1072 | |
| 1073 | |
class cmap_format_14(CmapSubtable):
    """cmap subtable format 14 (variation selector records).

    The decompiled content lives in self.uvsDict, which maps each variation
    selector value to a list of (unicodeValue, glyphName) entries.  Entries
    read from the "default" sub-table carry a glyph name of None; entries
    read from the "non-default" sub-table carry the glyph name for the glyph
    ID stored in the record.

    self.cmap is always an empty dict; it exists only so that clients that
    expect every cmap subtable to have one do not fail.
    """

    def decompileHeader(self, data, ttFont):
        """Parse the 10-byte header and keep the remaining bytes for later.

        data: the raw subtable, header included.
        ttFont: the font object; kept so glyph IDs can be resolved to names
            when the body is decompiled.
        """
        format, length, numVarSelectorRecords = struct.unpack(">HLL", data[:10])
        # Everything after the header; offsets stored in the records are
        # relative to the start of the subtable, hence the "- 10" adjustments
        # in decompile().
        self.data = data[10:]
        self.length = length
        self.numVarSelectorRecords = numVarSelectorRecords
        self.ttFont = ttFont
        self.language = 0xFF # has no language.

    def decompile(self, data, ttFont):
        """Build self.uvsDict from the binary data.

        Either pass both data and ttFont, or pass None for both to decompile
        the bytes previously stored by decompileHeader().
        """
        if data is not None and ttFont is not None:
            self.decompileHeader(data, ttFont)
        else:
            assert (data is None and ttFont is None), "Need both data and ttFont arguments"
        data = self.data

        self.cmap = {} # so that clients that expect this to exist in a cmap table won't fail.
        uvsDict = {}
        recOffset = 0
        for n in range(self.numVarSelectorRecords):
            uvs, defOVSOffset, nonDefUVSOffset = struct.unpack(">3sLL", data[recOffset:recOffset+11])
            recOffset += 11
            varUVS = cvtToUVS(uvs)

            if defOVSOffset:
                # Default sub-table: a count followed by (start, additionalCount) ranges.
                startOffset = defOVSOffset - 10
                numValues, = struct.unpack(">L", data[startOffset:startOffset+4])
                startOffset += 4
                for r in range(numValues):
                    uv, addtlCnt = struct.unpack(">3sB", data[startOffset:startOffset+4])
                    startOffset += 4
                    firstBaseUV = cvtToUVS(uv)
                    cnt = addtlCnt + 1
                    # Expand the range into one (unicodeValue, None) entry per value.
                    localUVList = [(baseUV, None) for baseUV in range(firstBaseUV, firstBaseUV + cnt)]
                    uvsDict.setdefault(varUVS, []).extend(localUVList)

            if nonDefUVSOffset:
                # Non-default sub-table: a count followed by (unicodeValue, glyphID) records.
                startOffset = nonDefUVSOffset - 10
                numRecs, = struct.unpack(">L", data[startOffset:startOffset+4])
                startOffset += 4
                localUVList = []
                for r in range(numRecs):
                    uv, gid = struct.unpack(">3sH", data[startOffset:startOffset+5])
                    startOffset += 5
                    uv = cvtToUVS(uv)
                    glyphName = self.ttFont.getGlyphName(gid)
                    localUVList.append([uv, glyphName])
                uvsDict.setdefault(varUVS, []).extend(localUVList)

        self.uvsDict = uvsDict

    def toXML(self, writer, ttFont):
        """Write the subtable as XML, one <map> element per entry.

        Note: each entry list in self.uvsDict is sorted in place.
        """
        writer.begintag(self.__class__.__name__, [
            ("platformID", self.platformID),
            ("platEncID", self.platEncID),
            ("format", self.format),
            ("length", self.length),
            ("numVarSelectorRecords", self.numVarSelectorRecords),
        ])
        writer.newline()
        uvsDict = self.uvsDict
        for uvs in sorted(uvsDict.keys()):
            uvList = uvsDict[uvs]
            # Entries without a glyph name first, then by unicode value, then
            # by glyph name.
            uvList.sort(key=lambda entry: (entry[1] is not None, entry[0], entry[1]))
            for uv, gname in uvList:
                if gname is None:
                    gname = "None"
                # I use the arg rather than the keyword syntax in order to preserve the attribute order.
                writer.simpletag("map", [("uvs", hex(uvs)), ("uv", hex(uv)), ("name", gname)])
                writer.newline()
        writer.endtag(self.__class__.__name__)
        writer.newline()

    def fromXML(self, element, ttFont):
        """Populate the subtable from a parsed XML element.

        element: a (name, attrs, content) tuple.
        """
        name, attrs, content = element
        self.format = safeEval(attrs["format"])
        self.length = safeEval(attrs["length"])
        self.numVarSelectorRecords = safeEval(attrs["numVarSelectorRecords"])
        self.language = 0xFF # provide a value so that CmapSubtable.__cmp__() won't fail
        if not hasattr(self, "cmap"):
            self.cmap = {} # so that clients that expect this to exist in a cmap table won't fail.
        if not hasattr(self, "uvsDict"):
            self.uvsDict = {}
        uvsDict = self.uvsDict

        for child in content:
            if not isinstance(child, tuple):
                continue
            childName, childAttrs, childContent = child
            if childName != "map":
                continue
            uvs = safeEval(childAttrs["uvs"])
            uv = safeEval(childAttrs["uv"])
            gname = childAttrs["name"]
            if gname == "None":
                # toXML() writes the literal string "None" for entries without a glyph name.
                gname = None
            uvsDict.setdefault(uvs, []).append([uv, gname])

    def compile(self, ttFont):
        """Return the binary form of the subtable, header included.

        If undecompiled data is still held in self.data it is returned with
        its header re-attached.  Otherwise the subtable is rebuilt from
        self.uvsDict, and self.length and self.numVarSelectorRecords are
        updated to match the generated data.
        """
        if self.data:
            return struct.pack(">HLL", self.format, self.length, self.numVarSelectorRecords) + self.data

        uvsDict = self.uvsDict
        uvsList = sorted(uvsDict.keys())
        self.numVarSelectorRecords = len(uvsList)
        offset = 10 + self.numVarSelectorRecords*11 # current value is end of VarSelectorRecords block.
        data = []
        varSelectorRecords = []
        for uvs in uvsList:
            entryList = uvsDict[uvs]

            # Unicode values of the entries that have no glyph name.  The set
            # removes duplicates, which would otherwise yield duplicate records.
            defList = sorted(set([entry[0] for entry in entryList if entry[1] is None]))
            if defList:
                defOVSOffset = offset

                # Collapse consecutive values into (start, additionalCount)
                # records.  additionalCount is packed as a single byte, so a
                # run is also closed once it spans 256 values.
                defRecs = []
                startUV = prevUV = defList[0]
                for uv in defList[1:]:
                    if uv != prevUV + 1 or prevUV - startUV == 255:
                        defRecs.append(struct.pack(">3sB", cvtFromUVS(startUV), prevUV - startUV))
                        startUV = uv
                    prevUV = uv
                defRecs.append(struct.pack(">3sB", cvtFromUVS(startUV), prevUV - startUV))

                numDefRecs = len(defRecs)
                data.append(struct.pack(">L", numDefRecs))
                data.extend(defRecs)
                offset += 4 + numDefRecs*4
            else:
                defOVSOffset = 0

            # key=tuple so that list and tuple entries order the same way.
            ndefList = sorted([entry for entry in entryList if entry[1] is not None], key=tuple)
            if ndefList:
                nonDefUVSOffset = offset
                numNonDefRecs = len(ndefList)
                data.append(struct.pack(">L", numNonDefRecs))
                offset += 4 + numNonDefRecs*5

                for uv, gname in ndefList:
                    gid = ttFont.getGlyphID(gname)
                    data.append(struct.pack(">3sH", cvtFromUVS(uv), gid))
            else:
                nonDefUVSOffset = 0

            vrec = struct.pack(">3sLL", cvtFromUVS(uvs), defOVSOffset, nonDefUVSOffset)
            varSelectorRecords.append(vrec)

        data = b"".join(varSelectorRecords) + b"".join(data)
        self.length = 10 + len(data)
        headerdata = struct.pack(">HLL", self.format, self.length, self.numVarSelectorRecords)
        # Deliberately NOT stored in self.data: self.data is expected to hold
        # header-less bytes (see decompileHeader and the early return above),
        # so caching the full subtable there made a second compile() emit a
        # doubled header.
        return headerdata + data
| 1250 | |
| 1251 | |
class cmap_format_unknown(CmapSubtable):
    """cmap subtable kept as opaque bytes.

    The raw subtable is held in self.data and is round-tripped unchanged:
    toXML() dumps it as hex, fromXML() reads the hex back, and compile()
    returns it as-is.  self.cmap is an empty dict after fromXML().
    """

    def toXML(self, writer, ttFont):
        """Write the raw data as a hex dump inside a cmap_format_<N> element."""
        # First 12 characters of the class name are "cmap_format_"; the real
        # format number replaces the "unknown" suffix.
        cmapName = self.__class__.__name__[:12] + str(self.format)
        writer.begintag(cmapName, [
            ("platformID", self.platformID),
            ("platEncID", self.platEncID),
        ])
        writer.newline()
        writer.dumphex(self.data)
        writer.endtag(cmapName)
        writer.newline()

    def fromXML(self, element, ttFont):
        """Restore the raw data from a parsed (name, attrs, content) element."""
        name, attrs, content = element
        self.data = readHex(content)
        self.cmap = {}

    def decompileHeader(self, data, ttFont):
        """Store the subtable bytes untouched; there is no header to parse."""
        self.language = 0 # dummy value
        self.data = data

    def decompile(self, data, ttFont):
        """Store data when given; otherwise do nothing.

        Either pass both data and ttFont, or pass None for both.
        """
        # we usually get here indirectly from the subtable __getattr__ function, in which case both args must be None.
        # If not, someone is calling the subtable decompile() directly, and must provide both args.
        if data is not None and ttFont is not None:
            # data is already this subtable's bytes.  The former slice
            # data[offset:offset+int(length)] used names that do not exist in
            # this scope and raised NameError.
            self.decompileHeader(data, ttFont)
        else:
            assert (data is None and ttFont is None), "Need both data and ttFont arguments"

    def compile(self, ttFont):
        """Return the stored raw bytes, or None when there are none."""
        if self.data:
            return self.data
        else:
            return None
Just | 7842e56 | 1999-12-16 21:34:53 +0000 | [diff] [blame] | 1286 | |
# Maps a cmap subtable format number to the class that implements it.
# NOTE(review): formats not listed here presumably fall back to
# cmap_format_unknown at the lookup site, which is outside this view -- confirm.
cmap_classes = {
    0: cmap_format_0,
    2: cmap_format_2,
    4: cmap_format_4,
    6: cmap_format_6,
    12: cmap_format_12,
    14: cmap_format_14,
    }