Just | 7842e56 | 1999-12-16 21:34:53 +0000 | [diff] [blame] | 1 | import DefaultTable |
| 2 | import struct |
| 3 | import string |
| 4 | import array |
| 5 | from fontTools import ttLib |
| 6 | from fontTools.misc.textTools import safeEval, readHex |
| 7 | |
| 8 | |
| 9 | class table__c_m_a_p(DefaultTable.DefaultTable): |
| 10 | |
| 11 | def getcmap(self, platformID, platEncID): |
| 12 | for subtable in self.tables: |
| 13 | if (subtable.platformID == platformID and |
| 14 | subtable.platEncID == platEncID): |
| 15 | return subtable |
| 16 | return None # not found |
| 17 | |
| 18 | def decompile(self, data, ttFont): |
| 19 | tableVersion, numSubTables = struct.unpack(">HH", data[:4]) |
| 20 | self.tableVersion = int(tableVersion) |
| 21 | self.tables = tables = [] |
| 22 | for i in range(numSubTables): |
| 23 | platformID, platEncID, offset = struct.unpack( |
| 24 | ">HHl", data[4+i*8:4+(i+1)*8]) |
| 25 | platformID, platEncID = int(platformID), int(platEncID) |
| 26 | format, length = struct.unpack(">HH", data[offset:offset+4]) |
| 27 | if not cmap_classes.has_key(format): |
| 28 | table = cmap_format_unknown(format) |
| 29 | else: |
| 30 | table = cmap_classes[format](format) |
| 31 | table.platformID = platformID |
| 32 | table.platEncID = platEncID |
| 33 | table.decompile(data[offset:offset+int(length)], ttFont) |
| 34 | tables.append(table) |
| 35 | |
| 36 | def compile(self, ttFont): |
| 37 | self.tables.sort() # sort according to the spec; see CmapSubtable.__cmp__() |
| 38 | numSubTables = len(self.tables) |
| 39 | totalOffset = 4 + 8 * numSubTables |
| 40 | data = struct.pack(">HH", self.tableVersion, numSubTables) |
| 41 | tableData = "" |
| 42 | done = {} # remember the data so we can reuse the "pointers" |
| 43 | for table in self.tables: |
| 44 | chunk = table.compile(ttFont) |
| 45 | if done.has_key(chunk): |
| 46 | offset = done[chunk] |
| 47 | else: |
| 48 | offset = done[chunk] = totalOffset + len(tableData) |
Just | 1b85098 | 2000-06-07 18:25:44 +0000 | [diff] [blame] | 49 | tableData = tableData + chunk |
Just | 7842e56 | 1999-12-16 21:34:53 +0000 | [diff] [blame] | 50 | data = data + struct.pack(">HHl", table.platformID, table.platEncID, offset) |
| 51 | return data + tableData |
| 52 | |
| 53 | def toXML(self, writer, ttFont): |
| 54 | writer.simpletag("tableVersion", version=self.tableVersion) |
| 55 | writer.newline() |
| 56 | for table in self.tables: |
| 57 | table.toXML(writer, ttFont) |
| 58 | |
| 59 | def fromXML(self, (name, attrs, content), ttFont): |
| 60 | if name == "tableVersion": |
| 61 | self.tableVersion = safeEval(attrs["version"]) |
| 62 | return |
| 63 | if name[:12] <> "cmap_format_": |
| 64 | return |
| 65 | if not hasattr(self, "tables"): |
| 66 | self.tables = [] |
| 67 | format = safeEval(name[12]) |
| 68 | if not cmap_classes.has_key(format): |
| 69 | table = cmap_format_unknown(format) |
| 70 | else: |
| 71 | table = cmap_classes[format](format) |
| 72 | table.platformID = safeEval(attrs["platformID"]) |
| 73 | table.platEncID = safeEval(attrs["platEncID"]) |
| 74 | table.fromXML((name, attrs, content), ttFont) |
| 75 | self.tables.append(table) |
| 76 | |
| 77 | |
class CmapSubtable:
    """Base class for cmap subtables.

    Subclasses supply decompile(data, ttFont) and compile(ttFont). The
    owner of the subtable sets the 'platformID' and 'platEncID' attributes
    after construction. The toXML()/fromXML() defined here dump and read
    the compiled subtable as hex data.
    """

    def __init__(self, format):
        self.format = format

    def toXML(self, writer, ttFont):
        """Write the subtable as a hex dump of its compiled data, in an
        element named after the subtable's class."""
        tag = self.__class__.__name__
        writer.begintag(tag, [
                ("platformID", self.platformID),
                ("platEncID", self.platEncID),
                ])
        writer.newline()
        writer.dumphex(self.compile(ttFont))
        writer.endtag(tag)
        writer.newline()

    def fromXML(self, element, ttFont):
        """Rebuild the subtable from the hex dump written by toXML().

        'element' is a (name, attrs, content) tuple; only the content is
        used here.
        """
        name, attrs, content = element
        self.decompile(readHex(content), ttFont)

    def __cmp__(self, other):
        """Three-way comparison so that list.sort() orders subtables by
        platformID, then platEncID, then version, as the cmap spec asks.

        Subtables that never set a 'version' attribute are treated as
        version 0 instead of raising AttributeError in the middle of a
        sort. The instance dicts act as a final tie breaker.
        """
        selfTuple = (
                self.platformID,
                self.platEncID,
                getattr(self, "version", 0),
                self.__dict__)
        otherTuple = (
                other.platformID,
                other.platEncID,
                getattr(other, "version", 0),
                other.__dict__)
        if selfTuple < otherTuple:
            return -1
        if selfTuple > otherTuple:
            return 1
        return 0
| 109 | |
| 110 | |
| 111 | class cmap_format_0(CmapSubtable): |
| 112 | |
| 113 | def decompile(self, data, ttFont): |
| 114 | format, length, version = struct.unpack(">HHH", data[:6]) |
| 115 | self.version = int(version) |
| 116 | assert len(data) == 262 == length |
| 117 | glyphIdArray = array.array("B") |
| 118 | glyphIdArray.fromstring(data[6:]) |
| 119 | self.cmap = cmap = {} |
| 120 | for charCode in range(len(glyphIdArray)): |
| 121 | cmap[charCode] = ttFont.getGlyphName(glyphIdArray[charCode]) |
| 122 | |
| 123 | def compile(self, ttFont): |
| 124 | charCodes = self.cmap.keys() |
| 125 | charCodes.sort() |
| 126 | assert charCodes == range(256) # charCodes[charCode] == charCode |
| 127 | for charCode in charCodes: |
| 128 | # reusing the charCodes list! |
| 129 | charCodes[charCode] = ttFont.getGlyphID(self.cmap[charCode]) |
| 130 | glyphIdArray = array.array("B", charCodes) |
| 131 | data = struct.pack(">HHH", 0, 262, self.version) + glyphIdArray.tostring() |
| 132 | assert len(data) == 262 |
| 133 | return data |
| 134 | |
| 135 | def toXML(self, writer, ttFont): |
| 136 | writer.begintag(self.__class__.__name__, [ |
| 137 | ("platformID", self.platformID), |
| 138 | ("platEncID", self.platEncID), |
| 139 | ("version", self.version), |
| 140 | ]) |
| 141 | writer.newline() |
| 142 | items = self.cmap.items() |
| 143 | items.sort() |
| 144 | for code, name in items: |
| 145 | writer.simpletag("map", code=hex(code), name=name) |
| 146 | writer.newline() |
| 147 | writer.endtag(self.__class__.__name__) |
| 148 | writer.newline() |
| 149 | |
| 150 | def fromXML(self, (name, attrs, content), ttFont): |
| 151 | self.version = safeEval(attrs["version"]) |
| 152 | self.cmap = {} |
| 153 | for element in content: |
| 154 | if type(element) <> type(()): |
| 155 | continue |
| 156 | name, attrs, content = element |
| 157 | if name <> "map": |
| 158 | continue |
| 159 | self.cmap[safeEval(attrs["code"])] = attrs["name"] |
| 160 | |
| 161 | |
| 162 | |
class cmap_format_2(CmapSubtable):
    """Format 2 subtable. Only the version field of the header is
    interpreted; the raw bytes are kept and written back unchanged."""

    def decompile(self, data, ttFont):
        """Record the version field and keep 'data' verbatim."""
        header = struct.unpack(">HHH", data[:6])  # format, length, version
        self.version = int(header[2])
        self.data = data

    def compile(self, ttFont):
        """Return the bytes stored by decompile()."""
        return self.data
| 172 | |
| 173 | |
# struct format of the fixed-size format 4 subtable header: seven big-endian
# uint16 fields, unpacked by cmap_format_4.decompile() as format, length,
# version, segCountX2, searchRange, entrySelector and rangeShift.
cmap_format_4_format = ">7H"

# The header is followed by these uint16 arrays, in this order:
#uint16  endCode[segCount]          # Ending character code for each segment, last = 0xFFFF.
#uint16  reservedPad                # This value should be zero
#uint16  startCode[segCount]        # Starting character code for each segment
#uint16  idDelta[segCount]          # Delta for all character codes in segment
#uint16  idRangeOffset[segCount]    # Offset in bytes to glyph indexArray, or 0
#uint16  glyphIndexArray[variable]  # Glyph index array
| 182 | |
| 183 | class cmap_format_4(CmapSubtable): |
| 184 | |
| 185 | def decompile(self, data, ttFont): |
| 186 | (format, length, self.version, segCountX2, |
| 187 | searchRange, entrySelector, rangeShift) = \ |
| 188 | struct.unpack(cmap_format_4_format, data[:14]) |
| 189 | assert len(data) == length, "corrupt cmap table (%d, %d)" % (len(data), length) |
| 190 | data = data[14:] |
| 191 | segCountX2 = int(segCountX2) |
| 192 | segCount = segCountX2 / 2 |
| 193 | |
| 194 | allcodes = array.array("H") |
| 195 | allcodes.fromstring(data) |
| 196 | if ttLib.endian <> "big": |
| 197 | allcodes.byteswap() |
| 198 | |
| 199 | # divide the data |
| 200 | endCode = allcodes[:segCount] |
| 201 | allcodes = allcodes[segCount+1:] |
| 202 | startCode = allcodes[:segCount] |
| 203 | allcodes = allcodes[segCount:] |
| 204 | idDelta = allcodes[:segCount] |
| 205 | allcodes = allcodes[segCount:] |
| 206 | idRangeOffset = allcodes[:segCount] |
| 207 | glyphIndexArray = allcodes[segCount:] |
| 208 | |
| 209 | # build 2-byte character mapping |
| 210 | cmap = {} |
| 211 | for i in range(len(startCode) - 1): # don't do 0xffff! |
| 212 | for charCode in range(startCode[i], endCode[i] + 1): |
| 213 | rangeOffset = idRangeOffset[i] |
| 214 | if rangeOffset == 0: |
| 215 | glyphID = charCode + idDelta[i] |
| 216 | else: |
| 217 | # *someone* needs to get killed. |
| 218 | index = idRangeOffset[i] / 2 + (charCode - startCode[i]) + i - len(idRangeOffset) |
| 219 | if glyphIndexArray[index] <> 0: # if not missing glyph |
| 220 | glyphID = glyphIndexArray[index] + idDelta[i] |
| 221 | else: |
| 222 | glyphID = 0 # missing glyph |
| 223 | cmap[charCode] = ttFont.getGlyphName(glyphID % 0x10000) |
| 224 | self.cmap = cmap |
| 225 | |
| 226 | def compile(self, ttFont): |
| 227 | from fontTools.ttLib.sfnt import maxpoweroftwo |
| 228 | |
| 229 | codes = self.cmap.items() |
| 230 | codes.sort() |
| 231 | |
| 232 | # build startCode and endCode lists |
| 233 | last = codes[0][0] |
| 234 | endCode = [] |
| 235 | startCode = [last] |
| 236 | for charCode, glyphName in codes[1:]: # skip the first code, it's the first start code |
| 237 | if charCode == last + 1: |
| 238 | last = charCode |
| 239 | continue |
| 240 | endCode.append(last) |
| 241 | startCode.append(charCode) |
| 242 | last = charCode |
| 243 | endCode.append(last) |
| 244 | startCode.append(0xffff) |
| 245 | endCode.append(0xffff) |
| 246 | |
| 247 | # build up rest of cruft. |
| 248 | idDelta = [] |
| 249 | idRangeOffset = [] |
| 250 | glyphIndexArray = [] |
| 251 | |
| 252 | for i in range(len(endCode)-1): # skip the closing codes (0xffff) |
| 253 | indices = [] |
| 254 | for charCode in range(startCode[i], endCode[i]+1): |
| 255 | indices.append(ttFont.getGlyphID(self.cmap[charCode])) |
| 256 | if indices == range(indices[0], indices[0] + len(indices)): |
| 257 | idDelta.append((indices[0] - startCode[i]) % 0x10000) |
| 258 | idRangeOffset.append(0) |
| 259 | else: |
| 260 | # someone *definitely* needs to get killed. |
| 261 | idDelta.append(0) |
| 262 | idRangeOffset.append(2 * (len(endCode) + len(glyphIndexArray) - i)) |
| 263 | glyphIndexArray = glyphIndexArray + indices |
| 264 | idDelta.append(1) # 0xffff + 1 == (tadaa!) 0. So this end code maps to .notdef |
| 265 | idRangeOffset.append(0) |
| 266 | |
| 267 | # Insane. |
| 268 | segCount = len(endCode) |
| 269 | segCountX2 = segCount * 2 |
| 270 | maxexponent = maxpoweroftwo(segCount) |
| 271 | searchRange = 2 * (2 ** maxexponent) |
| 272 | entrySelector = maxexponent |
| 273 | rangeShift = 2 * segCount - searchRange |
| 274 | |
| 275 | allcodes = array.array("H", |
| 276 | endCode + [0] + startCode + idDelta + idRangeOffset + glyphIndexArray) |
| 277 | if ttLib.endian <> "big": |
| 278 | allcodes.byteswap() |
| 279 | data = allcodes.tostring() |
| 280 | length = struct.calcsize(cmap_format_4_format) + len(data) |
| 281 | header = struct.pack(cmap_format_4_format, self.format, length, self.version, |
| 282 | segCountX2, searchRange, entrySelector, rangeShift) |
| 283 | return header + data |
| 284 | |
| 285 | def toXML(self, writer, ttFont): |
| 286 | from fontTools.unicode import Unicode |
| 287 | codes = self.cmap.items() |
| 288 | codes.sort() |
| 289 | writer.begintag(self.__class__.__name__, [ |
| 290 | ("platformID", self.platformID), |
| 291 | ("platEncID", self.platEncID), |
| 292 | ("version", self.version), |
| 293 | ]) |
| 294 | writer.newline() |
| 295 | |
| 296 | for code, name in codes: |
| 297 | writer.simpletag("map", code=hex(code), name=name) |
| 298 | writer.comment(Unicode[code]) |
| 299 | writer.newline() |
| 300 | |
| 301 | writer.endtag(self.__class__.__name__) |
| 302 | writer.newline() |
| 303 | |
| 304 | def fromXML(self, (name, attrs, content), ttFont): |
| 305 | self.version = safeEval(attrs["version"]) |
| 306 | self.cmap = {} |
| 307 | for element in content: |
| 308 | if type(element) <> type(()): |
| 309 | continue |
| 310 | name, attrs, content = element |
| 311 | if name <> "map": |
| 312 | continue |
| 313 | self.cmap[safeEval(attrs["code"])] = attrs["name"] |
| 314 | |
| 315 | |
| 316 | class cmap_format_6(CmapSubtable): |
| 317 | |
| 318 | def decompile(self, data, ttFont): |
| 319 | format, length, version, firstCode, entryCount = struct.unpack( |
| 320 | ">HHHHH", data[:10]) |
| 321 | self.version = int(version) |
| 322 | firstCode = int(firstCode) |
| 323 | self.version = int(version) |
| 324 | data = data[10:] |
Just | f6b1563 | 2000-08-23 12:33:14 +0000 | [diff] [blame^] | 325 | #assert len(data) == 2 * entryCount # XXX not true in Apple's Helvetica!!! |
Just | 7842e56 | 1999-12-16 21:34:53 +0000 | [diff] [blame] | 326 | glyphIndexArray = array.array("H") |
Just | f6b1563 | 2000-08-23 12:33:14 +0000 | [diff] [blame^] | 327 | glyphIndexArray.fromstring(data[:2 * entryCount]) |
Just | 7842e56 | 1999-12-16 21:34:53 +0000 | [diff] [blame] | 328 | if ttLib.endian <> "big": |
| 329 | glyphIndexArray.byteswap() |
| 330 | self.cmap = cmap = {} |
| 331 | for i in range(len(glyphIndexArray)): |
| 332 | glyphID = glyphIndexArray[i] |
| 333 | glyphName = ttFont.getGlyphName(glyphID) |
| 334 | cmap[i+firstCode] = glyphName |
| 335 | |
| 336 | def compile(self, ttFont): |
| 337 | codes = self.cmap.keys() |
| 338 | codes.sort() |
| 339 | assert codes == range(codes[0], codes[0] + len(codes)) |
| 340 | glyphIndexArray = array.array("H", [0] * len(codes)) |
| 341 | firstCode = codes[0] |
| 342 | for i in range(len(codes)): |
| 343 | code = codes[i] |
| 344 | glyphIndexArray[code-firstCode] = ttFont.getGlyphID(self.cmap[code]) |
| 345 | if ttLib.endian <> "big": |
| 346 | glyphIndexArray.byteswap() |
| 347 | data = glyphIndexArray.tostring() |
| 348 | header = struct.pack(">HHHHH", |
| 349 | 6, len(data) + 10, self.version, firstCode, len(self.cmap)) |
| 350 | return header + data |
| 351 | |
| 352 | def toXML(self, writer, ttFont): |
| 353 | codes = self.cmap.items() |
| 354 | codes.sort() |
| 355 | writer.begintag(self.__class__.__name__, [ |
| 356 | ("platformID", self.platformID), |
| 357 | ("platEncID", self.platEncID), |
| 358 | ("version", self.version), |
| 359 | ]) |
| 360 | writer.newline() |
| 361 | |
| 362 | for code, name in codes: |
| 363 | writer.simpletag("map", code=hex(code), name=name) |
| 364 | writer.newline() |
| 365 | |
| 366 | writer.endtag(self.__class__.__name__) |
| 367 | writer.newline() |
| 368 | |
| 369 | def fromXML(self, (name, attrs, content), ttFont): |
| 370 | self.version = safeEval(attrs["version"]) |
| 371 | self.cmap = {} |
| 372 | for element in content: |
| 373 | if type(element) <> type(()): |
| 374 | continue |
| 375 | name, attrs, content = element |
| 376 | if name <> "map": |
| 377 | continue |
| 378 | self.cmap[safeEval(attrs["code"])] = attrs["name"] |
| 379 | |
| 380 | |
class cmap_format_unknown(CmapSubtable):
    """Fallback for subtable formats that have no dedicated class: the raw
    bytes are kept as-is and written back unchanged."""

    def toXML(self, writer, ttFont):
        """Write the subtable as a hex dump in a <cmap_format_N> element.

        The tag carries the numeric format rather than the class name:
        table__c_m_a_p.fromXML() derives the format number from the text
        after the "cmap_format_" prefix, and "cmap_format_unknown" cannot
        be read back that way.
        """
        tag = "cmap_format_%s" % self.format
        writer.begintag(tag, [
                ("platformID", self.platformID),
                ("platEncID", self.platEncID),
                ])
        writer.newline()
        writer.dumphex(self.compile(ttFont))
        writer.endtag(tag)
        writer.newline()

    def decompile(self, data, ttFont):
        """Keep 'data' verbatim; nothing is interpreted."""
        self.data = data
        # CmapSubtable.__cmp__() reads self.version when subtables are
        # sorted. The header layout of an unknown format is not parsed
        # here, so use 0 as a placeholder sort key.
        self.version = 0

    def compile(self, ttFont):
        """Return the bytes stored by decompile()."""
        return self.data
| 388 | |
| 389 | |
# Maps a subtable format number to the class that implements it. Formats
# that are missing here are handled by cmap_format_unknown; see the lookups
# in table__c_m_a_p.decompile() and table__c_m_a_p.fromXML().
cmap_classes = {
        0: cmap_format_0,
        2: cmap_format_2,
        4: cmap_format_4,
        6: cmap_format_6,
        }
| 396 | |
| 397 | |