blob: d331a627e7570975983caf938c402d2451bd0d06 [file] [log] [blame]
jvr9be387c2008-03-01 11:43:01 +00001import sys
Just7842e561999-12-16 21:34:53 +00002import DefaultTable
3import struct
Just7842e561999-12-16 21:34:53 +00004import array
jvrd299b552006-10-21 13:54:30 +00005import operator
Just7842e561999-12-16 21:34:53 +00006from fontTools import ttLib
7from fontTools.misc.textTools import safeEval, readHex
jvr22dcb9e2002-05-10 19:03:34 +00008from types import TupleType
Just7842e561999-12-16 21:34:53 +00009
10
11class table__c_m_a_p(DefaultTable.DefaultTable):
12
13 def getcmap(self, platformID, platEncID):
14 for subtable in self.tables:
15 if (subtable.platformID == platformID and
16 subtable.platEncID == platEncID):
17 return subtable
18 return None # not found
19
20 def decompile(self, data, ttFont):
21 tableVersion, numSubTables = struct.unpack(">HH", data[:4])
22 self.tableVersion = int(tableVersion)
23 self.tables = tables = []
jvrd299b552006-10-21 13:54:30 +000024 seenOffsets = {}
Just7842e561999-12-16 21:34:53 +000025 for i in range(numSubTables):
26 platformID, platEncID, offset = struct.unpack(
27 ">HHl", data[4+i*8:4+(i+1)*8])
28 platformID, platEncID = int(platformID), int(platEncID)
29 format, length = struct.unpack(">HH", data[offset:offset+4])
jvr924e4e22003-02-08 10:45:23 +000030 if format in [8,10,12]:
31 format, reserved, length = struct.unpack(">HHL", data[offset:offset+8])
jvr0cb8a082008-05-16 15:07:09 +000032 elif format in [14]:
33 format, length = struct.unpack(">HL", data[offset:offset+6])
34
jvr2db352c2008-02-29 14:43:49 +000035 if not length:
36 print "Error: cmap subtable is reported as having zero length: platformID %s, platEncID %s, format %s offset %s. Skipping table." % (platformID, platEncID,format, offset)
37 continue
Just7842e561999-12-16 21:34:53 +000038 if not cmap_classes.has_key(format):
39 table = cmap_format_unknown(format)
40 else:
41 table = cmap_classes[format](format)
42 table.platformID = platformID
43 table.platEncID = platEncID
jvrd299b552006-10-21 13:54:30 +000044 # Note that by default we decompile only the subtable header info;
45 # any other data gets decompiled only when an attribute of the
46 # subtable is referenced.
47 table.decompileHeader(data[offset:offset+int(length)], ttFont)
48 if seenOffsets.has_key(offset):
49 table.cmap = tables[seenOffsets[offset]].cmap
50 else:
51 seenOffsets[offset] = i
Just7842e561999-12-16 21:34:53 +000052 tables.append(table)
53
54 def compile(self, ttFont):
55 self.tables.sort() # sort according to the spec; see CmapSubtable.__cmp__()
56 numSubTables = len(self.tables)
57 totalOffset = 4 + 8 * numSubTables
58 data = struct.pack(">HH", self.tableVersion, numSubTables)
59 tableData = ""
jvrd299b552006-10-21 13:54:30 +000060 seen = {} # Some tables are the same object reference. Don't compile them twice.
61 done = {} # Some tables are different objects, but compile to the same data chunk
Just7842e561999-12-16 21:34:53 +000062 for table in self.tables:
jvrd299b552006-10-21 13:54:30 +000063 try:
64 offset = seen[id(table.cmap)]
65 except KeyError:
66 chunk = table.compile(ttFont)
67 if done.has_key(chunk):
68 offset = done[chunk]
69 else:
70 offset = seen[id(table.cmap)] = done[chunk] = totalOffset + len(tableData)
71 tableData = tableData + chunk
Just7842e561999-12-16 21:34:53 +000072 data = data + struct.pack(">HHl", table.platformID, table.platEncID, offset)
73 return data + tableData
74
75 def toXML(self, writer, ttFont):
76 writer.simpletag("tableVersion", version=self.tableVersion)
77 writer.newline()
78 for table in self.tables:
79 table.toXML(writer, ttFont)
80
81 def fromXML(self, (name, attrs, content), ttFont):
82 if name == "tableVersion":
83 self.tableVersion = safeEval(attrs["version"])
84 return
85 if name[:12] <> "cmap_format_":
86 return
87 if not hasattr(self, "tables"):
88 self.tables = []
jvr0cd79a52004-09-25 07:30:47 +000089 format = safeEval(name[12:])
Just7842e561999-12-16 21:34:53 +000090 if not cmap_classes.has_key(format):
91 table = cmap_format_unknown(format)
92 else:
93 table = cmap_classes[format](format)
94 table.platformID = safeEval(attrs["platformID"])
95 table.platEncID = safeEval(attrs["platEncID"])
96 table.fromXML((name, attrs, content), ttFont)
97 self.tables.append(table)
98
99
class CmapSubtable:
    """Base class for cmap subtables, with lazy decompilation.

    decompileHeader() stores the bytes that follow the six byte header in
    'self.data'.  The first lookup of an attribute that is not set yet
    triggers self.decompile(None, None), which subclasses implement to turn
    the saved bytes into real attributes.
    """

    def __init__(self, format):
        self.format = format
        self.data = None
        self.ttFont = None

    def __getattr__(self, attr):
        # Only called for attributes that normal lookup did not find.
        if attr.startswith('__'):
            # don't handle requests for member functions like '__lt__'
            raise AttributeError(attr)
        if self.data is None:
            # nothing saved to decompile from: the attribute does not exist
            raise AttributeError(attr)
        self.decompile(None, None)  # use saved data.
        # Once this table has been decompiled, make sure we don't just
        # return the original data.  This also avoids recursion when asked
        # for an attribute that the cmap subtable doesn't have.
        self.data = None
        return getattr(self, attr)

    def decompileHeader(self, data, ttFont):
        """Read format, length and language; keep the rest for later."""
        header = struct.unpack(">HHH", data[:6])
        format, length, language = header
        assert len(data) == length, "corrupt cmap table format %d (data length: %d, header length: %d)" % (format, len(data), length)
        self.format = int(format)
        self.length = int(length)
        self.language = int(language)
        self.data = data[6:]
        self.ttFont = ttFont

    def toXML(self, writer, ttFont):
        """Write the subtable as an element named after its class."""
        tagName = self.__class__.__name__
        attributes = [
            ("platformID", self.platformID),
            ("platEncID", self.platEncID),
            ("language", self.language),
        ]
        writer.begintag(tagName, attributes)
        writer.newline()
        self._writeCodes(sorted(self.cmap.items()), writer)
        writer.endtag(tagName)
        writer.newline()

    def _writeCodes(self, codes, writer):
        """Write one 'map' element per (code, name) pair.

        For platform 0, and for platform 3 with encoding 1 or 10, each
        element is followed by a comment looked up in fontTools.unicode.
        """
        platform = (self.platformID, self.platEncID)
        isUnicode = 0
        if platform == (3, 1) or platform == (3, 10) or self.platformID == 0:
            from fontTools.unicode import Unicode
            isUnicode = 1
        for code, name in codes:
            writer.simpletag("map", code=hex(code), name=name)
            if isUnicode:
                writer.comment(Unicode[code])
            writer.newline()

    def __cmp__(self, other):
        # implemented so that list.sort() sorts according to the cmap spec.
        def sortKey(subtable):
            return (
                subtable.platformID,
                subtable.platEncID,
                subtable.language,
                subtable.__dict__)
        return cmp(sortKey(self), sortKey(other))
166
167
168class cmap_format_0(CmapSubtable):
169
170 def decompile(self, data, ttFont):
jvrd299b552006-10-21 13:54:30 +0000171 # we usually get here indirectly from the subtable __getattr__ function, in which case both args must be None.
172 # If not, someone is calling the subtable decompile() directly, and must provide both args.
173 if data != None and ttFont != None:
174 self.decompileHeader(data[offset:offset+int(length)], ttFont)
175 else:
pabs317012aa2009-11-08 15:55:53 +0000176 assert (data == None and ttFont == None), "Need both data and ttFont arguments"
jvrd299b552006-10-21 13:54:30 +0000177 data = self.data # decompileHeader assigns the data after the header to self.data
178 assert 262 == self.length, "Format 0 cmap subtable not 262 bytes"
Just7842e561999-12-16 21:34:53 +0000179 glyphIdArray = array.array("B")
jvrd299b552006-10-21 13:54:30 +0000180 glyphIdArray.fromstring(self.data)
Just7842e561999-12-16 21:34:53 +0000181 self.cmap = cmap = {}
jvrd299b552006-10-21 13:54:30 +0000182 lenArray = len(glyphIdArray)
183 charCodes = range(lenArray)
184 names = map(self.ttFont.getGlyphName, glyphIdArray)
185 map(operator.setitem, [cmap]*lenArray, charCodes, names)
186
Just7842e561999-12-16 21:34:53 +0000187
188 def compile(self, ttFont):
jvrd299b552006-10-21 13:54:30 +0000189 if self.data:
190 return struct.pack(">HHH", 0, 262, self.language) + self.data
191
192 charCodeList = self.cmap.items()
193 charCodeList.sort()
194 charCodes = [entry[0] for entry in charCodeList]
195 valueList = [entry[1] for entry in charCodeList]
196 assert charCodes == range(256)
197 valueList = map(ttFont.getGlyphID, valueList)
198
Behdad Esfahbod8da82422013-08-16 12:56:08 -0400199 glyphIdArray = array.array("B", valueList)
jvr0cd79a52004-09-25 07:30:47 +0000200 data = struct.pack(">HHH", 0, 262, self.language) + glyphIdArray.tostring()
Just7842e561999-12-16 21:34:53 +0000201 assert len(data) == 262
202 return data
203
Just7842e561999-12-16 21:34:53 +0000204 def fromXML(self, (name, attrs, content), ttFont):
jvr0cd79a52004-09-25 07:30:47 +0000205 self.language = safeEval(attrs["language"])
jvrd299b552006-10-21 13:54:30 +0000206 if not hasattr(self, "cmap"):
207 self.cmap = {}
208 cmap = self.cmap
Just7842e561999-12-16 21:34:53 +0000209 for element in content:
jvr22dcb9e2002-05-10 19:03:34 +0000210 if type(element) <> TupleType:
Just7842e561999-12-16 21:34:53 +0000211 continue
212 name, attrs, content = element
213 if name <> "map":
214 continue
jvrd299b552006-10-21 13:54:30 +0000215 cmap[safeEval(attrs["code"])] = attrs["name"]
Just7842e561999-12-16 21:34:53 +0000216
217
# struct layout of one format 2 subheader, big-endian: firstCode and
# entryCount (unsigned shorts), idDelta (signed short), idRangeOffset
# (unsigned short).
subHeaderFormat = ">HHhH"
class SubHeader:
    """Plain record for one format 2 subheader and its glyph index sub-array."""

    def __init__(self):
        # The four header words stay None until they are filled in.
        self.firstCode = self.entryCount = None
        self.idDelta = self.idRangeOffset = None
        # Every instance gets its own, initially empty, list.
        self.glyphIndexArray = []
226
Just7842e561999-12-16 21:34:53 +0000227class cmap_format_2(CmapSubtable):
228
jvrd299b552006-10-21 13:54:30 +0000229 def setIDDelta(self, subHeader):
230 subHeader.idDelta = 0
231 # find the minGI which is not zero.
232 minGI = subHeader.glyphIndexArray[0]
233 for gid in subHeader.glyphIndexArray:
234 if (gid != 0) and (gid < minGI):
235 minGI = gid
236 # The lowest gid in glyphIndexArray, after subtracting idDelta, must be 1.
237 # idDelta is a short, and must be between -32K and 32K. minGI can be between 1 and 64K.
238 # We would like to pick an idDelta such that the first glyphArray GID is 1,
239 # so that we are more likely to be able to combine glypharray GID subranges.
240 # This means that we have a problem when minGI is > 32K
241 # Since the final gi is reconstructed from the glyphArray GID by:
242 # (short)finalGID = (gid + idDelta) % 0x10000),
243 # we can get from a glypharray GID of 1 to a final GID of 65K by subtracting 2, and casting the
244 # negative number to an unsigned short.
245
246 if (minGI > 1):
247 if minGI > 0x7FFF:
248 subHeader.idDelta = -(0x10000 - minGI) -1
249 else:
250 subHeader.idDelta = minGI -1
251 idDelta = subHeader.idDelta
252 for i in range(subHeader.entryCount):
253 gid = subHeader.glyphIndexArray[i]
254 if gid > 0:
255 subHeader.glyphIndexArray[i] = gid - idDelta
256
257
Just7842e561999-12-16 21:34:53 +0000258 def decompile(self, data, ttFont):
jvrd299b552006-10-21 13:54:30 +0000259 # we usually get here indirectly from the subtable __getattr__ function, in which case both args must be None.
260 # If not, someone is calling the subtable decompile() directly, and must provide both args.
261 if data != None and ttFont != None:
262 self.decompileHeader(data[offset:offset+int(length)], ttFont)
263 else:
pabs317012aa2009-11-08 15:55:53 +0000264 assert (data == None and ttFont == None), "Need both data and ttFont arguments"
jvrd299b552006-10-21 13:54:30 +0000265
266 data = self.data # decompileHeader assigns the data after the header to self.data
jvrbafa66e2003-08-28 18:04:23 +0000267 subHeaderKeys = []
268 maxSubHeaderindex = 0
jvrbafa66e2003-08-28 18:04:23 +0000269 # get the key array, and determine the number of subHeaders.
jvrd299b552006-10-21 13:54:30 +0000270 allKeys = array.array("H")
271 allKeys.fromstring(data[:512])
272 data = data[512:]
jvr9be387c2008-03-01 11:43:01 +0000273 if sys.byteorder <> "big":
jvrd299b552006-10-21 13:54:30 +0000274 allKeys.byteswap()
275 subHeaderKeys = [ key/8 for key in allKeys]
276 maxSubHeaderindex = max(subHeaderKeys)
Just7842e561999-12-16 21:34:53 +0000277
jvrbafa66e2003-08-28 18:04:23 +0000278 #Load subHeaders
279 subHeaderList = []
jvrd299b552006-10-21 13:54:30 +0000280 pos = 0
jvrbafa66e2003-08-28 18:04:23 +0000281 for i in range(maxSubHeaderindex + 1):
282 subHeader = SubHeader()
283 (subHeader.firstCode, subHeader.entryCount, subHeader.idDelta, \
jvrd299b552006-10-21 13:54:30 +0000284 subHeader.idRangeOffset) = struct.unpack(subHeaderFormat, data[pos:pos + 8])
285 pos += 8
286 giDataPos = pos + subHeader.idRangeOffset-2
287 giList = array.array("H")
288 giList.fromstring(data[giDataPos:giDataPos + subHeader.entryCount*2])
jvr9be387c2008-03-01 11:43:01 +0000289 if sys.byteorder <> "big":
jvrd299b552006-10-21 13:54:30 +0000290 giList.byteswap()
291 subHeader.glyphIndexArray = giList
jvrbafa66e2003-08-28 18:04:23 +0000292 subHeaderList.append(subHeader)
jvrbafa66e2003-08-28 18:04:23 +0000293 # How this gets processed.
294 # Charcodes may be one or two bytes.
295 # The first byte of a charcode is mapped through the subHeaderKeys, to select
296 # a subHeader. For any subheader but 0, the next byte is then mapped through the
297 # selected subheader. If subheader Index 0 is selected, then the byte itself is
298 # mapped through the subheader, and there is no second byte.
299 # Then assume that the subsequent byte is the first byte of the next charcode,and repeat.
300 #
301 # Each subheader references a range in the glyphIndexArray whose length is entryCount.
302 # The range in glyphIndexArray referenced by a sunheader may overlap with the range in glyphIndexArray
303 # referenced by another subheader.
304 # The only subheader that will be referenced by more than one first-byte value is the subheader
305 # that maps the entire range of glyphID values to glyphIndex 0, e.g notdef:
306 # {firstChar 0, EntryCount 0,idDelta 0,idRangeOffset xx}
307 # A byte being mapped though a subheader is treated as in index into a mapping of array index to font glyphIndex.
308 # A subheader specifies a subrange within (0...256) by the
309 # firstChar and EntryCount values. If the byte value is outside the subrange, then the glyphIndex is zero
310 # (e.g. glyph not in font).
311 # If the byte index is in the subrange, then an offset index is calculated as (byteIndex - firstChar).
312 # The index to glyphIndex mapping is a subrange of the glyphIndexArray. You find the start of the subrange by
313 # counting idRangeOffset bytes from the idRangeOffset word. The first value in this subrange is the
314 # glyphIndex for the index firstChar. The offset index should then be used in this array to get the glyphIndex.
315 # Example for Logocut-Medium
316 # first byte of charcode = 129; selects subheader 1.
317 # subheader 1 = {firstChar 64, EntryCount 108,idDelta 42,idRangeOffset 0252}
318 # second byte of charCode = 66
319 # the index offset = 66-64 = 2.
320 # The subrange of the glyphIndexArray starting at 0x0252 bytes from the idRangeOffset word is:
321 # [glyphIndexArray index], [subrange array index] = glyphIndex
322 # [256], [0]=1 from charcode [129, 64]
323 # [257], [1]=2 from charcode [129, 65]
324 # [258], [2]=3 from charcode [129, 66]
325 # [259], [3]=4 from charcode [129, 67]
jvrd299b552006-10-21 13:54:30 +0000326 # So, the glyphIndex = 3 from the array. Then if idDelta is not zero and the glyph ID is not zero,
327 # add it to the glyphID to get the final glyphIndex
jvrbafa66e2003-08-28 18:04:23 +0000328 # value. In this case the final glyph index = 3+ 42 -> 45 for the final glyphIndex. Whew!
jvrbafa66e2003-08-28 18:04:23 +0000329
330 self.data = ""
jvrd299b552006-10-21 13:54:30 +0000331 self.cmap = cmap = {}
332 notdefGI = 0
jvrbafa66e2003-08-28 18:04:23 +0000333 for firstByte in range(256):
334 subHeadindex = subHeaderKeys[firstByte]
335 subHeader = subHeaderList[subHeadindex]
336 if subHeadindex == 0:
337 if (firstByte < subHeader.firstCode) or (firstByte >= subHeader.firstCode + subHeader.entryCount):
jvrd299b552006-10-21 13:54:30 +0000338 continue # gi is notdef.
jvrbafa66e2003-08-28 18:04:23 +0000339 else:
340 charCode = firstByte
341 offsetIndex = firstByte - subHeader.firstCode
342 gi = subHeader.glyphIndexArray[offsetIndex]
343 if gi != 0:
jvrd299b552006-10-21 13:54:30 +0000344 gi = (gi + subHeader.idDelta) % 0x10000
345 else:
346 continue # gi is notdef.
347 cmap[charCode] = gi
jvrbafa66e2003-08-28 18:04:23 +0000348 else:
349 if subHeader.entryCount:
jvrd299b552006-10-21 13:54:30 +0000350 charCodeOffset = firstByte * 256 + subHeader.firstCode
jvrbafa66e2003-08-28 18:04:23 +0000351 for offsetIndex in range(subHeader.entryCount):
jvrd299b552006-10-21 13:54:30 +0000352 charCode = charCodeOffset + offsetIndex
jvrbafa66e2003-08-28 18:04:23 +0000353 gi = subHeader.glyphIndexArray[offsetIndex]
354 if gi != 0:
jvrd299b552006-10-21 13:54:30 +0000355 gi = (gi + subHeader.idDelta) % 0x10000
356 else:
357 continue
358 cmap[charCode] = gi
359 # If not subHeader.entryCount, then all char codes with this first byte are
360 # mapped to .notdef. We can skip this subtable, and leave the glyphs un-encoded, which is the
361 # same as mapping it to .notdef.
362 # cmap values are GID's.
363 glyphOrder = self.ttFont.getGlyphOrder()
364 gids = cmap.values()
365 charCodes = cmap.keys()
366 lenCmap = len(gids)
367 try:
368 names = map(operator.getitem, [glyphOrder]*lenCmap, gids )
369 except IndexError:
370 getGlyphName = self.ttFont.getGlyphName
371 names = map(getGlyphName, gids )
372 map(operator.setitem, [cmap]*lenCmap, charCodes, names)
373
jvrbafa66e2003-08-28 18:04:23 +0000374
Just7842e561999-12-16 21:34:53 +0000375 def compile(self, ttFont):
jvrd299b552006-10-21 13:54:30 +0000376 if self.data:
377 return struct.pack(">HHH", self.format, self.length, self.language) + self.data
jvrbafa66e2003-08-28 18:04:23 +0000378 kEmptyTwoCharCodeRange = -1
jvrd299b552006-10-21 13:54:30 +0000379 notdefGI = 0
380
jvrbafa66e2003-08-28 18:04:23 +0000381 items = self.cmap.items()
382 items.sort()
jvrd299b552006-10-21 13:54:30 +0000383 charCodes = [item[0] for item in items]
384 names = [item[1] for item in items]
385 nameMap = ttFont.getReverseGlyphMap()
386 lenCharCodes = len(charCodes)
387 try:
388 gids = map(operator.getitem, [nameMap]*lenCharCodes, names)
389 except KeyError:
390 nameMap = ttFont.getReverseGlyphMap(rebuild=1)
391 try:
392 gids = map(operator.getitem, [nameMap]*lenCharCodes, names)
393 except KeyError:
394 # allow virtual GIDs in format 2 tables
395 gids = []
396 for name in names:
397 try:
398 gid = nameMap[name]
399 except KeyError:
400 try:
401 if (name[:3] == 'gid'):
402 gid = eval(name[3:])
403 else:
404 gid = ttFont.getGlyphID(name)
405 except:
406 raise KeyError(name)
jvrbafa66e2003-08-28 18:04:23 +0000407
jvrd299b552006-10-21 13:54:30 +0000408 gids.append(gid)
409
410 # Process the (char code to gid) item list in char code order.
411 # By definition, all one byte char codes map to subheader 0.
412 # For all the two byte char codes, we assume that the first byte maps maps to the empty subhead (with an entry count of 0,
413 # which defines all char codes in its range to map to notdef) unless proven otherwise.
414 # Note that since the char code items are processed in char code order, all the char codes with the
415 # same first byte are in sequential order.
416
417 subHeaderKeys = [ kEmptyTwoCharCodeRange for x in range(256)] # list of indices into subHeaderList.
jvrbafa66e2003-08-28 18:04:23 +0000418 subHeaderList = []
jvrd299b552006-10-21 13:54:30 +0000419
420 # We force this subheader entry 0 to exist in the subHeaderList in the case where some one comes up
421 # with a cmap where all the one byte char codes map to notdef,
422 # with the result that the subhead 0 would not get created just by processing the item list.
423 charCode = charCodes[0]
424 if charCode > 255:
425 subHeader = SubHeader()
426 subHeader.firstCode = 0
427 subHeader.entryCount = 0
428 subHeader.idDelta = 0
429 subHeader.idRangeOffset = 0
430 subHeaderList.append(subHeader)
431
jvrbafa66e2003-08-28 18:04:23 +0000432
433 lastFirstByte = -1
jvrd299b552006-10-21 13:54:30 +0000434 items = zip(charCodes, gids)
435 for charCode, gid in items:
436 if gid == 0:
437 continue
jvrbafa66e2003-08-28 18:04:23 +0000438 firstbyte = charCode >> 8
439 secondByte = charCode & 0x00FF
jvrd299b552006-10-21 13:54:30 +0000440
441 if firstbyte != lastFirstByte: # Need to update the current subhead, and start a new one.
jvrbafa66e2003-08-28 18:04:23 +0000442 if lastFirstByte > -1:
jvrd299b552006-10-21 13:54:30 +0000443 # fix GI's and iDelta of current subheader.
444 self.setIDDelta(subHeader)
445
446 # If it was sunheader 0 for one-byte charCodes, then we need to set the subHeaderKeys value to zero
447 # for the indices matching the char codes.
448 if lastFirstByte == 0:
449 for index in range(subHeader.entryCount):
450 charCode = subHeader.firstCode + index
451 subHeaderKeys[charCode] = 0
452
jvrbafa66e2003-08-28 18:04:23 +0000453 assert (subHeader.entryCount == len(subHeader.glyphIndexArray)), "Error - subhead entry count does not match len of glyphID subrange."
454 # init new subheader
455 subHeader = SubHeader()
456 subHeader.firstCode = secondByte
jvrd299b552006-10-21 13:54:30 +0000457 subHeader.entryCount = 1
458 subHeader.glyphIndexArray.append(gid)
459 subHeaderList.append(subHeader)
460 subHeaderKeys[firstbyte] = len(subHeaderList) -1
jvrbafa66e2003-08-28 18:04:23 +0000461 lastFirstByte = firstbyte
462 else:
jvrd299b552006-10-21 13:54:30 +0000463 # need to fill in with notdefs all the code points between the last charCode and the current charCode.
jvrbafa66e2003-08-28 18:04:23 +0000464 codeDiff = secondByte - (subHeader.firstCode + subHeader.entryCount)
465 for i in range(codeDiff):
jvrd299b552006-10-21 13:54:30 +0000466 subHeader.glyphIndexArray.append(notdefGI)
467 subHeader.glyphIndexArray.append(gid)
jvrbafa66e2003-08-28 18:04:23 +0000468 subHeader.entryCount = subHeader.entryCount + codeDiff + 1
jvrd299b552006-10-21 13:54:30 +0000469
470 # fix GI's and iDelta of last subheader that we we added to the subheader array.
471 self.setIDDelta(subHeader)
jvrbafa66e2003-08-28 18:04:23 +0000472
jvrd299b552006-10-21 13:54:30 +0000473 # Now we add a final subheader for the subHeaderKeys which maps to empty two byte charcode ranges.
jvrbafa66e2003-08-28 18:04:23 +0000474 subHeader = SubHeader()
475 subHeader.firstCode = 0
476 subHeader.entryCount = 0
477 subHeader.idDelta = 0
478 subHeader.idRangeOffset = 2
479 subHeaderList.append(subHeader)
480 emptySubheadIndex = len(subHeaderList) - 1
481 for index in range(256):
jvrd299b552006-10-21 13:54:30 +0000482 if subHeaderKeys[index] == kEmptyTwoCharCodeRange:
jvrbafa66e2003-08-28 18:04:23 +0000483 subHeaderKeys[index] = emptySubheadIndex
484 # Since this is the last subheader, the GlyphIndex Array starts two bytes after the start of the
jvrd299b552006-10-21 13:54:30 +0000485 # idRangeOffset word of this subHeader. We can safely point to the first entry in the GlyphIndexArray,
jvrbafa66e2003-08-28 18:04:23 +0000486 # since the first subrange of the GlyphIndexArray is for subHeader 0, which always starts with
487 # charcode 0 and GID 0.
488
jvrbafa66e2003-08-28 18:04:23 +0000489 idRangeOffset = (len(subHeaderList)-1)*8 + 2 # offset to beginning of glyphIDArray from first subheader idRangeOffset.
jvrd299b552006-10-21 13:54:30 +0000490 subheadRangeLen = len(subHeaderList) -1 # skip last special empty-set subheader; we've already hardocodes its idRangeOffset to 2.
491 for index in range(subheadRangeLen):
492 subHeader = subHeaderList[index]
493 subHeader.idRangeOffset = 0
494 for j in range(index):
495 prevSubhead = subHeaderList[j]
496 if prevSubhead.glyphIndexArray == subHeader.glyphIndexArray: # use the glyphIndexArray subarray
497 subHeader.idRangeOffset = prevSubhead.idRangeOffset - (index-j)*8
498 subHeader.glyphIndexArray = []
499 break
500 if subHeader.idRangeOffset == 0: # didn't find one.
501 subHeader.idRangeOffset = idRangeOffset
502 idRangeOffset = (idRangeOffset - 8) + subHeader.entryCount*2 # one less subheader, one more subArray.
503 else:
504 idRangeOffset = idRangeOffset - 8 # one less subheader
505
jvrbafa66e2003-08-28 18:04:23 +0000506 # Now we can write out the data!
507 length = 6 + 512 + 8*len(subHeaderList) # header, 256 subHeaderKeys, and subheader array.
508 for subhead in subHeaderList[:-1]:
jvrd299b552006-10-21 13:54:30 +0000509 length = length + len(subhead.glyphIndexArray)*2 # We can't use subhead.entryCount, as some of the subhead may share subArrays.
510 dataList = [struct.pack(">HHH", 2, length, self.language)]
jvrbafa66e2003-08-28 18:04:23 +0000511 for index in subHeaderKeys:
jvrd299b552006-10-21 13:54:30 +0000512 dataList.append(struct.pack(">H", index*8))
jvrbafa66e2003-08-28 18:04:23 +0000513 for subhead in subHeaderList:
jvrd299b552006-10-21 13:54:30 +0000514 dataList.append(struct.pack(subHeaderFormat, subhead.firstCode, subhead.entryCount, subhead.idDelta, subhead.idRangeOffset))
jvrbafa66e2003-08-28 18:04:23 +0000515 for subhead in subHeaderList[:-1]:
516 for gi in subhead.glyphIndexArray:
jvrd299b552006-10-21 13:54:30 +0000517 dataList.append(struct.pack(">H", gi))
518 data = "".join(dataList)
jvrbafa66e2003-08-28 18:04:23 +0000519 assert (len(data) == length), "Error: cmap format 2 is not same length as calculated! actual: " + str(len(data))+ " calc : " + str(length)
520 return data
jvrd299b552006-10-21 13:54:30 +0000521
522
jvrbafa66e2003-08-28 18:04:23 +0000523 def fromXML(self, (name, attrs, content), ttFont):
jvr0cd79a52004-09-25 07:30:47 +0000524 self.language = safeEval(attrs["language"])
jvrd299b552006-10-21 13:54:30 +0000525 if not hasattr(self, "cmap"):
526 self.cmap = {}
527 cmap = self.cmap
528
jvrbafa66e2003-08-28 18:04:23 +0000529 for element in content:
530 if type(element) <> TupleType:
531 continue
532 name, attrs, content = element
533 if name <> "map":
534 continue
jvrd299b552006-10-21 13:54:30 +0000535 cmap[safeEval(attrs["code"])] = attrs["name"]
Just7842e561999-12-16 21:34:53 +0000536
537
# struct format: seven big-endian unsigned 16-bit words.
# NOTE(review): presumably the format, length and language header words
# followed by segCountX2, searchRange, entrySelector and rangeShift; the
# code using this constant is not in this part of the file -- confirm.
cmap_format_4_format = ">7H"

# The arrays below follow the fixed-size fields of a format 4 subtable, in
# this order; cmap_format_4.decompile() reads every one of them as unsigned
# 16-bit values.
#uint16 endCode[segCount] # Ending character code for each segment, last = 0xFFFF.
#uint16 reservedPad # This value should be zero
#uint16 startCode[segCount] # Starting character code for each segment
#uint16 idDelta[segCount] # Delta for all character codes in segment
#uint16 idRangeOffset[segCount] # Offset in bytes to glyph indexArray, or 0
#uint16 glyphIndexArray[variable] # Glyph index array
Just7842e561999-12-16 21:34:53 +0000546
def splitRange(startCode, endCode, cmap):
    """Split a range of character codes for storage in a format 4 subtable.

    'cmap' maps every code in startCode..endCode (inclusive) to a glyph ID.
    The range is cut into subranges such that runs of consecutive glyph IDs
    that are long enough to pay for their own segment become separate
    subranges.  This is a heuristic and not proven to be optimal.

    Returns a tuple (start, end): 'end' lists the last code of every
    subrange, 'start' lists the first code of every subrange but the first
    one (that one starts at startCode).
    """
    if startCode == endCode:
        return [], [endCode]

    # Step 1: collect the runs in which the glyph IDs are consecutive.
    runs = []
    runStart = None  # first code of the run in progress, None if there is none
    prevCode = startCode
    prevID = cmap[startCode]
    for code in range(startCode + 1, endCode + 1):
        glyphID = cmap[code]
        if glyphID - 1 == prevID:
            if runStart is None:
                runStart = prevCode
        elif runStart is not None:
            runs.append((runStart, prevCode))
            runStart = None
        prevCode, prevID = code, glyphID
    if runStart is not None:
        runs.append((runStart, prevCode))
    assert prevCode == endCode

    # Step 2: drop the runs that would only make the data bigger.  A new
    # segment costs 8 bytes, not using a new segment costs 2 bytes per
    # character.
    kept = []
    for first, last in runs:
        if first == startCode and last == endCode:
            break  # the whole range, we're fine
        if first == startCode or last == endCode:
            threshold = 4  # split costs one more segment
        else:
            threshold = 8  # split costs two more segments
        if (last - first + 1) > threshold:
            kept.append((first, last))

    if not kept:
        return [], [endCode]

    # Step 3: tile the whole range.  The gaps before, between and after the
    # kept runs are the segments in which the glyph IDs are _not_ consecutive.
    segments = []
    nextCode = startCode
    for first, last in kept:
        if first != nextCode:
            segments.append((nextCode, first - 1))
        segments.append((first, last))
        nextCode = last + 1
    if nextCode != endCode + 1:
        segments.append((nextCode, endCode))

    # Step 4: transform the segments into startCode/endCode lists.
    start = [first for first, last in segments[1:]]
    end = [last for first, last in segments]

    assert len(start) + 1 == len(end)
    return start, end
624
625
Just7842e561999-12-16 21:34:53 +0000626class cmap_format_4(CmapSubtable):
627
628 def decompile(self, data, ttFont):
jvrd299b552006-10-21 13:54:30 +0000629 # we usually get here indirectly from the subtable __getattr__ function, in which case both args must be None.
630 # If not, someone is calling the subtable decompile() directly, and must provide both args.
631 if data != None and ttFont != None:
632 self.decompileHeader(self.data[offset:offset+int(length)], ttFont)
633 else:
pabs317012aa2009-11-08 15:55:53 +0000634 assert (data == None and ttFont == None), "Need both data and ttFont arguments"
jvrd299b552006-10-21 13:54:30 +0000635
636 data = self.data # decompileHeader assigns the data after the header to self.data
637 (segCountX2, searchRange, entrySelector, rangeShift) = \
638 struct.unpack(">4H", data[:8])
639 data = data[8:]
Just7842e561999-12-16 21:34:53 +0000640 segCount = segCountX2 / 2
641
jvr542b9512002-07-20 21:57:26 +0000642 allCodes = array.array("H")
jvrd299b552006-10-21 13:54:30 +0000643 allCodes.fromstring(data)
644 self.data = data = None
645
jvr9be387c2008-03-01 11:43:01 +0000646 if sys.byteorder <> "big":
jvr542b9512002-07-20 21:57:26 +0000647 allCodes.byteswap()
Just7842e561999-12-16 21:34:53 +0000648
649 # divide the data
jvr542b9512002-07-20 21:57:26 +0000650 endCode = allCodes[:segCount]
651 allCodes = allCodes[segCount+1:] # the +1 is skipping the reservedPad field
652 startCode = allCodes[:segCount]
653 allCodes = allCodes[segCount:]
654 idDelta = allCodes[:segCount]
655 allCodes = allCodes[segCount:]
656 idRangeOffset = allCodes[:segCount]
657 glyphIndexArray = allCodes[segCount:]
jvrd299b552006-10-21 13:54:30 +0000658 lenGIArray = len(glyphIndexArray)
659
Just7842e561999-12-16 21:34:53 +0000660 # build 2-byte character mapping
jvrd299b552006-10-21 13:54:30 +0000661 charCodes = []
662 gids = []
Just7842e561999-12-16 21:34:53 +0000663 for i in range(len(startCode) - 1): # don't do 0xffff!
jvrd299b552006-10-21 13:54:30 +0000664 rangeCharCodes = range(startCode[i], endCode[i] + 1)
665 charCodes = charCodes + rangeCharCodes
666 for charCode in rangeCharCodes:
Just7842e561999-12-16 21:34:53 +0000667 rangeOffset = idRangeOffset[i]
668 if rangeOffset == 0:
669 glyphID = charCode + idDelta[i]
670 else:
671 # *someone* needs to get killed.
672 index = idRangeOffset[i] / 2 + (charCode - startCode[i]) + i - len(idRangeOffset)
jvrd299b552006-10-21 13:54:30 +0000673 assert (index < lenGIArray), "In format 4 cmap, range (%d), the calculated index (%d) into the glyph index array is not less than the length of the array (%d) !" % (i, index, lenGIArray)
Just7842e561999-12-16 21:34:53 +0000674 if glyphIndexArray[index] <> 0: # if not missing glyph
675 glyphID = glyphIndexArray[index] + idDelta[i]
676 else:
677 glyphID = 0 # missing glyph
jvrd299b552006-10-21 13:54:30 +0000678 gids.append(glyphID % 0x10000)
679
680 self.cmap = cmap = {}
681 lenCmap = len(gids)
682 glyphOrder = self.ttFont.getGlyphOrder()
683 try:
684 names = map(operator.getitem, [glyphOrder]*lenCmap, gids )
685 except IndexError:
686 getGlyphName = self.ttFont.getGlyphName
687 names = map(getGlyphName, gids )
688 map(operator.setitem, [cmap]*lenCmap, charCodes, names)
689
690
691
692 def setIDDelta(self, idDelta):
693 # The lowest gid in glyphIndexArray, after subtracting idDelta, must be 1.
694 # idDelta is a short, and must be between -32K and 32K
695 # startCode can be between 0 and 64K-1, and the first glyph index can be between 1 and 64K-1
696 # This means that we have a problem because we can need to assign to idDelta values
697 # between -(64K-2) and 64K -1.
698 # Since the final gi is reconstructed from the glyphArray GID by:
699 # (short)finalGID = (gid + idDelta) % 0x10000),
700 # we can get from a startCode of 0 to a final GID of 64 -1K by subtracting 1, and casting the
701 # negative number to an unsigned short.
702 # Similarly , we can get from a startCode of 64K-1 to a final GID of 1 by adding 2, because of
703 # the modulo arithmetic.
704
705 if idDelta > 0x7FFF:
706 idDelta = idDelta - 0x10000
707 elif idDelta < -0x7FFF:
708 idDelta = idDelta + 0x10000
709
710 return idDelta
711
712
Just7842e561999-12-16 21:34:53 +0000713 def compile(self, ttFont):
jvrd299b552006-10-21 13:54:30 +0000714 if self.data:
715 return struct.pack(">HHH", self.format, self.length, self.language) + self.data
716
jvrea9dfa92002-05-12 17:14:50 +0000717 from fontTools.ttLib.sfnt import maxPowerOfTwo
Just7842e561999-12-16 21:34:53 +0000718
jvrd299b552006-10-21 13:54:30 +0000719 charCodes = self.cmap.keys()
jvrd299b552006-10-21 13:54:30 +0000720 lenCharCodes = len(charCodes)
721 if lenCharCodes == 0:
722 startCode = [0xffff]
723 endCode = [0xffff]
724 else:
jvr2db352c2008-02-29 14:43:49 +0000725 charCodes.sort()
726 names = map(operator.getitem, [self.cmap]*lenCharCodes, charCodes)
jvrd299b552006-10-21 13:54:30 +0000727 nameMap = ttFont.getReverseGlyphMap()
728 try:
729 gids = map(operator.getitem, [nameMap]*lenCharCodes, names)
730 except KeyError:
731 nameMap = ttFont.getReverseGlyphMap(rebuild=1)
732 try:
733 gids = map(operator.getitem, [nameMap]*lenCharCodes, names)
734 except KeyError:
735 # allow virtual GIDs in format 4 tables
736 gids = []
737 for name in names:
738 try:
739 gid = nameMap[name]
740 except KeyError:
741 try:
742 if (name[:3] == 'gid'):
743 gid = eval(name[3:])
744 else:
745 gid = ttFont.getGlyphID(name)
746 except:
747 raise KeyError(name)
748
749 gids.append(gid)
750 cmap = {} # code:glyphID mapping
751 map(operator.setitem, [cmap]*len(charCodes), charCodes, gids)
Just7842e561999-12-16 21:34:53 +0000752
jvrd299b552006-10-21 13:54:30 +0000753 # Build startCode and endCode lists.
754 # Split the char codes in ranges of consecutive char codes, then split
755 # each range in more ranges of consecutive/not consecutive glyph IDs.
756 # See splitRange().
757 lastCode = charCodes[0]
758 endCode = []
759 startCode = [lastCode]
760 for charCode in charCodes[1:]: # skip the first code, it's the first start code
761 if charCode == lastCode + 1:
762 lastCode = charCode
763 continue
764 start, end = splitRange(startCode[-1], lastCode, cmap)
765 startCode.extend(start)
766 endCode.extend(end)
767 startCode.append(charCode)
jvr542b9512002-07-20 21:57:26 +0000768 lastCode = charCode
jvrd299b552006-10-21 13:54:30 +0000769 endCode.append(lastCode)
770 startCode.append(0xffff)
771 endCode.append(0xffff)
Just7842e561999-12-16 21:34:53 +0000772
jvr542b9512002-07-20 21:57:26 +0000773 # build up rest of cruft
Just7842e561999-12-16 21:34:53 +0000774 idDelta = []
775 idRangeOffset = []
776 glyphIndexArray = []
Just7842e561999-12-16 21:34:53 +0000777 for i in range(len(endCode)-1): # skip the closing codes (0xffff)
778 indices = []
jvr542b9512002-07-20 21:57:26 +0000779 for charCode in range(startCode[i], endCode[i] + 1):
780 indices.append(cmap[charCode])
jvrd299b552006-10-21 13:54:30 +0000781 if (indices == range(indices[0], indices[0] + len(indices))):
782 idDeltaTemp = self.setIDDelta(indices[0] - startCode[i])
783 idDelta.append( idDeltaTemp)
Just7842e561999-12-16 21:34:53 +0000784 idRangeOffset.append(0)
785 else:
786 # someone *definitely* needs to get killed.
787 idDelta.append(0)
788 idRangeOffset.append(2 * (len(endCode) + len(glyphIndexArray) - i))
jvr542b9512002-07-20 21:57:26 +0000789 glyphIndexArray.extend(indices)
Just7842e561999-12-16 21:34:53 +0000790 idDelta.append(1) # 0xffff + 1 == (tadaa!) 0. So this end code maps to .notdef
791 idRangeOffset.append(0)
792
793 # Insane.
794 segCount = len(endCode)
795 segCountX2 = segCount * 2
jvr542b9512002-07-20 21:57:26 +0000796 maxExponent = maxPowerOfTwo(segCount)
797 searchRange = 2 * (2 ** maxExponent)
798 entrySelector = maxExponent
Just7842e561999-12-16 21:34:53 +0000799 rangeShift = 2 * segCount - searchRange
800
Behdad Esfahbod8da82422013-08-16 12:56:08 -0400801 charCodeArray = array.array("H", endCode + [0] + startCode)
802 idDeltaeArray = array.array("h", idDelta)
803 restArray = array.array("H", idRangeOffset + glyphIndexArray)
jvr9be387c2008-03-01 11:43:01 +0000804 if sys.byteorder <> "big":
Behdad Esfahbod8da82422013-08-16 12:56:08 -0400805 charCodeArray.byteswap()
806 idDeltaeArray.byteswap()
807 restArray.byteswap()
jvrd299b552006-10-21 13:54:30 +0000808 data = charCodeArray.tostring() + idDeltaeArray.tostring() + restArray.tostring()
809
Just7842e561999-12-16 21:34:53 +0000810 length = struct.calcsize(cmap_format_4_format) + len(data)
jvr0cd79a52004-09-25 07:30:47 +0000811 header = struct.pack(cmap_format_4_format, self.format, length, self.language,
Just7842e561999-12-16 21:34:53 +0000812 segCountX2, searchRange, entrySelector, rangeShift)
jvrd299b552006-10-21 13:54:30 +0000813 return header + data
Just7842e561999-12-16 21:34:53 +0000814
Just7842e561999-12-16 21:34:53 +0000815 def fromXML(self, (name, attrs, content), ttFont):
jvr0cd79a52004-09-25 07:30:47 +0000816 self.language = safeEval(attrs["language"])
jvrd299b552006-10-21 13:54:30 +0000817 if not hasattr(self, "cmap"):
818 self.cmap = {}
819 cmap = self.cmap
820
Just7842e561999-12-16 21:34:53 +0000821 for element in content:
jvr22dcb9e2002-05-10 19:03:34 +0000822 if type(element) <> TupleType:
Just7842e561999-12-16 21:34:53 +0000823 continue
jvrd299b552006-10-21 13:54:30 +0000824 nameMap, attrsMap, dummyContent = element
825 if nameMap <> "map":
826 assert 0, "Unrecognized keyword in cmap subtable"
827 cmap[safeEval(attrsMap["code"])] = attrsMap["name"]
Just7842e561999-12-16 21:34:53 +0000828
829
830class cmap_format_6(CmapSubtable):
831
832 def decompile(self, data, ttFont):
jvrd299b552006-10-21 13:54:30 +0000833 # we usually get here indirectly from the subtable __getattr__ function, in which case both args must be None.
834 # If not, someone is calling the subtable decompile() directly, and must provide both args.
835 if data != None and ttFont != None:
836 self.decompileHeader(data[offset:offset+int(length)], ttFont)
837 else:
pabs317012aa2009-11-08 15:55:53 +0000838 assert (data == None and ttFont == None), "Need both data and ttFont arguments"
jvrd299b552006-10-21 13:54:30 +0000839
840 data = self.data # decompileHeader assigns the data after the header to self.data
841 firstCode, entryCount = struct.unpack(">HH", data[:4])
Just7842e561999-12-16 21:34:53 +0000842 firstCode = int(firstCode)
jvrd299b552006-10-21 13:54:30 +0000843 data = data[4:]
Justf6b15632000-08-23 12:33:14 +0000844 #assert len(data) == 2 * entryCount # XXX not true in Apple's Helvetica!!!
Just7842e561999-12-16 21:34:53 +0000845 glyphIndexArray = array.array("H")
Just43fa4be2000-10-11 18:04:03 +0000846 glyphIndexArray.fromstring(data[:2 * int(entryCount)])
jvr9be387c2008-03-01 11:43:01 +0000847 if sys.byteorder <> "big":
Just7842e561999-12-16 21:34:53 +0000848 glyphIndexArray.byteswap()
jvrd299b552006-10-21 13:54:30 +0000849 self.data = data = None
850
Just7842e561999-12-16 21:34:53 +0000851 self.cmap = cmap = {}
jvrd299b552006-10-21 13:54:30 +0000852
853 lenArray = len(glyphIndexArray)
854 charCodes = range(firstCode, firstCode + lenArray )
855 glyphOrder = self.ttFont.getGlyphOrder()
856 try:
857 names = map(operator.getitem, [glyphOrder]*lenArray, glyphIndexArray )
858 except IndexError:
859 getGlyphName = self.ttFont.getGlyphName
860 names = map(getGlyphName, glyphIndexArray )
861 map(operator.setitem, [cmap]*lenArray, charCodes, names)
Just7842e561999-12-16 21:34:53 +0000862
863 def compile(self, ttFont):
jvrd299b552006-10-21 13:54:30 +0000864 if self.data:
865 return struct.pack(">HHH", self.format, self.length, self.language) + self.data
866 cmap = self.cmap
867 codes = cmap.keys()
868 if codes: # yes, there are empty cmap tables.
869 codes.sort()
870 lenCodes = len(codes)
871 assert codes == range(codes[0], codes[0] + lenCodes)
872 firstCode = codes[0]
873 valueList = map(operator.getitem, [cmap]*lenCodes, codes)
874 valueList = map(ttFont.getGlyphID, valueList)
Behdad Esfahbod8da82422013-08-16 12:56:08 -0400875 glyphIndexArray = array.array("H", valueList)
jvr9be387c2008-03-01 11:43:01 +0000876 if sys.byteorder <> "big":
Behdad Esfahbod8da82422013-08-16 12:56:08 -0400877 glyphIndexArray.byteswap()
jvrd299b552006-10-21 13:54:30 +0000878 data = glyphIndexArray.tostring()
879 else:
880 data = ""
881 firstCode = 0
Just7842e561999-12-16 21:34:53 +0000882 header = struct.pack(">HHHHH",
jvrd299b552006-10-21 13:54:30 +0000883 6, len(data) + 10, self.language, firstCode, len(codes))
Just7842e561999-12-16 21:34:53 +0000884 return header + data
885
Just7842e561999-12-16 21:34:53 +0000886 def fromXML(self, (name, attrs, content), ttFont):
jvr0cd79a52004-09-25 07:30:47 +0000887 self.language = safeEval(attrs["language"])
jvrd299b552006-10-21 13:54:30 +0000888 if not hasattr(self, "cmap"):
889 self.cmap = {}
890 cmap = self.cmap
891
Just7842e561999-12-16 21:34:53 +0000892 for element in content:
jvr22dcb9e2002-05-10 19:03:34 +0000893 if type(element) <> TupleType:
Just7842e561999-12-16 21:34:53 +0000894 continue
895 name, attrs, content = element
896 if name <> "map":
897 continue
jvrd299b552006-10-21 13:54:30 +0000898 cmap[safeEval(attrs["code"])] = attrs["name"]
Just7842e561999-12-16 21:34:53 +0000899
900
jvr924e4e22003-02-08 10:45:23 +0000901class cmap_format_12(CmapSubtable):
902
jvrd299b552006-10-21 13:54:30 +0000903 def __init__(self, format):
904 self.format = format
905 self.reserved = 0
906 self.data = None
907 self.ttFont = None
908
909 def decompileHeader(self, data, ttFont):
jvr924e4e22003-02-08 10:45:23 +0000910 format, reserved, length, language, nGroups = struct.unpack(">HHLLL", data[:16])
jvrd299b552006-10-21 13:54:30 +0000911 assert len(data) == (16 + nGroups*12) == (length), "corrupt cmap table format 12 (data length: %d, header length: %d)" % (len(data), length)
jvr924e4e22003-02-08 10:45:23 +0000912 self.format = format
913 self.reserved = reserved
914 self.length = length
915 self.language = language
916 self.nGroups = nGroups
jvrd299b552006-10-21 13:54:30 +0000917 self.data = data[16:]
918 self.ttFont = ttFont
919
920 def decompile(self, data, ttFont):
921 # we usually get here indirectly from the subtable __getattr__ function, in which case both args must be None.
922 # If not, someone is calling the subtable decompile() directly, and must provide both args.
923 if data != None and ttFont != None:
924 self.decompileHeader(data[offset:offset+int(length)], ttFont)
925 else:
pabs317012aa2009-11-08 15:55:53 +0000926 assert (data == None and ttFont == None), "Need both data and ttFont arguments"
jvrd299b552006-10-21 13:54:30 +0000927
928 data = self.data # decompileHeader assigns the data after the header to self.data
929 charCodes = []
930 gids = []
931 pos = 0
932 for i in range(self.nGroups):
933 startCharCode, endCharCode, glyphID = struct.unpack(">LLL",data[pos:pos+12] )
934 pos += 12
935 lenGroup = 1 + endCharCode - startCharCode
936 charCodes += range(startCharCode, endCharCode +1)
937 gids += range(glyphID, glyphID + lenGroup)
938 self.data = data = None
939 self.cmap = cmap = {}
940 lenCmap = len(gids)
941 glyphOrder = self.ttFont.getGlyphOrder()
942 try:
943 names = map(operator.getitem, [glyphOrder]*lenCmap, gids )
944 except IndexError:
945 getGlyphName = self.ttFont.getGlyphName
946 names = map(getGlyphName, gids )
947 map(operator.setitem, [cmap]*lenCmap, charCodes, names)
jvr924e4e22003-02-08 10:45:23 +0000948
949 def compile(self, ttFont):
jvrd299b552006-10-21 13:54:30 +0000950 if self.data:
951 return struct.pack(">HHLLL", self.format, self.reserved , self.length, self.language, self.nGroups) + self.data
jvr924e4e22003-02-08 10:45:23 +0000952 charCodes = self.cmap.keys()
jvrd299b552006-10-21 13:54:30 +0000953 lenCharCodes = len(charCodes)
954 names = self.cmap.values()
955 nameMap = ttFont.getReverseGlyphMap()
956 try:
957 gids = map(operator.getitem, [nameMap]*lenCharCodes, names)
958 except KeyError:
959 nameMap = ttFont.getReverseGlyphMap(rebuild=1)
960 try:
961 gids = map(operator.getitem, [nameMap]*lenCharCodes, names)
962 except KeyError:
963 # allow virtual GIDs in format 12 tables
964 gids = []
965 for name in names:
966 try:
967 gid = nameMap[name]
968 except KeyError:
969 try:
970 if (name[:3] == 'gid'):
971 gid = eval(name[3:])
972 else:
973 gid = ttFont.getGlyphID(name)
974 except:
975 raise KeyError(name)
976
977 gids.append(gid)
978
979 cmap = {} # code:glyphID mapping
980 map(operator.setitem, [cmap]*len(charCodes), charCodes, gids)
981
jvr924e4e22003-02-08 10:45:23 +0000982 charCodes.sort()
jvrd299b552006-10-21 13:54:30 +0000983 index = 0
jvr924e4e22003-02-08 10:45:23 +0000984 startCharCode = charCodes[0]
985 startGlyphID = cmap[startCharCode]
jvrd299b552006-10-21 13:54:30 +0000986 lastGlyphID = startGlyphID - 1
987 lastCharCode = startCharCode - 1
jvr0cd79a52004-09-25 07:30:47 +0000988 nGroups = 0
jvrd299b552006-10-21 13:54:30 +0000989 dataList = []
990 maxIndex = len(charCodes)
991 for index in range(maxIndex):
992 charCode = charCodes[index]
jvr924e4e22003-02-08 10:45:23 +0000993 glyphID = cmap[charCode]
jvrd299b552006-10-21 13:54:30 +0000994 if (glyphID != 1 + lastGlyphID) or (charCode != 1 + lastCharCode):
995 dataList.append(struct.pack(">LLL", startCharCode, lastCharCode, startGlyphID))
jvr924e4e22003-02-08 10:45:23 +0000996 startCharCode = charCode
jvrd299b552006-10-21 13:54:30 +0000997 startGlyphID = glyphID
jvr924e4e22003-02-08 10:45:23 +0000998 nGroups = nGroups + 1
jvrd299b552006-10-21 13:54:30 +0000999 lastGlyphID = glyphID
1000 lastCharCode = charCode
1001 dataList.append(struct.pack(">LLL", startCharCode, lastCharCode, startGlyphID))
jvr0cd79a52004-09-25 07:30:47 +00001002 nGroups = nGroups + 1
jvrd299b552006-10-21 13:54:30 +00001003 data = "".join(dataList)
1004 lengthSubtable = len(data) +16
1005 assert len(data) == (nGroups*12) == (lengthSubtable-16)
1006 return struct.pack(">HHLLL", self.format, self.reserved , lengthSubtable, self.language, nGroups) + data
jvr924e4e22003-02-08 10:45:23 +00001007
1008 def toXML(self, writer, ttFont):
1009 writer.begintag(self.__class__.__name__, [
1010 ("platformID", self.platformID),
1011 ("platEncID", self.platEncID),
1012 ("format", self.format),
1013 ("reserved", self.reserved),
1014 ("length", self.length),
1015 ("language", self.language),
1016 ("nGroups", self.nGroups),
1017 ])
1018 writer.newline()
jvra84b28d2004-09-25 09:06:58 +00001019 codes = self.cmap.items()
1020 codes.sort()
1021 self._writeCodes(codes, writer)
jvr924e4e22003-02-08 10:45:23 +00001022 writer.endtag(self.__class__.__name__)
1023 writer.newline()
1024
1025 def fromXML(self, (name, attrs, content), ttFont):
jvrd299b552006-10-21 13:54:30 +00001026 self.format = safeEval(attrs["format"])
1027 self.reserved = safeEval(attrs["reserved"])
1028 self.length = safeEval(attrs["length"])
jvr924e4e22003-02-08 10:45:23 +00001029 self.language = safeEval(attrs["language"])
jvrd299b552006-10-21 13:54:30 +00001030 self.nGroups = safeEval(attrs["nGroups"])
1031 if not hasattr(self, "cmap"):
1032 self.cmap = {}
1033 cmap = self.cmap
1034
jvr924e4e22003-02-08 10:45:23 +00001035 for element in content:
1036 if type(element) <> TupleType:
1037 continue
1038 name, attrs, content = element
1039 if name <> "map":
1040 continue
jvrd299b552006-10-21 13:54:30 +00001041 cmap[safeEval(attrs["code"])] = attrs["name"]
jvr924e4e22003-02-08 10:45:23 +00001042
1043
def cvtToUVS(threeByteString):
    """Return the integer held in a 3-byte big-endian string.

    The value is zero-extended to four bytes on the most significant side
    and unpacked as big-endian. The ">" in the struct format fixes the byte
    order, so the host's byte order must not influence where the padding
    byte goes.
    """
    val, = struct.unpack(">L", b"\0" + threeByteString)
    return val
1051
def cvtFromUVS(val):
    """Return the low 24 bits of val as a 3-byte big-endian string.

    val is packed as a 4-byte big-endian integer and the most significant
    byte is dropped. The ">" in the struct format fixes the byte order, so
    the host's byte order must not influence which byte is dropped.
    """
    return struct.pack(">L", val)[1:]
1058
def cmpUVSListEntry(first, second):
    """Three-way comparison of two (uv, glyphName) entries.

    Entries whose glyph name is None sort before entries that have a name.
    Otherwise entries are ordered by uv, then by glyph name.
    Returns -1, 0 or 1.
    """
    uv1, glyphName1 = first
    uv2, glyphName2 = second

    if glyphName1 is None and glyphName2 is not None:
        return -1
    if glyphName2 is None and glyphName1 is not None:
        return 1

    # From here on either both names are None or both are set.
    ret = (uv1 > uv2) - (uv1 < uv2)
    if ret or glyphName1 is None:
        # Different code points decide the order; two nameless entries have
        # nothing further to compare.
        return ret
    return (glyphName1 > glyphName2) - (glyphName1 < glyphName2)
1072
1073
1074class cmap_format_14(CmapSubtable):
1075
1076 def decompileHeader(self, data, ttFont):
1077 format, length, numVarSelectorRecords = struct.unpack(">HLL", data[:10])
1078 self.data = data[10:]
1079 self.length = length
1080 self.numVarSelectorRecords = numVarSelectorRecords
1081 self.ttFont = ttFont
1082 self.language = 0xFF # has no language.
1083
1084 def decompile(self, data, ttFont):
1085 if data != None and ttFont != None:
1086 self.decompileHeader(data, ttFont)
1087 else:
pabs317012aa2009-11-08 15:55:53 +00001088 assert (data == None and ttFont == None), "Need both data and ttFont arguments"
jvr0cb8a082008-05-16 15:07:09 +00001089 data = self.data
1090
1091 self.cmap = {} # so that clients that expect this to exist in a cmap table won't fail.
1092 uvsDict = {}
1093 recOffset = 0
1094 for n in range(self.numVarSelectorRecords):
1095 uvs, defOVSOffset, nonDefUVSOffset = struct.unpack(">3sLL", data[recOffset:recOffset +11])
1096 recOffset += 11
1097 varUVS = cvtToUVS(uvs)
1098 if defOVSOffset:
1099 startOffset = defOVSOffset - 10
1100 numValues, = struct.unpack(">L", data[startOffset:startOffset+4])
1101 startOffset +=4
1102 for r in range(numValues):
1103 uv, addtlCnt = struct.unpack(">3sB", data[startOffset:startOffset+4])
1104 startOffset += 4
1105 firstBaseUV = cvtToUVS(uv)
1106 cnt = addtlCnt+1
1107 baseUVList = range(firstBaseUV, firstBaseUV+cnt)
1108 glyphList = [None]*cnt
1109 localUVList = zip(baseUVList, glyphList)
1110 try:
1111 uvsDict[varUVS].extend(localUVList)
1112 except KeyError:
1113 uvsDict[varUVS] = localUVList
1114
1115 if nonDefUVSOffset:
1116 startOffset = nonDefUVSOffset - 10
1117 numRecs, = struct.unpack(">L", data[startOffset:startOffset+4])
1118 startOffset +=4
1119 localUVList = []
1120 for r in range(numRecs):
1121 uv, gid = struct.unpack(">3sH", data[startOffset:startOffset+5])
1122 startOffset += 5
1123 uv = cvtToUVS(uv)
1124 glyphName = self.ttFont.getGlyphName(gid)
1125 localUVList.append( [uv, glyphName] )
1126 try:
1127 uvsDict[varUVS].extend(localUVList)
1128 except KeyError:
1129 uvsDict[varUVS] = localUVList
1130
1131 self.uvsDict = uvsDict
1132
1133 def toXML(self, writer, ttFont):
1134 writer.begintag(self.__class__.__name__, [
1135 ("platformID", self.platformID),
1136 ("platEncID", self.platEncID),
1137 ("format", self.format),
1138 ("length", self.length),
1139 ("numVarSelectorRecords", self.numVarSelectorRecords),
1140 ])
1141 writer.newline()
1142 uvsDict = self.uvsDict
1143 uvsList = uvsDict.keys()
1144 uvsList.sort()
1145 for uvs in uvsList:
1146 uvList = uvsDict[uvs]
1147 uvList.sort(cmpUVSListEntry)
1148 for uv, gname in uvList:
1149 if gname == None:
1150 gname = "None"
1151 # I use the arg rather than th keyword syntax in order to preserve the attribute order.
1152 writer.simpletag("map", [ ("uvs",hex(uvs)), ("uv",hex(uv)), ("name", gname)] )
1153 writer.newline()
1154 writer.endtag(self.__class__.__name__)
1155 writer.newline()
1156
1157 def fromXML(self, (name, attrs, content), ttFont):
1158 self.format = safeEval(attrs["format"])
1159 self.length = safeEval(attrs["length"])
1160 self.numVarSelectorRecords = safeEval(attrs["numVarSelectorRecords"])
1161 self.language = 0xFF # provide a value so that CmapSubtable.__cmp__() won't fail
1162 if not hasattr(self, "cmap"):
1163 self.cmap = {} # so that clients that expect this to exist in a cmap table won't fail.
1164 if not hasattr(self, "uvsDict"):
1165 self.uvsDict = {}
1166 uvsDict = self.uvsDict
1167
1168 for element in content:
1169 if type(element) <> TupleType:
1170 continue
1171 name, attrs, content = element
1172 if name <> "map":
1173 continue
1174 uvs = safeEval(attrs["uvs"])
1175 uv = safeEval(attrs["uv"])
1176 gname = attrs["name"]
1177 if gname == "None":
1178 gname = None
1179 try:
1180 uvsDict[uvs].append( [uv, gname])
1181 except KeyError:
1182 uvsDict[uvs] = [ [uv, gname] ]
1183
1184
1185 def compile(self, ttFont):
1186 if self.data:
1187 return struct.pack(">HLL", self.format, self.length , self.numVarSelectorRecords) + self.data
1188
1189 uvsDict = self.uvsDict
1190 uvsList = uvsDict.keys()
1191 uvsList.sort()
1192 self.numVarSelectorRecords = len(uvsList)
1193 offset = 10 + self.numVarSelectorRecords*11 # current value is end of VarSelectorRecords block.
1194 data = []
1195 varSelectorRecords =[]
1196 for uvs in uvsList:
1197 entryList = uvsDict[uvs]
1198
1199 defList = filter(lambda entry: entry[1] == None, entryList)
1200 if defList:
1201 defList = map(lambda entry: entry[0], defList)
1202 defOVSOffset = offset
1203 defList.sort()
1204
1205 lastUV = defList[0]
1206 cnt = -1
1207 defRecs = []
1208 for defEntry in defList:
1209 cnt +=1
1210 if (lastUV+cnt) != defEntry:
1211 rec = struct.pack(">3sB", cvtFromUVS(lastUV), cnt-1)
1212 lastUV = defEntry
1213 defRecs.append(rec)
1214 cnt = 0
1215
1216 rec = struct.pack(">3sB", cvtFromUVS(lastUV), cnt)
1217 defRecs.append(rec)
1218
1219 numDefRecs = len(defRecs)
1220 data.append(struct.pack(">L", numDefRecs))
1221 data.extend(defRecs)
1222 offset += 4 + numDefRecs*4
1223 else:
1224 defOVSOffset = 0
1225
1226 ndefList = filter(lambda entry: entry[1] != None, entryList)
1227 if ndefList:
1228 nonDefUVSOffset = offset
1229 ndefList.sort()
1230 numNonDefRecs = len(ndefList)
1231 data.append(struct.pack(">L", numNonDefRecs))
1232 offset += 4 + numNonDefRecs*5
1233
1234 for uv, gname in ndefList:
1235 gid = ttFont.getGlyphID(gname)
1236 ndrec = struct.pack(">3sH", cvtFromUVS(uv), gid)
1237 data.append(ndrec)
1238 else:
1239 nonDefUVSOffset = 0
1240
1241 vrec = struct.pack(">3sLL", cvtFromUVS(uvs), defOVSOffset, nonDefUVSOffset)
1242 varSelectorRecords.append(vrec)
1243
1244 data = "".join(varSelectorRecords) + "".join(data)
1245 self.length = 10 + len(data)
1246 headerdata = struct.pack(">HLL", self.format, self.length , self.numVarSelectorRecords)
1247 self.data = headerdata + data
1248
1249 return self.data
1250
1251
Just7842e561999-12-16 21:34:53 +00001252class cmap_format_unknown(CmapSubtable):
1253
jvra84b28d2004-09-25 09:06:58 +00001254 def toXML(self, writer, ttFont):
jvrd299b552006-10-21 13:54:30 +00001255 cmapName = self.__class__.__name__[:12] + str(self.format)
1256 writer.begintag(cmapName, [
jvra84b28d2004-09-25 09:06:58 +00001257 ("platformID", self.platformID),
1258 ("platEncID", self.platEncID),
1259 ])
1260 writer.newline()
jvrd299b552006-10-21 13:54:30 +00001261 writer.dumphex(self.data)
1262 writer.endtag(cmapName)
jvra84b28d2004-09-25 09:06:58 +00001263 writer.newline()
1264
jvrd299b552006-10-21 13:54:30 +00001265 def fromXML(self, (name, attrs, content), ttFont):
1266 self.data = readHex(content)
1267 self.cmap = {}
1268
1269 def decompileHeader(self, data, ttFont):
jvr427f9802004-09-26 18:32:50 +00001270 self.language = 0 # dummy value
Just7842e561999-12-16 21:34:53 +00001271 self.data = data
1272
jvrd299b552006-10-21 13:54:30 +00001273 def decompile(self, data, ttFont):
1274 # we usually get here indirectly from the subtable __getattr__ function, in which case both args must be None.
1275 # If not, someone is calling the subtable decompile() directly, and must provide both args.
1276 if data != None and ttFont != None:
1277 self.decompileHeader(data[offset:offset+int(length)], ttFont)
1278 else:
pabs317012aa2009-11-08 15:55:53 +00001279 assert (data == None and ttFont == None), "Need both data and ttFont arguments"
Just7842e561999-12-16 21:34:53 +00001280
jvrd299b552006-10-21 13:54:30 +00001281 def compile(self, ttFont):
1282 if self.data:
1283 return self.data
1284 else:
1285 return None
Just7842e561999-12-16 21:34:53 +00001286
# Maps a subtable format number to the class that handles it; formats not
# listed here are handled by cmap_format_unknown.
cmap_classes = {
        0: cmap_format_0,
        2: cmap_format_2,
        4: cmap_format_4,
        6: cmap_format_6,
        12: cmap_format_12,
        14: cmap_format_14,
        }