blob: 3398a889621c1ba147c5c645987d4d5f9b472574 [file] [log] [blame]
Behdad Esfahbod1ae29592014-01-14 15:07:50 +08001from __future__ import print_function, division, absolute_import
Behdad Esfahbod30e691e2013-11-27 17:27:45 -05002from fontTools.misc.py23 import *
3from fontTools.misc.textTools import safeEval, readHex
Behdad Esfahbod0f74e802014-03-12 12:17:33 -07004from fontTools.unicode import Unicode
Behdad Esfahbod2b06aaa2013-11-27 02:34:11 -05005from . import DefaultTable
Behdad Esfahbod30e691e2013-11-27 17:27:45 -05006import sys
Just7842e561999-12-16 21:34:53 +00007import struct
Just7842e561999-12-16 21:34:53 +00008import array
jvrd299b552006-10-21 13:54:30 +00009import operator
Just7842e561999-12-16 21:34:53 +000010
11
class table__c_m_a_p(DefaultTable.DefaultTable):
    """The 'cmap' table: a version number plus a list of encoding subtables.

    ``self.tables`` holds one subtable object per encoding record. Each
    subtable carries ``platformID`` and ``platEncID`` attributes.
    """

    def getcmap(self, platformID, platEncID):
        """Return the first subtable matching both IDs, or None if absent."""
        for subtable in self.tables:
            if (subtable.platformID == platformID and
                    subtable.platEncID == platEncID):
                return subtable
        return None # not found

    def decompile(self, data, ttFont):
        """Parse the binary cmap table in `data` into `self.tables`.

        Only the header of each subtable is decompiled here. Subtables
        reported as zero-length are skipped with a printed error. Encoding
        records pointing at the same offset share one `cmap` dictionary.
        """
        tableVersion, numSubTables = struct.unpack(">HH", data[:4])
        self.tableVersion = int(tableVersion)
        self.tables = tables = []
        # Maps subtable offset -> index in `tables` of the first subtable
        # decompiled from that offset.
        seenOffsets = {}
        for i in range(numSubTables):
            # The subtable offset is an unsigned 32-bit value.
            platformID, platEncID, offset = struct.unpack(
                ">HHL", data[4+i*8:4+(i+1)*8])
            platformID, platEncID = int(platformID), int(platEncID)
            subtableFormat, length = struct.unpack(">HH", data[offset:offset+4])
            if subtableFormat in [8,10,12,13]:
                # These formats have a reserved word and a 32-bit length.
                subtableFormat, reserved, length = struct.unpack(">HHL", data[offset:offset+8])
            elif subtableFormat in [14]:
                subtableFormat, length = struct.unpack(">HL", data[offset:offset+6])

            if not length:
                print("Error: cmap subtable is reported as having zero length: platformID %s, platEncID %s, format %s offset %s. Skipping table." % (platformID, platEncID,subtableFormat, offset))
                continue
            if subtableFormat not in cmap_classes:
                table = cmap_format_unknown(subtableFormat)
            else:
                table = cmap_classes[subtableFormat](subtableFormat)
            table.platformID = platformID
            table.platEncID = platEncID
            # Note that by default we decompile only the subtable header info;
            # any other data gets decompiled only when an attribute of the
            # subtable is referenced.
            table.decompileHeader(data[offset:offset+int(length)], ttFont)
            if offset in seenOffsets:
                table.cmap = tables[seenOffsets[offset]].cmap
            else:
                # Record the position in `tables`, not the encoding record
                # index `i`: the two differ once a subtable was skipped.
                seenOffsets[offset] = len(tables)
            tables.append(table)

    def compile(self, ttFont):
        """Return the binary cmap table built from `self.tables`.

        Sorts `self.tables` in place. Subtables sharing one `cmap` object,
        or compiling to identical bytes, are written once and referenced
        from several encoding records.
        """
        self.tables.sort() # sort according to the spec; see CmapSubtable.__lt__()
        numSubTables = len(self.tables)
        totalOffset = 4 + 8 * numSubTables
        records = [struct.pack(">HH", self.tableVersion, numSubTables)]
        chunks = []
        tableDataLength = 0
        seen = {} # Some tables are the same object reference. Don't compile them twice.
        done = {} # Some tables are different objects, but compile to the same data chunk
        for table in self.tables:
            try:
                offset = seen[id(table.cmap)]
            except KeyError:
                chunk = table.compile(ttFont)
                if chunk in done:
                    offset = done[chunk]
                else:
                    offset = seen[id(table.cmap)] = done[chunk] = totalOffset + tableDataLength
                    chunks.append(chunk)
                    tableDataLength += len(chunk)
            records.append(struct.pack(">HHL", table.platformID, table.platEncID, offset))
        return b"".join(records + chunks)

    def toXML(self, writer, ttFont):
        """Write the table version followed by every subtable to `writer`."""
        writer.simpletag("tableVersion", version=self.tableVersion)
        writer.newline()
        for table in self.tables:
            table.toXML(writer, ttFont)

    def fromXML(self, name, attrs, content, ttFont):
        """Handle one child element: the version tag or a cmap_format_N tag.

        Elements with any other name are ignored.
        """
        if name == "tableVersion":
            self.tableVersion = safeEval(attrs["version"])
            return
        if name[:12] != "cmap_format_":
            return
        if not hasattr(self, "tables"):
            self.tables = []
        subtableFormat = safeEval(name[12:])
        if subtableFormat not in cmap_classes:
            table = cmap_format_unknown(subtableFormat)
        else:
            table = cmap_classes[subtableFormat](subtableFormat)
        table.platformID = safeEval(attrs["platformID"])
        table.platEncID = safeEval(attrs["platEncID"])
        table.fromXML(name, attrs, content, ttFont)
        self.tables.append(table)
99
100
class CmapSubtable(object):
    """Base class for cmap subtables, with lazy decompilation.

    `decompileHeader` stores the undecoded body in `self.data`. The first
    access to an attribute that is not set yet triggers `self.decompile`,
    which subclasses provide.
    """

    def __init__(self, format):
        self.format = format
        self.data = None
        self.ttFont = None

    def __getattr__(self, attr):
        # allow lazy decompilation of subtables.
        if attr[:2] == '__': # don't handle requests for member functions like '__lt__'
            raise AttributeError(attr)
        if self.data is None:
            raise AttributeError(attr)
        self.decompile(None, None) # use saved data.
        self.data = None # Once this table has been decompiled, make sure we don't
                         # just return the original data. Also avoids recursion when
                         # called with an attribute that the cmap subtable doesn't have.
        return getattr(self, attr)

    def decompileHeader(self, data, ttFont):
        """Read format, length and language from the first six bytes.

        `data` must be exactly as long as the length stored in the header.
        The bytes after the header are kept in `self.data` for later
        decompilation, together with `ttFont`.
        """
        format, length, language = struct.unpack(">HHH", data[:6])
        assert len(data) == length, "corrupt cmap table format %d (data length: %d, header length: %d)" % (format, len(data), length)
        self.format = int(format)
        self.length = int(length)
        self.language = int(language)
        self.data = data[6:]
        self.ttFont = ttFont

    def toXML(self, writer, ttFont):
        """Write this subtable as an element named after its class."""
        writer.begintag(self.__class__.__name__, [
                ("platformID", self.platformID),
                ("platEncID", self.platEncID),
                ("language", self.language),
                ])
        writer.newline()
        codes = sorted(self.cmap.items())
        self._writeCodes(codes, writer)
        writer.endtag(self.__class__.__name__)
        writer.newline()

    def isUnicode(self):
        """Return True for platform 0, or platform 3 with encoding 1 or 10."""
        return (self.platformID == 0 or
            (self.platformID == 3 and self.platEncID in [1, 10]))

    def isSymbol(self):
        """Return True for platform 3, encoding 0."""
        return self.platformID == 3 and self.platEncID == 0

    def _writeCodes(self, codes, writer):
        """Write one <map> element per (code, name) pair in `codes`."""
        isUnicode = self.isUnicode()
        for code, name in codes:
            writer.simpletag("map", code=hex(code), name=name)
            if isUnicode:
                writer.comment(Unicode[code])
            writer.newline()

    def __lt__(self, other):
        if not isinstance(other, CmapSubtable):
            return NotImplemented

        # implemented so that list.sort() sorts according to the spec.
        # Only the three fields the spec orders by take part in the
        # comparison. Instance dictionaries are not orderable on Python 3,
        # so using them as a tie-breaker raised TypeError for subtables with
        # equal keys; list.sort() is stable and keeps such ties in place.
        selfTuple = (
            getattr(self, "platformID", None),
            getattr(self, "platEncID", None),
            getattr(self, "language", None))
        otherTuple = (
            getattr(other, "platformID", None),
            getattr(other, "platEncID", None),
            getattr(other, "language", None))
        return selfTuple < otherTuple
Just7842e561999-12-16 21:34:53 +0000172
173
class cmap_format_0(CmapSubtable):
    """Format 0 subtable: one glyph ID byte for each character code 0..255."""

    def decompile(self, data, ttFont):
        """Build `self.cmap` (character code -> glyph name).

        Pass None for both arguments to decode the data saved by
        `decompileHeader`, or pass a complete subtable and a font to decode
        those instead.
        """
        # we usually get here indirectly from the subtable __getattr__ function, in which case both args must be None.
        # If not, someone is calling the subtable decompile() directly, and must provide both args.
        if data is not None and ttFont is not None:
            # `data` is the whole subtable, header included.
            self.decompileHeader(data, ttFont)
        else:
            assert (data is None and ttFont is None), "Need both data and ttFont arguments"
        data = self.data # decompileHeader assigns the data after the header to self.data
        assert 262 == self.length, "Format 0 cmap subtable not 262 bytes"
        # Passing the bytes to the constructor works on Python 2 and 3 alike;
        # array.fromstring() no longer exists on Python 3.9+.
        glyphIdArray = array.array("B", data)
        getGlyphName = self.ttFont.getGlyphName
        self.cmap = cmap = {}
        for charCode, glyphID in enumerate(glyphIdArray):
            cmap[charCode] = getGlyphName(glyphID)

    def compile(self, ttFont):
        """Return the 262-byte binary subtable.

        If undecoded data is still held it is written back unchanged;
        otherwise `self.cmap` must map exactly the codes 0..255.
        """
        if self.data:
            return struct.pack(">HHH", 0, 262, self.language) + self.data

        charCodeList = sorted(self.cmap.items())
        charCodes = [entry[0] for entry in charCodeList]
        glyphNames = [entry[1] for entry in charCodeList]
        assert charCodes == list(range(256))
        glyphIDs = [ttFont.getGlyphID(name) for name in glyphNames]

        # array("B") rejects values that do not fit in one byte.
        glyphIdArray = array.array("B", glyphIDs)
        # bytes(bytearray(...)) stands in for array.tostring(), which no
        # longer exists on Python 3.9+, and also works on Python 2.
        data = struct.pack(">HHH", 0, 262, self.language) + bytes(bytearray(glyphIdArray))
        assert len(data) == 262
        return data

    def fromXML(self, name, attrs, content, ttFont):
        """Read the language and the <map> child elements into `self.cmap`."""
        self.language = safeEval(attrs["language"])
        if not hasattr(self, "cmap"):
            self.cmap = {}
        cmap = self.cmap
        for element in content:
            if not isinstance(element, tuple):
                continue
            name, attrs, content = element
            if name != "map":
                continue
            cmap[safeEval(attrs["code"])] = attrs["name"]
Just7842e561999-12-16 21:34:53 +0000221
222
# struct layout of one subheader: two unsigned shorts, one signed short and
# one unsigned short, big-endian.
subHeaderFormat = ">HHhH"


class SubHeader(object):
    """Plain record for one subheader; every field starts out unset."""

    def __init__(self):
        # The four numeric fields are filled in by whoever builds the record.
        self.firstCode = self.entryCount = None
        self.idDelta = self.idRangeOffset = None
        # Each instance gets its own list.
        self.glyphIndexArray = []
231
class cmap_format_2(CmapSubtable):
    """Format 2 subtable: mixed one- and two-byte character codes.

    The first byte of a code selects a subheader through a 256-entry key
    array; the subheader then maps the code (or its second byte) into a
    shared glyph index array.
    """

    def setIDDelta(self, subHeader):
        """Choose `subHeader.idDelta` and rebase its glyph index array.

        Non-zero entries of `glyphIndexArray` are reduced by the chosen
        delta so that the smallest non-zero entry is stored as 1.
        """
        subHeader.idDelta = 0
        # find the minGI which is not zero.
        minGI = subHeader.glyphIndexArray[0]
        for gid in subHeader.glyphIndexArray:
            if (gid != 0) and (gid < minGI):
                minGI = gid
        # The lowest gid in glyphIndexArray, after subtracting idDelta, must be 1.
        # idDelta is a short, and must be between -32K and 32K. minGI can be between 1 and 64K.
        # We would like to pick an idDelta such that the first glyphArray GID is 1,
        # so that we are more likely to be able to combine glypharray GID subranges.
        # This means that we have a problem when minGI is > 32K
        # Since the final gi is reconstructed from the glyphArray GID by:
        #    (short)finalGID = (gid + idDelta) % 0x10000),
        # we can get from a glypharray GID of 1 to a final GID of 65K by subtracting 2, and casting the
        # negative number to an unsigned short.

        if (minGI > 1):
            if minGI > 0x7FFF:
                subHeader.idDelta = -(0x10000 - minGI) -1
            else:
                subHeader.idDelta = minGI -1
            idDelta = subHeader.idDelta
            for i in range(subHeader.entryCount):
                gid = subHeader.glyphIndexArray[i]
                if gid > 0:
                    subHeader.glyphIndexArray[i] = gid - idDelta

    def decompile(self, data, ttFont):
        """Build `self.cmap` (character code -> glyph name).

        Pass None for both arguments to decode the data saved by
        `decompileHeader`, or pass a complete subtable and a font to decode
        those instead.
        """
        # we usually get here indirectly from the subtable __getattr__ function, in which case both args must be None.
        # If not, someone is calling the subtable decompile() directly, and must provide both args.
        if data is not None and ttFont is not None:
            # `data` is the whole subtable, header included.
            self.decompileHeader(data, ttFont)
        else:
            assert (data is None and ttFont is None), "Need both data and ttFont arguments"

        data = self.data # decompileHeader assigns the data after the header to self.data
        # get the key array, and determine the number of subHeaders.
        # Passing the bytes to the constructor works on Python 2 and 3 alike;
        # array.fromstring() no longer exists on Python 3.9+.
        allKeys = array.array("H", data[:512])
        data = data[512:]
        if sys.byteorder != "big":
            allKeys.byteswap()
        # Keys are stored as byte offsets; each subheader is 8 bytes long.
        subHeaderKeys = [key//8 for key in allKeys]
        maxSubHeaderindex = max(subHeaderKeys)

        #Load subHeaders
        subHeaderList = []
        pos = 0
        for i in range(maxSubHeaderindex + 1):
            subHeader = SubHeader()
            (subHeader.firstCode, subHeader.entryCount, subHeader.idDelta,
                subHeader.idRangeOffset) = struct.unpack(subHeaderFormat, data[pos:pos + 8])
            pos += 8
            # idRangeOffset counts from its own position, i.e. 2 bytes
            # before the end of the subheader.
            giDataPos = pos + subHeader.idRangeOffset-2
            giList = array.array("H", data[giDataPos:giDataPos + subHeader.entryCount*2])
            if sys.byteorder != "big":
                giList.byteswap()
            subHeader.glyphIndexArray = giList
            subHeaderList.append(subHeader)
        # How this gets processed.
        # Charcodes may be one or two bytes.
        # The first byte of a charcode is mapped through the subHeaderKeys, to select
        # a subHeader. For any subheader but 0, the next byte is then mapped through the
        # selected subheader. If subheader Index 0 is selected, then the byte itself is
        # mapped through the subheader, and there is no second byte.
        # Then assume that the subsequent byte is the first byte of the next charcode, and repeat.
        #
        # Each subheader references a range in the glyphIndexArray whose length is entryCount.
        # The range in glyphIndexArray referenced by a subheader may overlap with the range in glyphIndexArray
        # referenced by another subheader.
        # The only subheader that will be referenced by more than one first-byte value is the subheader
        # that maps the entire range of glyphID values to glyphIndex 0, e.g notdef:
        #    {firstChar 0, EntryCount 0,idDelta 0,idRangeOffset xx}
        # A byte being mapped through a subheader is treated as an index into a mapping of array index to font glyphIndex.
        # A subheader specifies a subrange within (0...256) by the
        # firstChar and EntryCount values. If the byte value is outside the subrange, then the glyphIndex is zero
        # (e.g. glyph not in font).
        # If the byte index is in the subrange, then an offset index is calculated as (byteIndex - firstChar).
        # The index to glyphIndex mapping is a subrange of the glyphIndexArray. You find the start of the subrange by
        # counting idRangeOffset bytes from the idRangeOffset word. The first value in this subrange is the
        # glyphIndex for the index firstChar. The offset index should then be used in this array to get the glyphIndex.
        # Example for Logocut-Medium
        # first byte of charcode = 129; selects subheader 1.
        # subheader 1 = {firstChar 64, EntryCount 108,idDelta 42,idRangeOffset 0252}
        # second byte of charCode = 66
        # the index offset = 66-64 = 2.
        # The subrange of the glyphIndexArray starting at 0x0252 bytes from the idRangeOffset word is:
        # [glyphIndexArray index], [subrange array index] = glyphIndex
        # [256], [0]=1  from charcode [129, 64]
        # [257], [1]=2  from charcode [129, 65]
        # [258], [2]=3  from charcode [129, 66]
        # [259], [3]=4  from charcode [129, 67]
        # So, the glyphIndex = 3 from the array. Then if idDelta is not zero and the glyph ID is not zero,
        # add it to the glyphID to get the final glyphIndex
        # value. In this case the final glyph index = 3+ 42 -> 45 for the final glyphIndex. Whew!

        self.data = b""
        self.cmap = cmap = {}
        for firstByte in range(256):
            subHeadindex = subHeaderKeys[firstByte]
            subHeader = subHeaderList[subHeadindex]
            if subHeadindex == 0:
                if (firstByte < subHeader.firstCode) or (firstByte >= subHeader.firstCode + subHeader.entryCount):
                    continue # gi is notdef.
                else:
                    charCode = firstByte
                    offsetIndex = firstByte - subHeader.firstCode
                    gi = subHeader.glyphIndexArray[offsetIndex]
                    if gi != 0:
                        gi = (gi + subHeader.idDelta) % 0x10000
                    else:
                        continue # gi is notdef.
                cmap[charCode] = gi
            else:
                if subHeader.entryCount:
                    charCodeOffset = firstByte * 256 + subHeader.firstCode
                    for offsetIndex in range(subHeader.entryCount):
                        charCode = charCodeOffset + offsetIndex
                        gi = subHeader.glyphIndexArray[offsetIndex]
                        if gi != 0:
                            gi = (gi + subHeader.idDelta) % 0x10000
                        else:
                            continue
                        cmap[charCode] = gi
                # If not subHeader.entryCount, then all char codes with this first byte are
                # mapped to .notdef. We can skip this subtable, and leave the glyphs un-encoded, which is the
                # same as mapping it to .notdef.
        # cmap values are GID's.
        glyphOrder = self.ttFont.getGlyphOrder()
        gids = list(cmap.values())
        charCodes = list(cmap.keys())
        try:
            names = [glyphOrder[gid] for gid in gids]
        except IndexError:
            # Some glyph ID lies beyond the glyph order; ask the font for
            # a name for every glyph ID instead.
            getGlyphName = self.ttFont.getGlyphName
            names = [getGlyphName(gid) for gid in gids]
        for charCode, glyphName in zip(charCodes, names):
            cmap[charCode] = glyphName

    def compile(self, ttFont):
        """Return the binary subtable.

        If undecoded data is still held it is written back unchanged;
        otherwise the subtable is rebuilt from `self.cmap`.

        Raises KeyError for a glyph name that cannot be resolved to an ID.
        """
        if self.data:
            return struct.pack(">HHH", self.format, self.length, self.language) + self.data
        kEmptyTwoCharCodeRange = -1
        notdefGI = 0

        items = sorted(self.cmap.items())
        charCodes = [item[0] for item in items]
        names = [item[1] for item in items]
        nameMap = ttFont.getReverseGlyphMap()
        try:
            gids = [nameMap[name] for name in names]
        except KeyError:
            nameMap = ttFont.getReverseGlyphMap(rebuild=True)
            try:
                gids = [nameMap[name] for name in names]
            except KeyError:
                # allow virtual GIDs in format 2 tables
                gids = []
                for name in names:
                    try:
                        gid = nameMap[name]
                    except KeyError:
                        try:
                            if (name[:3] == 'gid'):
                                # Parse the numeric suffix with int(), not
                                # eval(): glyph names may come from an
                                # external XML file.
                                gid = int(name[3:])
                            else:
                                gid = ttFont.getGlyphID(name)
                        except Exception:
                            raise KeyError(name)

                    gids.append(gid)

        # Process the (char code to gid) item list in char code order.
        # By definition, all one byte char codes map to subheader 0.
        # For all the two byte char codes, we assume that the first byte maps to the empty subhead (with an entry count of 0,
        # which defines all char codes in its range to map to notdef) unless proven otherwise.
        # Note that since the char code items are processed in char code order, all the char codes with the
        # same first byte are in sequential order.

        subHeaderKeys = [kEmptyTwoCharCodeRange for x in range(256)] # list of indices into subHeaderList.
        subHeaderList = []

        # We force this subheader entry 0 to exist in the subHeaderList in the case where some one comes up
        # with a cmap where all the one byte char codes map to notdef,
        # with the result that the subhead 0 would not get created just by processing the item list.
        charCode = charCodes[0]
        if charCode > 255:
            subHeader = SubHeader()
            subHeader.firstCode = 0
            subHeader.entryCount = 0
            subHeader.idDelta = 0
            subHeader.idRangeOffset = 0
            subHeaderList.append(subHeader)

        lastFirstByte = -1
        for charCode, gid in zip(charCodes, gids):
            if gid == 0:
                continue
            firstbyte = charCode >> 8
            secondByte = charCode & 0x00FF

            if firstbyte != lastFirstByte: # Need to update the current subhead, and start a new one.
                if lastFirstByte > -1:
                    # fix GI's and iDelta of current subheader.
                    self.setIDDelta(subHeader)

                    # If it was subheader 0 for one-byte charCodes, then we need to set the subHeaderKeys value to zero
                    # for the indices matching the char codes.
                    if lastFirstByte == 0:
                        for index in range(subHeader.entryCount):
                            oneByteCode = subHeader.firstCode + index
                            subHeaderKeys[oneByteCode] = 0

                    assert (subHeader.entryCount == len(subHeader.glyphIndexArray)), "Error - subhead entry count does not match len of glyphID subrange."
                # init new subheader
                subHeader = SubHeader()
                subHeader.firstCode = secondByte
                subHeader.entryCount = 1
                subHeader.glyphIndexArray.append(gid)
                subHeaderList.append(subHeader)
                subHeaderKeys[firstbyte] = len(subHeaderList) -1
                lastFirstByte = firstbyte
            else:
                # need to fill in with notdefs all the code points between the last charCode and the current charCode.
                codeDiff = secondByte - (subHeader.firstCode + subHeader.entryCount)
                for i in range(codeDiff):
                    subHeader.glyphIndexArray.append(notdefGI)
                subHeader.glyphIndexArray.append(gid)
                subHeader.entryCount = subHeader.entryCount + codeDiff + 1

        # fix GI's and iDelta of last subheader that we added to the subheader array.
        self.setIDDelta(subHeader)

        # Now we add a final subheader for the subHeaderKeys which maps to empty two byte charcode ranges.
        subHeader = SubHeader()
        subHeader.firstCode = 0
        subHeader.entryCount = 0
        subHeader.idDelta = 0
        subHeader.idRangeOffset = 2
        subHeaderList.append(subHeader)
        emptySubheadIndex = len(subHeaderList) - 1
        for index in range(256):
            if subHeaderKeys[index] == kEmptyTwoCharCodeRange:
                subHeaderKeys[index] = emptySubheadIndex
        # Since this is the last subheader, the GlyphIndex Array starts two bytes after the start of the
        # idRangeOffset word of this subHeader. We can safely point to the first entry in the GlyphIndexArray,
        # since the first subrange of the GlyphIndexArray is for subHeader 0, which always starts with
        # charcode 0 and GID 0.

        idRangeOffset = (len(subHeaderList)-1)*8 + 2 # offset to beginning of glyphIDArray from first subheader idRangeOffset.
        subheadRangeLen = len(subHeaderList) -1 # skip last special empty-set subheader; we've already hardcoded its idRangeOffset to 2.
        for index in range(subheadRangeLen):
            subHeader = subHeaderList[index]
            subHeader.idRangeOffset = 0
            for j in range(index):
                prevSubhead = subHeaderList[j]
                if prevSubhead.glyphIndexArray == subHeader.glyphIndexArray: # use the glyphIndexArray subarray
                    subHeader.idRangeOffset = prevSubhead.idRangeOffset - (index-j)*8
                    subHeader.glyphIndexArray = []
                    break
            if subHeader.idRangeOffset == 0: # didn't find one.
                subHeader.idRangeOffset = idRangeOffset
                idRangeOffset = (idRangeOffset - 8) + subHeader.entryCount*2 # one less subheader, one more subArray.
            else:
                idRangeOffset = idRangeOffset - 8  # one less subheader

        # Now we can write out the data!
        length = 6 + 512 + 8*len(subHeaderList) # header, 256 subHeaderKeys, and subheader array.
        for subhead in subHeaderList[:-1]:
            length = length + len(subhead.glyphIndexArray)*2  # We can't use subhead.entryCount, as some of the subhead may share subArrays.
        dataList = [struct.pack(">HHH", 2, length, self.language)]
        for index in subHeaderKeys:
            dataList.append(struct.pack(">H", index*8))
        for subhead in subHeaderList:
            dataList.append(struct.pack(subHeaderFormat, subhead.firstCode, subhead.entryCount, subhead.idDelta, subhead.idRangeOffset))
        for subhead in subHeaderList[:-1]:
            for gi in subhead.glyphIndexArray:
                dataList.append(struct.pack(">H", gi))
        data = bytesjoin(dataList)
        assert (len(data) == length), "Error: cmap format 2 is not same length as calculated! actual: " + str(len(data))+ " calc : " + str(length)
        return data

    def fromXML(self, name, attrs, content, ttFont):
        """Read the language and the <map> child elements into `self.cmap`."""
        self.language = safeEval(attrs["language"])
        if not hasattr(self, "cmap"):
            self.cmap = {}
        cmap = self.cmap

        for element in content:
            if not isinstance(element, tuple):
                continue
            name, attrs, content = element
            if name != "map":
                continue
            cmap[safeEval(attrs["code"])] = attrs["name"]
Just7842e561999-12-16 21:34:53 +0000540
541
# struct format string: seven big-endian unsigned 16-bit values.
# NOTE(review): presumably the fixed-size leading fields of a format 4
# subtable; confirm against the cmap_format_4 code that uses it.
cmap_format_4_format = ">7H"

# Variable-length arrays of a format 4 subtable, in order:
#uint16  endCode[segCount]          # Ending character code for each segment, last = 0xFFFF.
#uint16  reservedPad                # This value should be zero
#uint16  startCode[segCount]        # Starting character code for each segment
#uint16  idDelta[segCount]          # Delta for all character codes in segment
#uint16  idRangeOffset[segCount]    # Offset in bytes to glyph indexArray, or 0
#uint16  glyphIndexArray[variable]  # Glyph index array
Just7842e561999-12-16 21:34:53 +0000550
def splitRange(startCode, endCode, cmap):
    """Split the code range startCode..endCode into cheaper subranges.

    `cmap` maps every code in the range to a glyph ID. The range is cut
    around runs of consecutive glyph IDs, but only where a run is long
    enough to pay for the extra segments. This is a heuristic, not a
    proven optimum.

    Returns a pair (start, end): `end` holds the last code of every
    subrange and `start` the first code of every subrange except the
    first one, so `start` is one element shorter than `end`.
    """
    if startCode == endCode:
        return [], [endCode]

    # Pass 1: collect the runs in which the glyph IDs are consecutive.
    runs = []
    runStart = None  # first code of the run in progress, if any
    prevCode = startCode
    prevID = cmap[startCode]
    for code in range(startCode + 1, endCode + 1):
        glyphID = cmap[code]
        if glyphID - 1 == prevID:
            if runStart is None:
                runStart = prevCode
        elif runStart is not None:
            runs.append((runStart, prevCode))
            runStart = None
        prevID = glyphID
        prevCode = code
    if runStart is not None:
        runs.append((runStart, prevCode))
    assert prevCode == endCode

    # Pass 2: drop the runs that would only make the data bigger.
    # A new segment costs 8 bytes, not using a new segment costs 2 bytes
    # per character.
    worthwhile = []
    for first, last in runs:
        atStart = first == startCode
        atEnd = last == endCode
        if atStart and atEnd:
            break  # the whole range, nothing to split
        # A run touching an edge costs one more segment, otherwise two.
        threshold = 4 if (atStart or atEnd) else 8
        if (last - first + 1) > threshold:
            worthwhile.append((first, last))

    if not worthwhile:
        return [], [endCode]

    # Pass 3: walk the kept runs in order, emitting a filler segment for
    # every stretch of codes they leave uncovered. Those are the segments
    # whose glyph IDs are not consecutive.
    segments = []
    nextCode = startCode
    for first, last in worthwhile:
        if first != nextCode:
            segments.append((nextCode, first - 1))
        segments.append((first, last))
        nextCode = last + 1
    if nextCode != endCode + 1:
        segments.append((nextCode, endCode))

    # Turn the segments into the start and end code lists; the first start
    # code is dropped.
    start = [first for first, last in segments][1:]
    end = [last for first, last in segments]

    assert len(start) + 1 == len(end)
    return start, end
628
629
class cmap_format_4(CmapSubtable):
    """cmap subtable format 4: segment mapping to delta values (16-bit codes)."""

    def decompile(self, data, ttFont):
        """Parse the binary subtable into ``self.cmap`` (code -> glyph name).

        We usually get here indirectly from the subtable __getattr__
        function, in which case both args must be None.  If not, someone is
        calling the subtable decompile() directly, and must provide both
        args; ``data`` is then the complete subtable, header included.
        """
        if data is not None and ttFont is not None:
            # Fixed: this used to slice self.data with the undefined names
            # 'offset' and 'length', raising NameError on a direct call.
            self.decompileHeader(data, ttFont)
        else:
            assert (data is None and ttFont is None), "Need both data and ttFont arguments"

        data = self.data  # decompileHeader assigns the data after the header to self.data
        (segCountX2, searchRange, entrySelector, rangeShift) = \
            struct.unpack(">4H", data[:8])
        data = data[8:]
        segCount = segCountX2 // 2

        allCodes = array.array("H")
        allCodes.fromstring(data)
        self.data = data = None

        if sys.byteorder != "big":
            allCodes.byteswap()

        # divide the data
        endCode = allCodes[:segCount]
        allCodes = allCodes[segCount+1:]  # the +1 is skipping the reservedPad field
        startCode = allCodes[:segCount]
        allCodes = allCodes[segCount:]
        idDelta = allCodes[:segCount]
        allCodes = allCodes[segCount:]
        idRangeOffset = allCodes[:segCount]
        glyphIndexArray = allCodes[segCount:]
        lenGIArray = len(glyphIndexArray)

        # build 2-byte character mapping
        charCodes = []
        gids = []
        for i in range(len(startCode) - 1):  # don't do 0xffff!
            start = startCode[i]
            delta = idDelta[i]
            rangeOffset = idRangeOffset[i]
            # idRangeOffset is a byte offset counted from its own slot in
            # the idRangeOffset array; 'partial' turns a character code
            # into an index into glyphIndexArray.
            partial = rangeOffset // 2 - start + i - len(idRangeOffset)

            rangeCharCodes = list(range(start, endCode[i] + 1))
            charCodes.extend(rangeCharCodes)
            if rangeOffset == 0:
                gids.extend([(charCode + delta) & 0xFFFF for charCode in rangeCharCodes])
            else:
                for charCode in rangeCharCodes:
                    index = charCode + partial
                    assert (index < lenGIArray), "In format 4 cmap, range (%d), the calculated index (%d) into the glyph index array is not less than the length of the array (%d) !" % (i, index, lenGIArray)
                    if glyphIndexArray[index] != 0:  # if not missing glyph
                        glyphID = glyphIndexArray[index] + delta
                    else:
                        glyphID = 0  # missing glyph
                    gids.append(glyphID & 0xFFFF)

        self.cmap = cmap = {}
        glyphOrder = self.ttFont.getGlyphOrder()
        try:
            names = [glyphOrder[gid] for gid in gids]
        except IndexError:
            # Some glyph ID lies beyond the glyph order; fall back to
            # per-glyph name lookup.
            getGlyphName = self.ttFont.getGlyphName
            names = [getGlyphName(gid) for gid in gids]
        cmap.update(zip(charCodes, names))

    def setIDDelta(self, idDelta):
        """Wrap ``idDelta`` into the signed 16-bit range and return it.

        idDelta is stored as a short, so it must lie in -0x8000..0x7FFF.
        startCode can be between 0 and 64K-1 and the first glyph index can
        be between 0 and 64K-1, so the raw difference can fall outside that
        range.  Since the final glyph ID is reconstructed as
        (code + idDelta) % 0x10000, adding or subtracting 0x10000 yields an
        equivalent delta that does fit.
        """
        if idDelta > 0x7FFF:
            idDelta = idDelta - 0x10000
        elif idDelta < -0x8000:
            # Fixed: the bound used to be -0x7FFF, which turned the valid
            # value -0x8000 into +0x8000 and overflowed the 'h' array.
            idDelta = idDelta + 0x10000

        return idDelta

    def compile(self, ttFont):
        """Return the binary form of this subtable.

        If the subtable was never decompiled, the raw data kept in
        ``self.data`` is written back unchanged.  Otherwise the segment
        arrays are rebuilt from ``self.cmap``.  Raises KeyError for a glyph
        name that cannot be resolved to a glyph ID.
        """
        if self.data:
            return struct.pack(">HHH", self.format, self.length, self.language) + self.data

        from fontTools.ttLib.sfnt import maxPowerOfTwo

        charCodes = sorted(self.cmap.keys())
        if not charCodes:
            startCode = [0xffff]
            endCode = [0xffff]
        else:
            names = [self.cmap[charCode] for charCode in charCodes]
            nameMap = ttFont.getReverseGlyphMap()
            try:
                gids = [nameMap[name] for name in names]
            except KeyError:
                nameMap = ttFont.getReverseGlyphMap(rebuild=True)
                try:
                    gids = [nameMap[name] for name in names]
                except KeyError:
                    # allow virtual GIDs in format 4 tables
                    gids = []
                    for name in names:
                        try:
                            gid = nameMap[name]
                        except KeyError:
                            try:
                                if name[:3] == 'gid':
                                    # NOTE(review): eval() on a glyph name is
                                    # unsafe if the name comes from an
                                    # untrusted TTX file; consider int().
                                    gid = eval(name[3:])
                                else:
                                    gid = ttFont.getGlyphID(name)
                            except Exception:
                                raise KeyError(name)

                        gids.append(gid)
            cmap = dict(zip(charCodes, gids))  # code:glyphID mapping

            # Build startCode and endCode lists.
            # Split the char codes in ranges of consecutive char codes, then split
            # each range in more ranges of consecutive/not consecutive glyph IDs.
            # See splitRange().
            lastCode = charCodes[0]
            endCode = []
            startCode = [lastCode]
            for charCode in charCodes[1:]:  # skip the first code, it's the first start code
                if charCode == lastCode + 1:
                    lastCode = charCode
                    continue
                start, end = splitRange(startCode[-1], lastCode, cmap)
                startCode.extend(start)
                endCode.extend(end)
                startCode.append(charCode)
                lastCode = charCode
            endCode.append(lastCode)
            startCode.append(0xffff)
            endCode.append(0xffff)

        # build up rest of cruft
        idDelta = []
        idRangeOffset = []
        glyphIndexArray = []
        for i in range(len(endCode)-1):  # skip the closing codes (0xffff)
            indices = [cmap[charCode] for charCode in range(startCode[i], endCode[i] + 1)]
            if indices == list(range(indices[0], indices[0] + len(indices))):
                # Consecutive glyph IDs: a plain delta is enough.
                idDelta.append(self.setIDDelta(indices[0] - startCode[i]))
                idRangeOffset.append(0)
            else:
                # Byte offset from this idRangeOffset slot to the segment's
                # first entry in glyphIndexArray.
                idDelta.append(0)
                idRangeOffset.append(2 * (len(endCode) + len(glyphIndexArray) - i))
                glyphIndexArray.extend(indices)
        idDelta.append(1)  # 0xffff + 1 == (tadaa!) 0. So this end code maps to .notdef
        idRangeOffset.append(0)

        # binary search header fields
        segCount = len(endCode)
        segCountX2 = segCount * 2
        maxExponent = maxPowerOfTwo(segCount)
        searchRange = 2 * (2 ** maxExponent)
        entrySelector = maxExponent
        rangeShift = 2 * segCount - searchRange

        charCodeArray = array.array("H", endCode + [0] + startCode)
        idDeltaArray = array.array("h", idDelta)
        restArray = array.array("H", idRangeOffset + glyphIndexArray)
        if sys.byteorder != "big":
            charCodeArray.byteswap()
            idDeltaArray.byteswap()
            restArray.byteswap()
        data = charCodeArray.tostring() + idDeltaArray.tostring() + restArray.tostring()

        length = struct.calcsize(cmap_format_4_format) + len(data)
        header = struct.pack(cmap_format_4_format, self.format, length, self.language,
                segCountX2, searchRange, entrySelector, rangeShift)
        return header + data

    def fromXML(self, name, attrs, content, ttFont):
        """Populate ``self.language`` and ``self.cmap`` from parsed TTX XML."""
        self.language = safeEval(attrs["language"])
        if not hasattr(self, "cmap"):
            self.cmap = {}
        cmap = self.cmap

        for element in content:
            if not isinstance(element, tuple):
                continue  # skip text between the child elements
            nameMap, attrsMap, dummyContent = element
            if nameMap != "map":
                assert 0, "Unrecognized keyword in cmap subtable"
            cmap[safeEval(attrsMap["code"])] = attrsMap["name"]
Just7842e561999-12-16 21:34:53 +0000836
837
class cmap_format_6(CmapSubtable):
    """cmap subtable format 6: trimmed table mapping (one dense code range)."""

    def decompile(self, data, ttFont):
        """Parse the binary subtable into ``self.cmap`` (code -> glyph name).

        We usually get here indirectly from the subtable __getattr__
        function, in which case both args must be None.  If not, someone is
        calling the subtable decompile() directly, and must provide both
        args; ``data`` is then the complete subtable, header included.
        """
        if data is not None and ttFont is not None:
            # Fixed: this used to slice with the undefined names 'offset'
            # and 'length', raising NameError on a direct call.
            self.decompileHeader(data, ttFont)
        else:
            assert (data is None and ttFont is None), "Need both data and ttFont arguments"

        data = self.data  # decompileHeader assigns the data after the header to self.data
        firstCode, entryCount = struct.unpack(">HH", data[:4])
        firstCode = int(firstCode)
        data = data[4:]
        # NOTE: len(data) == 2 * entryCount cannot be asserted; it is not
        # true in Apple's Helvetica, so only the declared entries are read.
        glyphIndexArray = array.array("H")
        glyphIndexArray.fromstring(data[:2 * int(entryCount)])
        if sys.byteorder != "big":
            glyphIndexArray.byteswap()
        self.data = data = None

        self.cmap = cmap = {}

        lenArray = len(glyphIndexArray)
        charCodes = list(range(firstCode, firstCode + lenArray))
        glyphOrder = self.ttFont.getGlyphOrder()
        try:
            names = [glyphOrder[gid] for gid in glyphIndexArray]
        except IndexError:
            # Some glyph ID lies beyond the glyph order; fall back to
            # per-glyph name lookup.
            getGlyphName = self.ttFont.getGlyphName
            names = [getGlyphName(gid) for gid in glyphIndexArray]
        cmap.update(zip(charCodes, names))

    def compile(self, ttFont):
        """Return the binary form of this subtable.

        If the subtable was never decompiled, the raw data kept in
        ``self.data`` is written back unchanged.  Otherwise a dense glyph
        index array covering the lowest to the highest mapped code is
        built; unmapped codes inside that range map to ".notdef".
        """
        if self.data:
            return struct.pack(">HHH", self.format, self.length, self.language) + self.data
        cmap = self.cmap
        # Fixed: the keys must be sorted before taking the first and last
        # one as the bounds of the range; dict order is not code order.
        codes = sorted(cmap.keys())
        if codes:  # yes, there are empty cmap tables.
            codes = list(range(codes[0], codes[-1] + 1))
            firstCode = codes[0]
            valueList = [ttFont.getGlyphID(cmap.get(code, ".notdef")) for code in codes]
            glyphIndexArray = array.array("H", valueList)
            if sys.byteorder != "big":
                glyphIndexArray.byteswap()
            data = glyphIndexArray.tostring()
        else:
            data = b""
            firstCode = 0
        header = struct.pack(">HHHHH",
                6, len(data) + 10, self.language, firstCode, len(codes))
        return header + data

    def fromXML(self, name, attrs, content, ttFont):
        """Populate ``self.language`` and ``self.cmap`` from parsed TTX XML."""
        self.language = safeEval(attrs["language"])
        if not hasattr(self, "cmap"):
            self.cmap = {}
        cmap = self.cmap

        for element in content:
            if not isinstance(element, tuple):
                continue  # skip text between the child elements
            childName, childAttrs, dummyContent = element
            if childName != "map":
                continue
            cmap[safeEval(childAttrs["code"])] = childAttrs["name"]
Just7842e561999-12-16 21:34:53 +0000905
906
class cmap_format_12_or_13(CmapSubtable):
    """Shared implementation of cmap subtable formats 12 and 13.

    Both formats store groups of (startCharCode, endCharCode, glyphID).
    Subclasses supply ``_format_step``, ``_computeGIDs()`` and
    ``_IsInSameRun()`` to define how glyph IDs progress within a group.
    """

    def __init__(self, format):
        self.format = format
        self.reserved = 0
        self.data = None
        self.ttFont = None

    def decompileHeader(self, data, ttFont):
        """Parse the 16-byte header; keep the group records in ``self.data``."""
        format, reserved, length, language, nGroups = struct.unpack(">HHLLL", data[:16])
        assert len(data) == (16 + nGroups*12) == (length), "corrupt cmap table format %d (data length: %d, header length: %d)" % (format, len(data), length)
        self.format = format
        self.reserved = reserved
        self.length = length
        self.language = language
        self.nGroups = nGroups
        self.data = data[16:]
        self.ttFont = ttFont

    def decompile(self, data, ttFont):
        """Parse the group records into ``self.cmap`` (code -> glyph name).

        We usually get here indirectly from the subtable __getattr__
        function, in which case both args must be None.  If not, someone is
        calling the subtable decompile() directly, and must provide both
        args; ``data`` is then the complete subtable, header included.
        """
        if data is not None and ttFont is not None:
            # Fixed: this used to slice with the undefined names 'offset'
            # and 'length', raising NameError on a direct call.
            self.decompileHeader(data, ttFont)
        else:
            assert (data is None and ttFont is None), "Need both data and ttFont arguments"

        data = self.data  # decompileHeader assigns the data after the header to self.data
        charCodes = []
        gids = []
        pos = 0
        for i in range(self.nGroups):
            startCharCode, endCharCode, glyphID = struct.unpack(">LLL", data[pos:pos+12])
            pos += 12
            lenGroup = 1 + endCharCode - startCharCode
            charCodes.extend(range(startCharCode, endCharCode + 1))
            gids.extend(self._computeGIDs(glyphID, lenGroup))
        self.data = data = None
        self.cmap = cmap = {}
        glyphOrder = self.ttFont.getGlyphOrder()
        try:
            names = [glyphOrder[gid] for gid in gids]
        except IndexError:
            # Some glyph ID lies beyond the glyph order; fall back to
            # per-glyph name lookup.
            getGlyphName = self.ttFont.getGlyphName
            names = [getGlyphName(gid) for gid in gids]
        cmap.update(zip(charCodes, names))

    def compile(self, ttFont):
        """Return the binary form of this subtable.

        If the subtable was never decompiled, the raw data kept in
        ``self.data`` is written back unchanged.  Otherwise the groups are
        rebuilt from ``self.cmap``.  Raises KeyError for a glyph name that
        cannot be resolved to a glyph ID.
        """
        if self.data:
            return struct.pack(">HHLLL", self.format, self.reserved, self.length, self.language, self.nGroups) + self.data
        charCodes = list(self.cmap.keys())
        if not charCodes:
            # Fixed: an empty mapping used to raise IndexError below.  It
            # has no groups, so the subtable is just the 16-byte header.
            return struct.pack(">HHLLL", self.format, self.reserved, 16, self.language, 0)
        names = list(self.cmap.values())
        nameMap = ttFont.getReverseGlyphMap()
        try:
            gids = [nameMap[name] for name in names]
        except KeyError:
            nameMap = ttFont.getReverseGlyphMap(rebuild=True)
            try:
                gids = [nameMap[name] for name in names]
            except KeyError:
                # allow virtual GIDs in format 12 tables
                gids = []
                for name in names:
                    try:
                        gid = nameMap[name]
                    except KeyError:
                        try:
                            if name[:3] == 'gid':
                                # NOTE(review): eval() on a glyph name is
                                # unsafe if the name comes from an
                                # untrusted TTX file; consider int().
                                gid = eval(name[3:])
                            else:
                                gid = ttFont.getGlyphID(name)
                        except Exception:
                            raise KeyError(name)

                    gids.append(gid)

        cmap = dict(zip(charCodes, gids))  # code:glyphID mapping

        charCodes.sort()
        startCharCode = charCodes[0]
        startGlyphID = cmap[startCharCode]
        # Prime the "previous" values so that the first code always
        # continues the (still empty) first run.
        lastGlyphID = startGlyphID - self._format_step
        lastCharCode = startCharCode - 1
        nGroups = 0
        dataList = []
        for charCode in charCodes:
            glyphID = cmap[charCode]
            if not self._IsInSameRun(glyphID, lastGlyphID, charCode, lastCharCode):
                # Close the current group and start a new one here.
                dataList.append(struct.pack(">LLL", startCharCode, lastCharCode, startGlyphID))
                startCharCode = charCode
                startGlyphID = glyphID
                nGroups = nGroups + 1
            lastGlyphID = glyphID
            lastCharCode = charCode
        dataList.append(struct.pack(">LLL", startCharCode, lastCharCode, startGlyphID))
        nGroups = nGroups + 1
        data = bytesjoin(dataList)
        lengthSubtable = len(data) + 16
        assert len(data) == (nGroups*12) == (lengthSubtable-16)
        return struct.pack(">HHLLL", self.format, self.reserved, lengthSubtable, self.language, nGroups) + data

    def toXML(self, writer, ttFont):
        """Write this subtable, header attributes and mappings, as TTX XML."""
        writer.begintag(self.__class__.__name__, [
                ("platformID", self.platformID),
                ("platEncID", self.platEncID),
                ("format", self.format),
                ("reserved", self.reserved),
                ("length", self.length),
                ("language", self.language),
                ("nGroups", self.nGroups),
                ])
        writer.newline()
        codes = sorted(self.cmap.items())
        self._writeCodes(codes, writer)
        writer.endtag(self.__class__.__name__)
        writer.newline()

    def fromXML(self, name, attrs, content, ttFont):
        """Populate the header fields and ``self.cmap`` from parsed TTX XML."""
        self.format = safeEval(attrs["format"])
        self.reserved = safeEval(attrs["reserved"])
        self.length = safeEval(attrs["length"])
        self.language = safeEval(attrs["language"])
        self.nGroups = safeEval(attrs["nGroups"])
        if not hasattr(self, "cmap"):
            self.cmap = {}
        cmap = self.cmap

        for element in content:
            if not isinstance(element, tuple):
                continue  # skip text between the child elements
            childName, childAttrs, dummyContent = element
            if childName != "map":
                continue
            cmap[safeEval(childAttrs["code"])] = childAttrs["name"]
jvr924e4e22003-02-08 10:45:23 +00001047
1048
class cmap_format_12(cmap_format_12_or_13):
    """cmap format 12: glyph IDs increase by one along each group."""

    def __init__(self, format):
        cmap_format_12_or_13.__init__(self, format)
        # Glyph ID increment between neighbouring codes of a group.
        self._format_step = 1

    def _computeGIDs(self, startingGlyph, numberOfGlyphs):
        """Return the glyph IDs of a group: consecutive from ``startingGlyph``."""
        stop = startingGlyph + numberOfGlyphs
        return list(range(startingGlyph, stop))

    def _IsInSameRun(self, glyphID, lastGlyphID, charCode, lastCharCode):
        """Tell whether the code and the glyph ID both follow their predecessor."""
        glyphFollows = glyphID == 1 + lastGlyphID
        codeFollows = charCode == 1 + lastCharCode
        return glyphFollows and codeFollows
1059
1060
class cmap_format_13(cmap_format_12_or_13):
    """cmap format 13: every code of a group maps to the same glyph ID."""

    def __init__(self, format):
        cmap_format_12_or_13.__init__(self, format)
        # Glyph ID increment between neighbouring codes of a group.
        self._format_step = 0

    def _computeGIDs(self, startingGlyph, numberOfGlyphs):
        """Return the glyph IDs of a group: ``startingGlyph`` repeated."""
        return numberOfGlyphs * [startingGlyph]

    def _IsInSameRun(self, glyphID, lastGlyphID, charCode, lastCharCode):
        """Tell whether the code follows its predecessor with an unchanged glyph."""
        sameGlyph = glyphID == lastGlyphID
        codeFollows = charCode == 1 + lastCharCode
        return sameGlyph and codeFollows
1071
1072
def cvtToUVS(threeByteString):
    """Decode a 3-byte big-endian byte string into an integer."""
    # Pad to four bytes so that struct can read it as an unsigned long.
    return struct.unpack(">L", b"\0" + threeByteString)[0]
1077
def cvtFromUVS(val):
    """Encode an integer below 0x1000000 as a 3-byte big-endian byte string."""
    assert 0 <= val < 0x1000000
    # Pack as an unsigned long and drop the (always zero) high byte.
    return struct.pack(">L", val)[1:]
jvr0cb8a082008-05-16 15:07:09 +00001082
Behdad Esfahbodb7fd2e12013-11-27 18:58:45 -05001083
class cmap_format_14(CmapSubtable):
    """cmap subtable format 14: Unicode variation sequences.

    The mappings live in ``self.uvsDict``: variation selector -> list of
    (base code, glyph name) entries, where a glyph name of None marks a
    default variation sequence.  ``self.cmap`` is kept empty.
    """

    def decompileHeader(self, data, ttFont):
        """Parse the 10-byte header; keep the remaining bytes in ``self.data``."""
        format, length, numVarSelectorRecords = struct.unpack(">HLL", data[:10])
        self.data = data[10:]
        self.length = length
        self.numVarSelectorRecords = numVarSelectorRecords
        self.ttFont = ttFont
        self.language = 0xFF  # has no language.

    def decompile(self, data, ttFont):
        """Parse the variation selector records into ``self.uvsDict``.

        Either both arguments are None (the header was already parsed by
        decompileHeader()), or both are given and ``data`` is the complete
        subtable, header included.
        """
        if data is not None and ttFont is not None:
            # Fixed: the header used to be parsed only when ttFont.lazy was
            # set; otherwise a direct call with both arguments failed the
            # assertion below even though both arguments were supplied.
            self.decompileHeader(data, ttFont)
        else:
            assert (data is None and ttFont is None), "Need both data and ttFont arguments"
        data = self.data

        self.cmap = {}  # so that clients that expect this to exist in a cmap table won't fail.
        uvsDict = {}
        recOffset = 0
        for n in range(self.numVarSelectorRecords):
            uvs, defOVSOffset, nonDefUVSOffset = struct.unpack(">3sLL", data[recOffset:recOffset+11])
            recOffset += 11
            varUVS = cvtToUVS(uvs)
            if defOVSOffset:
                # Offsets count from the subtable start; self.data lacks
                # the 10 header bytes.
                startOffset = defOVSOffset - 10
                numValues, = struct.unpack(">L", data[startOffset:startOffset+4])
                startOffset += 4
                for r in range(numValues):
                    uv, addtlCnt = struct.unpack(">3sB", data[startOffset:startOffset+4])
                    startOffset += 4
                    firstBaseUV = cvtToUVS(uv)
                    cnt = addtlCnt + 1
                    # Default sequences carry no glyph; record None.
                    localUVList = [(baseUV, None) for baseUV in range(firstBaseUV, firstBaseUV + cnt)]
                    uvsDict.setdefault(varUVS, []).extend(localUVList)

            if nonDefUVSOffset:
                startOffset = nonDefUVSOffset - 10
                numRecs, = struct.unpack(">L", data[startOffset:startOffset+4])
                startOffset += 4
                localUVList = []
                for r in range(numRecs):
                    uv, gid = struct.unpack(">3sH", data[startOffset:startOffset+5])
                    startOffset += 5
                    uv = cvtToUVS(uv)
                    glyphName = self.ttFont.getGlyphName(gid)
                    localUVList.append([uv, glyphName])
                uvsDict.setdefault(varUVS, []).extend(localUVList)

        self.uvsDict = uvsDict

    def toXML(self, writer, ttFont):
        """Write this subtable, header attributes and mappings, as TTX XML."""
        writer.begintag(self.__class__.__name__, [
                ("platformID", self.platformID),
                ("platEncID", self.platEncID),
                ("format", self.format),
                ("length", self.length),
                ("numVarSelectorRecords", self.numVarSelectorRecords),
                ])
        writer.newline()
        uvsDict = self.uvsDict
        uvsList = sorted(uvsDict.keys())
        for uvs in uvsList:
            uvList = uvsDict[uvs]
            # Default entries (no glyph name) first, then by base code.
            uvList.sort(key=lambda item: (item[1] is not None, item[0], item[1]))
            for uv, gname in uvList:
                if gname is None:
                    gname = "None"
                # I use the arg rather than the keyword syntax in order to preserve the attribute order.
                writer.simpletag("map", [("uvs", hex(uvs)), ("uv", hex(uv)), ("name", gname)])
                writer.newline()
        writer.endtag(self.__class__.__name__)
        writer.newline()

    def fromXML(self, name, attrs, content, ttFont):
        """Populate the header fields and ``self.uvsDict`` from parsed TTX XML."""
        self.format = safeEval(attrs["format"])
        self.length = safeEval(attrs["length"])
        self.numVarSelectorRecords = safeEval(attrs["numVarSelectorRecords"])
        self.language = 0xFF  # provide a value so that CmapSubtable.__lt__() won't fail
        if not hasattr(self, "cmap"):
            self.cmap = {}  # so that clients that expect this to exist in a cmap table won't fail.
        if not hasattr(self, "uvsDict"):
            self.uvsDict = {}
        uvsDict = self.uvsDict

        for element in content:
            if not isinstance(element, tuple):
                continue  # skip text between the child elements
            childName, childAttrs, dummyContent = element
            if childName != "map":
                continue
            uvs = safeEval(childAttrs["uvs"])
            uv = safeEval(childAttrs["uv"])
            gname = childAttrs["name"]
            if gname == "None":
                gname = None
            uvsDict.setdefault(uvs, []).append([uv, gname])

    def compile(self, ttFont):
        """Return the binary form of this subtable.

        If raw data is still held in ``self.data`` it is written back
        behind a header built from the stored fields.  Otherwise the
        records are rebuilt from ``self.uvsDict`` and ``self.length`` and
        ``self.numVarSelectorRecords`` are updated to match.
        """
        if self.data:
            return struct.pack(">HLL", self.format, self.length, self.numVarSelectorRecords) + self.data

        uvsDict = self.uvsDict
        uvsList = sorted(uvsDict.keys())
        self.numVarSelectorRecords = len(uvsList)
        offset = 10 + self.numVarSelectorRecords*11  # current value is end of VarSelectorRecords block.
        data = []
        varSelectorRecords = []
        for uvs in uvsList:
            entryList = uvsDict[uvs]

            defList = sorted(entry[0] for entry in entryList if entry[1] is None)
            if defList:
                defOVSOffset = offset

                # Collapse consecutive base codes into (start, additional
                # count) range records.
                lastUV = defList[0]
                cnt = -1
                defRecs = []
                for defEntry in defList:
                    cnt += 1
                    if (lastUV+cnt) != defEntry:
                        rec = struct.pack(">3sB", cvtFromUVS(lastUV), cnt-1)
                        lastUV = defEntry
                        defRecs.append(rec)
                        cnt = 0

                rec = struct.pack(">3sB", cvtFromUVS(lastUV), cnt)
                defRecs.append(rec)

                numDefRecs = len(defRecs)
                data.append(struct.pack(">L", numDefRecs))
                data.extend(defRecs)
                offset += 4 + numDefRecs*4
            else:
                defOVSOffset = 0

            ndefList = sorted(entry for entry in entryList if entry[1] is not None)
            if ndefList:
                nonDefUVSOffset = offset
                numNonDefRecs = len(ndefList)
                data.append(struct.pack(">L", numNonDefRecs))
                offset += 4 + numNonDefRecs*5

                for uv, gname in ndefList:
                    gid = ttFont.getGlyphID(gname)
                    ndrec = struct.pack(">3sH", cvtFromUVS(uv), gid)
                    data.append(ndrec)
            else:
                nonDefUVSOffset = 0

            vrec = struct.pack(">3sLL", cvtFromUVS(uvs), defOVSOffset, nonDefUVSOffset)
            varSelectorRecords.append(vrec)

        data = bytesjoin(varSelectorRecords) + bytesjoin(data)
        self.length = 10 + len(data)
        headerdata = struct.pack(">HLL", self.format, self.length, self.numVarSelectorRecords)
        # Fixed: the result used to be cached in self.data *with* its
        # header, so a second compile() prepended another header to it.
        return headerdata + data
1257 return self.data
1258
1259
class cmap_format_unknown(CmapSubtable):
    """Fallback subtable that keeps its raw bytes instead of parsing them.

    The bytes are stored unchanged in ``self.data``; they are written to XML
    as a hex dump and handed back as-is by ``compile()``.
    """

    def toXML(self, writer, ttFont):
        """Write the raw data as a hex dump inside a per-format element."""
        # The first 12 characters of this class's name are "cmap_format_";
        # appending the format number yields the element name.
        cmapName = self.__class__.__name__[:12] + str(self.format)
        writer.begintag(cmapName, [
            ("platformID", self.platformID),
            ("platEncID", self.platEncID),
        ])
        writer.newline()
        writer.dumphex(self.data)
        writer.endtag(cmapName)
        writer.newline()

    def fromXML(self, name, attrs, content, ttFont):
        """Restore the raw data from the hex dump held in *content*."""
        self.data = readHex(content)
        self.cmap = {}

    def decompileHeader(self, data, ttFont):
        """Store *data* unparsed and set a placeholder language."""
        self.language = 0  # dummy value
        self.data = data

    def decompile(self, data, ttFont):
        """Store *data* when called directly; do nothing when both args are None.

        We usually get here indirectly from the subtable __getattr__
        function, in which case both args must be None. If not, someone is
        calling the subtable decompile() directly, and must provide both
        args.
        """
        if data is not None and ttFont is not None:
            # Pass the data through whole: this method has no offset or
            # length of its own to slice with.
            self.decompileHeader(data, ttFont)
        else:
            assert (data is None and ttFont is None), "Need both data and ttFont arguments"

    def compile(self, ttFont):
        """Return the stored raw data, or None when there is none."""
        if self.data:
            return self.data
        return None
Just7842e561999-12-16 21:34:53 +00001294
# Registry of the subtable formats that have a dedicated class in this
# module, keyed by the cmap subtable format number.
cmap_classes = dict([
    (0, cmap_format_0),
    (2, cmap_format_2),
    (4, cmap_format_4),
    (6, cmap_format_6),
    (12, cmap_format_12),
    (13, cmap_format_13),
    (14, cmap_format_14),
])