blob: 57b81c474c136ced558618a13dc9f6228d8bf33c [file] [log] [blame]
Just7842e561999-12-16 21:34:53 +00001import DefaultTable
2import struct
3import string
4import array
5from fontTools import ttLib
6from fontTools.misc.textTools import safeEval, readHex
7
8
class table__c_m_a_p(DefaultTable.DefaultTable):
    """The 'cmap' table: a versioned list of character mapping subtables.

    After decompile() or fromXML() the instance carries:
      tableVersion -- the version number from the table header
      tables       -- a list of subtable objects, one per encoding record,
                      each tagged with platformID and platEncID
    """

    def getcmap(self, platformID, platEncID):
        """Return the first subtable matching both IDs, or None if absent."""
        for subtable in self.tables:
            if (subtable.platformID == platformID and
                    subtable.platEncID == platEncID):
                return subtable
        return None  # not found

    def decompile(self, data, ttFont):
        """Parse the binary table in 'data' into self.tables.

        Subtable formats without an entry in cmap_classes are wrapped in
        cmap_format_unknown.
        """
        tableVersion, numSubTables = struct.unpack(">HH", data[:4])
        self.tableVersion = int(tableVersion)
        self.tables = tables = []
        for i in range(numSubTables):
            # Encoding record: uint16 platformID, uint16 encodingID and an
            # *unsigned* 32-bit offset from the start of the table.
            platformID, platEncID, offset = struct.unpack(
                    ">HHL", data[4+i*8:4+(i+1)*8])
            platformID, platEncID = int(platformID), int(platEncID)
            offset = int(offset)
            subtableFormat, length = struct.unpack(">HH", data[offset:offset+4])
            if subtableFormat in (8, 10, 12, 13):
                # These formats have a uint16 reserved field after the format
                # and a 32-bit length; reading ">HH" would take the reserved
                # field as the length and truncate the subtable.
                subtableFormat, reserved, length = struct.unpack(
                        ">HHL", data[offset:offset+8])
            elif subtableFormat == 14:
                # Format 14: uint16 format directly followed by uint32 length.
                subtableFormat, length = struct.unpack(
                        ">HL", data[offset:offset+6])
            subtableClass = cmap_classes.get(subtableFormat, cmap_format_unknown)
            table = subtableClass(subtableFormat)
            table.platformID = platformID
            table.platEncID = platEncID
            table.decompile(data[offset:offset+int(length)], ttFont)
            tables.append(table)

    def compile(self, ttFont):
        """Return the binary table; byte-identical subtables share one copy.

        Note that this sorts self.tables in place.
        """
        self.tables.sort()  # sort according to the spec; see CmapSubtable.__cmp__()
        numSubTables = len(self.tables)
        # Subtable data starts right after the header and the encoding records.
        totalOffset = 4 + 8 * numSubTables
        data = struct.pack(">HH", self.tableVersion, numSubTables)
        tableData = b""
        done = {}  # compiled chunk -> offset, so identical chunks are reused
        for table in self.tables:
            chunk = table.compile(ttFont)
            if chunk in done:
                offset = done[chunk]
            else:
                offset = done[chunk] = totalOffset + len(tableData)
                tableData = tableData + chunk
            data = data + struct.pack(
                    ">HHL", table.platformID, table.platEncID, offset)
        return data + tableData

    def toXML(self, writer, ttFont):
        """Write the table version followed by every subtable."""
        writer.simpletag("tableVersion", version=self.tableVersion)
        writer.newline()
        for table in self.tables:
            table.toXML(writer, ttFont)

    def fromXML(self, element, ttFont):
        """Consume one child element, given as a (name, attrs, content) tuple.

        A "tableVersion" element sets self.tableVersion; a "cmap_format_<N>"
        element is turned into a subtable and appended to self.tables; any
        other element is ignored.
        """
        name, attrs, content = element
        if name == "tableVersion":
            self.tableVersion = safeEval(attrs["version"])
            return
        if name[:12] != "cmap_format_":
            return
        if not hasattr(self, "tables"):
            self.tables = []
        # Take the whole suffix, not just one character, so that format
        # numbers of two or more digits are read correctly.
        # NOTE(review): CmapSubtable.toXML() uses the class name as tag, so an
        # unknown subtable is dumped as "cmap_format_unknown"; that suffix is
        # not a number and is still not handled here -- confirm desired
        # behaviour.
        subtableFormat = safeEval(name[12:])
        subtableClass = cmap_classes.get(subtableFormat, cmap_format_unknown)
        table = subtableClass(subtableFormat)
        table.platformID = safeEval(attrs["platformID"])
        table.platEncID = safeEval(attrs["platEncID"])
        table.fromXML((name, attrs, content), ttFont)
        self.tables.append(table)
76
77
class CmapSubtable:
    """Base class for cmap subtables.

    Subclasses provide decompile(data, ttFont) and compile(ttFont). The owner
    of the subtable sets the platformID and platEncID attributes. The default
    XML form defined here is a hex dump of the compiled subtable.
    """

    def __init__(self, format):
        self.format = format

    def toXML(self, writer, ttFont):
        """Write the subtable as a hex dump inside a tag named after the class."""
        tag = self.__class__.__name__
        writer.begintag(tag, [
                ("platformID", self.platformID),
                ("platEncID", self.platEncID),
                ])
        writer.newline()
        writer.dumphex(self.compile(ttFont))
        writer.endtag(tag)
        writer.newline()

    def fromXML(self, element, ttFont):
        """Rebuild the subtable from the hex dump written by toXML().

        'element' is a (name, attrs, content) tuple.
        """
        name, attrs, content = element
        self.decompile(readHex(content), ttFont)

    def __cmp__(self, other):
        """Order subtables by (platformID, platEncID, version).

        Implemented so that list.sort() sorts according to the cmap spec.
        Subtables with an equal key are further compared by their complete
        attribute dictionaries, so two subtables only compare equal when all
        their contents are equal.
        """
        # Not every subclass sets 'version' (cmap_format_unknown keeps only
        # the raw data); fall back to 0 instead of raising AttributeError in
        # the middle of a sort.
        # NOTE(review): 0 is an assumed default for subtables whose version
        # field was never parsed.
        selfKey = (
                self.platformID,
                self.platEncID,
                getattr(self, "version", 0))
        otherKey = (
                other.platformID,
                other.platEncID,
                getattr(other, "version", 0))
        if selfKey != otherKey:
            if selfKey < otherKey:
                return -1
            return 1
        return cmp(self.__dict__, other.__dict__)
109
110
class cmap_format_0(CmapSubtable):
    """Format 0 subtable: one glyph ID byte for each character code 0..255.

    self.cmap maps character codes to glyph names.
    """

    def decompile(self, data, ttFont):
        """Parse the 262-byte subtable into self.version and self.cmap."""
        format, length, version = struct.unpack(">HHH", data[:6])
        self.version = int(version)
        # 6 header bytes + 256 glyph ID bytes
        assert len(data) == 262 == length
        glyphIdArray = array.array("B")
        glyphIdArray.fromstring(data[6:])
        self.cmap = cmap = {}
        for charCode, glyphID in enumerate(glyphIdArray):
            cmap[charCode] = ttFont.getGlyphName(glyphID)

    def compile(self, ttFont):
        """Return the binary subtable; self.cmap must cover exactly 0..255."""
        charCodes = sorted(self.cmap.keys())
        assert charCodes == list(range(256))
        # Build a separate list of glyph IDs rather than overwriting the list
        # that is being iterated.
        glyphIDs = [ttFont.getGlyphID(self.cmap[charCode])
                for charCode in charCodes]
        glyphIdArray = array.array("B", glyphIDs)
        data = struct.pack(">HHH", 0, 262, self.version) + glyphIdArray.tostring()
        assert len(data) == 262
        return data

    def toXML(self, writer, ttFont):
        """Write one <map code=... name=.../> element per character code."""
        tag = self.__class__.__name__
        writer.begintag(tag, [
                ("platformID", self.platformID),
                ("platEncID", self.platEncID),
                ("version", self.version),
                ])
        writer.newline()
        for code, name in sorted(self.cmap.items()):
            writer.simpletag("map", code=hex(code), name=name)
            writer.newline()
        writer.endtag(tag)
        writer.newline()

    def fromXML(self, element, ttFont):
        """Read self.version and self.cmap from a (name, attrs, content) tuple."""
        name, attrs, content = element
        self.version = safeEval(attrs["version"])
        self.cmap = {}
        for child in content:
            if not isinstance(child, tuple):
                # non-tuple items (presumably text between the child
                # elements) carry no mapping
                continue
            childName, childAttrs, childContent = child
            if childName != "map":
                continue
            self.cmap[safeEval(childAttrs["code"])] = childAttrs["name"]
160
161
162
class cmap_format_2(CmapSubtable):
    """Format 2 subtable, kept as an opaque blob.

    Only the version field of the header is extracted; the raw bytes are
    stored and written back unchanged.
    """

    def decompile(self, data, ttFont):
        """Remember the raw data and pick the version out of the header."""
        # header fields: format, length, version
        header = struct.unpack(">HHH", data[:6])
        self.version = int(header[2])
        self.data = data

    def compile(self, ttFont):
        """Return the bytes exactly as they were handed to decompile()."""
        return self.data
172
173
# struct format of the fixed-size format 4 header: seven big-endian uint16
# values (format, length, version, segCountX2, searchRange, entrySelector,
# rangeShift).
cmap_format_4_format = ">7H"

# The header is followed by these arrays, in this order:
#
#   uint16 endCode[segCount]          Ending character code for each segment, last = 0xFFFF.
#   uint16 reservedPad                This value should be zero
#   uint16 startCode[segCount]        Starting character code for each segment
#   uint16 idDelta[segCount]          Delta for all character codes in segment
#   uint16 idRangeOffset[segCount]    Offset in bytes to glyph indexArray, or 0
#   uint16 glyphIndexArray[variable]  Glyph index array
182
class cmap_format_4(CmapSubtable):
    """Format 4 subtable: segment mapping to delta values (16-bit codes).

    self.cmap maps character codes to glyph names.
    """

    def decompile(self, data, ttFont):
        """Parse the binary subtable into self.version and self.cmap."""
        (format, length, self.version, segCountX2,
                searchRange, entrySelector, rangeShift) = \
                    struct.unpack(cmap_format_4_format, data[:14])
        assert len(data) == length, "corrupt cmap table (%d, %d)" % (len(data), length)
        data = data[14:]
        segCount = int(segCountX2) // 2

        allcodes = array.array("H")
        allcodes.fromstring(data)
        if ttLib.endian != "big":
            allcodes.byteswap()

        # divide the data
        endCode = allcodes[:segCount]
        allcodes = allcodes[segCount+1:]  # the +1 skips reservedPad
        startCode = allcodes[:segCount]
        allcodes = allcodes[segCount:]
        idDelta = allcodes[:segCount]
        allcodes = allcodes[segCount:]
        idRangeOffset = allcodes[:segCount]
        glyphIndexArray = allcodes[segCount:]

        # build 2-byte character mapping
        cmap = {}
        for i in range(len(startCode) - 1):  # don't do 0xffff!
            rangeOffset = idRangeOffset[i]
            for charCode in range(startCode[i], endCode[i] + 1):
                if rangeOffset == 0:
                    glyphID = charCode + idDelta[i]
                else:
                    # idRangeOffset[i] is a byte offset counted from the
                    # position of idRangeOffset[i] itself. Convert it to
                    # uint16 units, add the position within the segment and
                    # the position of entry i in its array, then subtract the
                    # array length to obtain an index into glyphIndexArray,
                    # which directly follows the idRangeOffset array.
                    index = (rangeOffset // 2 + (charCode - startCode[i])
                            + i - len(idRangeOffset))
                    if glyphIndexArray[index] != 0:  # if not missing glyph
                        glyphID = glyphIndexArray[index] + idDelta[i]
                    else:
                        glyphID = 0  # missing glyph
                # the arithmetic is modulo 65536
                cmap[charCode] = ttFont.getGlyphName(glyphID % 0x10000)
        self.cmap = cmap

    def compile(self, ttFont):
        """Return the binary subtable built from self.cmap.

        An empty self.cmap yields a subtable holding only the closing 0xffff
        segment.
        """
        from fontTools.ttLib.sfnt import maxpoweroftwo

        charCodes = sorted(self.cmap.keys())

        # build startCode and endCode lists: one segment per run of
        # consecutive character codes
        startCode = []
        endCode = []
        if charCodes:
            last = charCodes[0]
            startCode.append(last)
            for charCode in charCodes[1:]:
                if charCode != last + 1:
                    endCode.append(last)
                    startCode.append(charCode)
                last = charCode
            endCode.append(last)
        startCode.append(0xffff)
        endCode.append(0xffff)

        # build idDelta, idRangeOffset and glyphIndexArray
        idDelta = []
        idRangeOffset = []
        glyphIndexArray = []

        for i in range(len(endCode) - 1):  # skip the closing codes (0xffff)
            indices = [ttFont.getGlyphID(self.cmap[charCode])
                    for charCode in range(startCode[i], endCode[i] + 1)]
            if indices == list(range(indices[0], indices[0] + len(indices))):
                # consecutive glyph IDs: a plain delta is enough
                idDelta.append((indices[0] - startCode[i]) % 0x10000)
                idRangeOffset.append(0)
            else:
                # Arbitrary glyph IDs: store them in glyphIndexArray. The
                # offset is in bytes, counted from this idRangeOffset entry:
                # the entries left in this array (len(endCode) - i) plus the
                # glyph indices already emitted.
                idDelta.append(0)
                idRangeOffset.append(2 * (len(endCode) + len(glyphIndexArray) - i))
                glyphIndexArray.extend(indices)
        idDelta.append(1)  # 0xffff + 1 == 0 (mod 0x10000), so this end code maps to .notdef
        idRangeOffset.append(0)

        # header fields describing the segment count
        segCount = len(endCode)
        segCountX2 = segCount * 2
        maxexponent = maxpoweroftwo(segCount)
        searchRange = 2 * (2 ** maxexponent)
        entrySelector = maxexponent
        rangeShift = 2 * segCount - searchRange

        allcodes = array.array("H",
                endCode + [0] + startCode + idDelta + idRangeOffset + glyphIndexArray)
        if ttLib.endian != "big":
            allcodes.byteswap()
        data = allcodes.tostring()
        length = struct.calcsize(cmap_format_4_format) + len(data)
        header = struct.pack(cmap_format_4_format, self.format, length, self.version,
                segCountX2, searchRange, entrySelector, rangeShift)
        return header + data

    def toXML(self, writer, ttFont):
        """Write one <map/> element per character code, with a name comment."""
        from fontTools.unicode import Unicode
        tag = self.__class__.__name__
        writer.begintag(tag, [
                ("platformID", self.platformID),
                ("platEncID", self.platEncID),
                ("version", self.version),
                ])
        writer.newline()

        for code, name in sorted(self.cmap.items()):
            writer.simpletag("map", code=hex(code), name=name)
            writer.comment(Unicode[code])
            writer.newline()

        writer.endtag(tag)
        writer.newline()

    def fromXML(self, element, ttFont):
        """Read self.version and self.cmap from a (name, attrs, content) tuple."""
        name, attrs, content = element
        self.version = safeEval(attrs["version"])
        self.cmap = {}
        for child in content:
            if not isinstance(child, tuple):
                # non-tuple items (presumably text between the child
                # elements) carry no mapping
                continue
            childName, childAttrs, childContent = child
            if childName != "map":
                continue
            self.cmap[safeEval(childAttrs["code"])] = childAttrs["name"]
314
315
class cmap_format_6(CmapSubtable):
    """Format 6 subtable: glyph IDs for one contiguous range of char codes.

    self.cmap maps character codes to glyph names.
    """

    def decompile(self, data, ttFont):
        """Parse the binary subtable into self.version and self.cmap."""
        format, length, version, firstCode, entryCount = struct.unpack(
                ">HHHHH", data[:10])
        self.version = int(version)
        firstCode = int(firstCode)
        data = data[10:]
        # Deliberately no check that len(data) == 2 * entryCount: that is not
        # true in Apple's Helvetica. Read the announced entries only.
        glyphIndexArray = array.array("H")
        glyphIndexArray.fromstring(data[:2 * entryCount])
        if ttLib.endian != "big":
            glyphIndexArray.byteswap()
        self.cmap = cmap = {}
        for i, glyphID in enumerate(glyphIndexArray):
            cmap[i + firstCode] = ttFont.getGlyphName(glyphID)

    def compile(self, ttFont):
        """Return the binary subtable; the char codes must be contiguous.

        An empty self.cmap yields a subtable with firstCode 0 and no entries.
        """
        codes = sorted(self.cmap.keys())
        if codes:
            firstCode = codes[0]
            assert codes == list(range(firstCode, firstCode + len(codes)))
        else:
            # NOTE(review): firstCode 0 is an assumed value for an empty
            # mapping, chosen so that compiling does not raise IndexError.
            firstCode = 0
        glyphIndexArray = array.array("H", [0] * len(codes))
        for code in codes:
            glyphIndexArray[code - firstCode] = ttFont.getGlyphID(self.cmap[code])
        if ttLib.endian != "big":
            glyphIndexArray.byteswap()
        data = glyphIndexArray.tostring()
        # 10 is the size of the header packed below
        header = struct.pack(">HHHHH",
                6, len(data) + 10, self.version, firstCode, len(self.cmap))
        return header + data

    def toXML(self, writer, ttFont):
        """Write one <map code=... name=.../> element per character code."""
        tag = self.__class__.__name__
        writer.begintag(tag, [
                ("platformID", self.platformID),
                ("platEncID", self.platEncID),
                ("version", self.version),
                ])
        writer.newline()

        for code, name in sorted(self.cmap.items()):
            writer.simpletag("map", code=hex(code), name=name)
            writer.newline()

        writer.endtag(tag)
        writer.newline()

    def fromXML(self, element, ttFont):
        """Read self.version and self.cmap from a (name, attrs, content) tuple."""
        name, attrs, content = element
        self.version = safeEval(attrs["version"])
        self.cmap = {}
        for child in content:
            if not isinstance(child, tuple):
                # non-tuple items (presumably text between the child
                # elements) carry no mapping
                continue
            childName, childAttrs, childContent = child
            if childName != "map":
                continue
            self.cmap[safeEval(childAttrs["code"])] = childAttrs["name"]
379
380
class cmap_format_unknown(CmapSubtable):
    """Pass-through subtable, used for formats missing from cmap_classes.

    The raw bytes are stored on decompile and returned untouched on compile;
    nothing in the data is interpreted.
    """

    def decompile(self, data, ttFont):
        """Keep the raw subtable bytes."""
        self.data = data

    def compile(self, ttFont):
        """Return the bytes stored by decompile()."""
        return self.data
388
389
# Subtable format number -> class implementing that format. A format that
# has no entry here is handled by cmap_format_unknown.
cmap_classes = {
    0: cmap_format_0,
    2: cmap_format_2,
    4: cmap_format_4,
    6: cmap_format_6,
}
395 }
396
397