Blame - Lib/fontTools/ttLib/tables/_c_m_a_p.py - platform/external/fonttools

blob: 57b81c474c136ced558618a13dc9f6228d8bf33c [file] [log] [blame]

Just	7842e56	1999-12-16 21:34:53 +0000	[diff] [blame]	1	import DefaultTable
				2	import struct
				3	import string
				4	import array
				5	from fontTools import ttLib
				6	from fontTools.misc.textTools import safeEval, readHex
				7
				8
				9	class table__c_m_a_p(DefaultTable.DefaultTable):
				10
				11	def getcmap(self, platformID, platEncID):
				12	for subtable in self.tables:
				13	if (subtable.platformID == platformID and
				14	subtable.platEncID == platEncID):
				15	return subtable
				16	return None # not found
				17
				18	def decompile(self, data, ttFont):
				19	tableVersion, numSubTables = struct.unpack(">HH", data[:4])
				20	self.tableVersion = int(tableVersion)
				21	self.tables = tables = []
				22	for i in range(numSubTables):
				23	platformID, platEncID, offset = struct.unpack(
				24	">HHl", data[4+i8:4+(i+1)8])
				25	platformID, platEncID = int(platformID), int(platEncID)
				26	format, length = struct.unpack(">HH", data[offset:offset+4])
				27	if not cmap_classes.has_key(format):
				28	table = cmap_format_unknown(format)
				29	else:
				30	table = cmap_classes[format](format)
				31	table.platformID = platformID
				32	table.platEncID = platEncID
				33	table.decompile(data[offset:offset+int(length)], ttFont)
				34	tables.append(table)
				35
				36	def compile(self, ttFont):
				37	self.tables.sort() # sort according to the spec; see CmapSubtable.__cmp__()
				38	numSubTables = len(self.tables)
				39	totalOffset = 4 + 8 * numSubTables
				40	data = struct.pack(">HH", self.tableVersion, numSubTables)
				41	tableData = ""
				42	done = {} # remember the data so we can reuse the "pointers"
				43	for table in self.tables:
				44	chunk = table.compile(ttFont)
				45	if done.has_key(chunk):
				46	offset = done[chunk]
				47	else:
				48	offset = done[chunk] = totalOffset + len(tableData)
Just	1b85098	2000-06-07 18:25:44 +0000	[diff] [blame]	49	tableData = tableData + chunk
Just	7842e56	1999-12-16 21:34:53 +0000	[diff] [blame]	50	data = data + struct.pack(">HHl", table.platformID, table.platEncID, offset)
				51	return data + tableData
				52
				53	def toXML(self, writer, ttFont):
				54	writer.simpletag("tableVersion", version=self.tableVersion)
				55	writer.newline()
				56	for table in self.tables:
				57	table.toXML(writer, ttFont)
				58
				59	def fromXML(self, (name, attrs, content), ttFont):
				60	if name == "tableVersion":
				61	self.tableVersion = safeEval(attrs["version"])
				62	return
				63	if name[:12] <> "cmap_format_":
				64	return
				65	if not hasattr(self, "tables"):
				66	self.tables = []
				67	format = safeEval(name[12])
				68	if not cmap_classes.has_key(format):
				69	table = cmap_format_unknown(format)
				70	else:
				71	table = cmap_classes[format](format)
				72	table.platformID = safeEval(attrs["platformID"])
				73	table.platEncID = safeEval(attrs["platEncID"])
				74	table.fromXML((name, attrs, content), ttFont)
				75	self.tables.append(table)
				76
				77
				78	class CmapSubtable:
				79
				80	def __init__(self, format):
				81	self.format = format
				82
				83	def toXML(self, writer, ttFont):
				84	writer.begintag(self.__class__.__name__, [
				85	("platformID", self.platformID),
				86	("platEncID", self.platEncID),
				87	])
				88	writer.newline()
				89	writer.dumphex(self.compile(ttFont))
				90	writer.endtag(self.__class__.__name__)
				91	writer.newline()
				92
				93	def fromXML(self, (name, attrs, content), ttFont):
				94	self.decompile(readHex(content), ttFont)
				95
				96	def __cmp__(self, other):
				97	# implemented so that list.sort() sorts according to the cmap spec.
				98	selfTuple = (
				99	self.platformID,
				100	self.platEncID,
				101	self.version,
				102	self.__dict__)
				103	otherTuple = (
				104	other.platformID,
				105	other.platEncID,
				106	other.version,
				107	other.__dict__)
				108	return cmp(selfTuple, otherTuple)
				109
				110
				111	class cmap_format_0(CmapSubtable):
				112
				113	def decompile(self, data, ttFont):
				114	format, length, version = struct.unpack(">HHH", data[:6])
				115	self.version = int(version)
				116	assert len(data) == 262 == length
				117	glyphIdArray = array.array("B")
				118	glyphIdArray.fromstring(data[6:])
				119	self.cmap = cmap = {}
				120	for charCode in range(len(glyphIdArray)):
				121	cmap[charCode] = ttFont.getGlyphName(glyphIdArray[charCode])
				122
				123	def compile(self, ttFont):
				124	charCodes = self.cmap.keys()
				125	charCodes.sort()
				126	assert charCodes == range(256) # charCodes[charCode] == charCode
				127	for charCode in charCodes:
				128	# reusing the charCodes list!
				129	charCodes[charCode] = ttFont.getGlyphID(self.cmap[charCode])
				130	glyphIdArray = array.array("B", charCodes)
				131	data = struct.pack(">HHH", 0, 262, self.version) + glyphIdArray.tostring()
				132	assert len(data) == 262
				133	return data
				134
				135	def toXML(self, writer, ttFont):
				136	writer.begintag(self.__class__.__name__, [
				137	("platformID", self.platformID),
				138	("platEncID", self.platEncID),
				139	("version", self.version),
				140	])
				141	writer.newline()
				142	items = self.cmap.items()
				143	items.sort()
				144	for code, name in items:
				145	writer.simpletag("map", code=hex(code), name=name)
				146	writer.newline()
				147	writer.endtag(self.__class__.__name__)
				148	writer.newline()
				149
				150	def fromXML(self, (name, attrs, content), ttFont):
				151	self.version = safeEval(attrs["version"])
				152	self.cmap = {}
				153	for element in content:
				154	if type(element) <> type(()):
				155	continue
				156	name, attrs, content = element
				157	if name <> "map":
				158	continue
				159	self.cmap[safeEval(attrs["code"])] = attrs["name"]
				160
				161
				162
				163	class cmap_format_2(CmapSubtable):
				164
				165	def decompile(self, data, ttFont):
				166	format, length, version = struct.unpack(">HHH", data[:6])
				167	self.version = int(version)
				168	self.data = data
				169
				170	def compile(self, ttFont):
				171	return self.data
				172
				173
# struct format of the fixed-size header of a format 4 subtable:
# big-endian, seven unsigned 16-bit fields.  cmap_format_4 unpacks and
# packs them as: format, length, version, segCountX2, searchRange,
# entrySelector, rangeShift.
cmap_format_4_format = ">7H"

# The header is followed by these arrays, in this order:
#uint16 endCode[segCount] # Ending character code for each segment, last = 0xFFFF.
#uint16 reservedPad # This value should be zero
#uint16 startCode[segCount] # Starting character code for each segment
#uint16 idDelta[segCount] # Delta for all character codes in segment
#uint16 idRangeOffset[segCount] # Offset in bytes to glyph indexArray, or 0
#uint16 glyphIndexArray[variable] # Glyph index array
				182
				183	class cmap_format_4(CmapSubtable):
				184
				185	def decompile(self, data, ttFont):
				186	(format, length, self.version, segCountX2,
				187	searchRange, entrySelector, rangeShift) = \
				188	struct.unpack(cmap_format_4_format, data[:14])
				189	assert len(data) == length, "corrupt cmap table (%d, %d)" % (len(data), length)
				190	data = data[14:]
				191	segCountX2 = int(segCountX2)
				192	segCount = segCountX2 / 2
				193
				194	allcodes = array.array("H")
				195	allcodes.fromstring(data)
				196	if ttLib.endian <> "big":
				197	allcodes.byteswap()
				198
				199	# divide the data
				200	endCode = allcodes[:segCount]
				201	allcodes = allcodes[segCount+1:]
				202	startCode = allcodes[:segCount]
				203	allcodes = allcodes[segCount:]
				204	idDelta = allcodes[:segCount]
				205	allcodes = allcodes[segCount:]
				206	idRangeOffset = allcodes[:segCount]
				207	glyphIndexArray = allcodes[segCount:]
				208
				209	# build 2-byte character mapping
				210	cmap = {}
				211	for i in range(len(startCode) - 1): # don't do 0xffff!
				212	for charCode in range(startCode[i], endCode[i] + 1):
				213	rangeOffset = idRangeOffset[i]
				214	if rangeOffset == 0:
				215	glyphID = charCode + idDelta[i]
				216	else:
				217	# someone needs to get killed.
				218	index = idRangeOffset[i] / 2 + (charCode - startCode[i]) + i - len(idRangeOffset)
				219	if glyphIndexArray[index] <> 0: # if not missing glyph
				220	glyphID = glyphIndexArray[index] + idDelta[i]
				221	else:
				222	glyphID = 0 # missing glyph
				223	cmap[charCode] = ttFont.getGlyphName(glyphID % 0x10000)
				224	self.cmap = cmap
				225
				226	def compile(self, ttFont):
				227	from fontTools.ttLib.sfnt import maxpoweroftwo
				228
				229	codes = self.cmap.items()
				230	codes.sort()
				231
				232	# build startCode and endCode lists
				233	last = codes[0][0]
				234	endCode = []
				235	startCode = [last]
				236	for charCode, glyphName in codes[1:]: # skip the first code, it's the first start code
				237	if charCode == last + 1:
				238	last = charCode
				239	continue
				240	endCode.append(last)
				241	startCode.append(charCode)
				242	last = charCode
				243	endCode.append(last)
				244	startCode.append(0xffff)
				245	endCode.append(0xffff)
				246
				247	# build up rest of cruft.
				248	idDelta = []
				249	idRangeOffset = []
				250	glyphIndexArray = []
				251
				252	for i in range(len(endCode)-1): # skip the closing codes (0xffff)
				253	indices = []
				254	for charCode in range(startCode[i], endCode[i]+1):
				255	indices.append(ttFont.getGlyphID(self.cmap[charCode]))
				256	if indices == range(indices[0], indices[0] + len(indices)):
				257	idDelta.append((indices[0] - startCode[i]) % 0x10000)
				258	idRangeOffset.append(0)
				259	else:
				260	# someone definitely needs to get killed.
				261	idDelta.append(0)
				262	idRangeOffset.append(2 * (len(endCode) + len(glyphIndexArray) - i))
				263	glyphIndexArray = glyphIndexArray + indices
				264	idDelta.append(1) # 0xffff + 1 == (tadaa!) 0. So this end code maps to .notdef
				265	idRangeOffset.append(0)
				266
				267	# Insane.
				268	segCount = len(endCode)
				269	segCountX2 = segCount * 2
				270	maxexponent = maxpoweroftwo(segCount)
				271	searchRange = 2 * (2 ** maxexponent)
				272	entrySelector = maxexponent
				273	rangeShift = 2 * segCount - searchRange
				274
				275	allcodes = array.array("H",
				276	endCode + [0] + startCode + idDelta + idRangeOffset + glyphIndexArray)
				277	if ttLib.endian <> "big":
				278	allcodes.byteswap()
				279	data = allcodes.tostring()
				280	length = struct.calcsize(cmap_format_4_format) + len(data)
				281	header = struct.pack(cmap_format_4_format, self.format, length, self.version,
				282	segCountX2, searchRange, entrySelector, rangeShift)
				283	return header + data
				284
				285	def toXML(self, writer, ttFont):
				286	from fontTools.unicode import Unicode
				287	codes = self.cmap.items()
				288	codes.sort()
				289	writer.begintag(self.__class__.__name__, [
				290	("platformID", self.platformID),
				291	("platEncID", self.platEncID),
				292	("version", self.version),
				293	])
				294	writer.newline()
				295
				296	for code, name in codes:
				297	writer.simpletag("map", code=hex(code), name=name)
				298	writer.comment(Unicode[code])
				299	writer.newline()
				300
				301	writer.endtag(self.__class__.__name__)
				302	writer.newline()
				303
				304	def fromXML(self, (name, attrs, content), ttFont):
				305	self.version = safeEval(attrs["version"])
				306	self.cmap = {}
				307	for element in content:
				308	if type(element) <> type(()):
				309	continue
				310	name, attrs, content = element
				311	if name <> "map":
				312	continue
				313	self.cmap[safeEval(attrs["code"])] = attrs["name"]
				314
				315
				316	class cmap_format_6(CmapSubtable):
				317
				318	def decompile(self, data, ttFont):
				319	format, length, version, firstCode, entryCount = struct.unpack(
				320	">HHHHH", data[:10])
				321	self.version = int(version)
				322	firstCode = int(firstCode)
				323	self.version = int(version)
				324	data = data[10:]
Just	f6b1563	2000-08-23 12:33:14 +0000	[diff] [blame^]	325	#assert len(data) == 2 * entryCount # XXX not true in Apple's Helvetica!!!
Just	7842e56	1999-12-16 21:34:53 +0000	[diff] [blame]	326	glyphIndexArray = array.array("H")
Just	f6b1563	2000-08-23 12:33:14 +0000	[diff] [blame^]	327	glyphIndexArray.fromstring(data[:2 * entryCount])
Just	7842e56	1999-12-16 21:34:53 +0000	[diff] [blame]	328	if ttLib.endian <> "big":
				329	glyphIndexArray.byteswap()
				330	self.cmap = cmap = {}
				331	for i in range(len(glyphIndexArray)):
				332	glyphID = glyphIndexArray[i]
				333	glyphName = ttFont.getGlyphName(glyphID)
				334	cmap[i+firstCode] = glyphName
				335
				336	def compile(self, ttFont):
				337	codes = self.cmap.keys()
				338	codes.sort()
				339	assert codes == range(codes[0], codes[0] + len(codes))
				340	glyphIndexArray = array.array("H", [0] * len(codes))
				341	firstCode = codes[0]
				342	for i in range(len(codes)):
				343	code = codes[i]
				344	glyphIndexArray[code-firstCode] = ttFont.getGlyphID(self.cmap[code])
				345	if ttLib.endian <> "big":
				346	glyphIndexArray.byteswap()
				347	data = glyphIndexArray.tostring()
				348	header = struct.pack(">HHHHH",
				349	6, len(data) + 10, self.version, firstCode, len(self.cmap))
				350	return header + data
				351
				352	def toXML(self, writer, ttFont):
				353	codes = self.cmap.items()
				354	codes.sort()
				355	writer.begintag(self.__class__.__name__, [
				356	("platformID", self.platformID),
				357	("platEncID", self.platEncID),
				358	("version", self.version),
				359	])
				360	writer.newline()
				361
				362	for code, name in codes:
				363	writer.simpletag("map", code=hex(code), name=name)
				364	writer.newline()
				365
				366	writer.endtag(self.__class__.__name__)
				367	writer.newline()
				368
				369	def fromXML(self, (name, attrs, content), ttFont):
				370	self.version = safeEval(attrs["version"])
				371	self.cmap = {}
				372	for element in content:
				373	if type(element) <> type(()):
				374	continue
				375	name, attrs, content = element
				376	if name <> "map":
				377	continue
				378	self.cmap[safeEval(attrs["code"])] = attrs["name"]
				379
				380
				381	class cmap_format_unknown(CmapSubtable):
				382
				383	def decompile(self, data, ttFont):
				384	self.data = data
				385
				386	def compile(self, ttFont):
				387	return self.data
				388
				389
				390	cmap_classes = {
				391	0: cmap_format_0,
				392	2: cmap_format_2,
				393	4: cmap_format_4,
				394	6: cmap_format_6,
				395	}
				396
				397