- added assembler: we've got a full round trip now!
- added toXML() and fromXML() methods
git-svn-id: svn://svn.code.sf.net/p/fonttools/code/trunk@74 4cde692c-a291-49d1-8350-778aa11640f8
diff --git a/Lib/fontTools/ttLib/tables/ttProgram.py b/Lib/fontTools/ttLib/tables/ttProgram.py
index 8dd9faa..917c5e6 100644
--- a/Lib/fontTools/ttLib/tables/ttProgram.py
+++ b/Lib/fontTools/ttLib/tables/ttProgram.py
@@ -1,13 +1,14 @@
"""ttLib.tables.ttProgram.py -- Assembler/disassembler for TrueType bytecode programs."""
import array
-
+import re, string
+from fontTools.misc.textTools import num2binary, binary2num, readHex
# first, the list of instructions that eat bytes or words from the instruction stream
streamInstructions = [
# ------ ----------- ----- ------------------------ --- ------ ---------------------------------- --------------
-# opcode mnemonic argbits descriptive name pops pushes eats from instruction stream pushes
+# opcode mnemonic argBits descriptive name pops pushes eats from instruction stream pushes
# ------ ----------- ----- ------------------------ --- ------ ---------------------------------- --------------
(0x40, 'NPUSHB', 0, 'PushNBytes', 0, -1), # n, b1, b2,...bn b1,b2...bn
(0x41, 'NPUSHW', 0, 'PushNWords', 0, -1), # n, w1, w2,...w w1,w2...wn
@@ -21,7 +22,7 @@
instructions = [
# ------ ----------- ----- ------------------------ --- ------ ---------------------------------- --------------
-# opcode mnemonic argbits descriptive name pops pushes pops pushes
+# opcode mnemonic argBits descriptive name pops pushes pops pushes
# ------ ----------- ----- ------------------------ --- ------ ---------------------------------- --------------
(0x7f, 'AA', 0, 'AdjustAngle', 1, 0), # p -
(0x64, 'ABS', 0, 'Absolute', 1, 1), # n |n|
@@ -152,34 +153,58 @@
value = value >> 1
return s
-def makeOpcodeDict(instructionList):
+
+_mnemonicPat = re.compile("[A-Z][A-Z0-9]*$")
+
+def _makeDict(instructionList):
opcodeDict = {}
- for op, mnemonic, argbits, name, pops, pushes in instructionList:
- if argbits:
+ mnemonicDict = {}
+ for op, mnemonic, argBits, name, pops, pushes in instructionList:
+ assert _mnemonicPat.match(mnemonic)
+ mnemonicDict[mnemonic] = op, argBits, name
+ if argBits:
argoffset = op
- for i in range(1 << argbits):
- opcodeDict[op+i] = mnemonic, argbits, argoffset, name
+ for i in range(1 << argBits):
+ opcodeDict[op+i] = mnemonic, argBits, argoffset, name
else:
opcodeDict[op] = mnemonic, 0, 0, name
- return opcodeDict
+ return opcodeDict, mnemonicDict
-streamOpcodeDict = makeOpcodeDict(streamInstructions)
-opcodeDict = makeOpcodeDict(instructions)
+streamOpcodeDict, streamMnemonicDict = _makeDict(streamInstructions)
+opcodeDict, mnemonicDict = _makeDict(instructions)
tt_instructions_error = "TT instructions error"
+_comment = r"/\*.*?\*/"
+_instruction = r"([A-Z][A-Z0-9]*)\s*\[(.*?)\]"
+_number = r"-?[0-9]+"
+_token = "(%s)|(%s)|(%s)" % (_instruction, _number, _comment)
+
+_tokenRE = re.compile(_token)
+_whiteRE = re.compile(r"\s*")
+
+def _skipWhite(data, pos, _whiteRE=_whiteRE):
+ m = _whiteRE.match(data, pos)
+ newPos = m.regs[0][1]
+ assert newPos >= pos
+ return newPos
+
+
class Program:
def __init__(self):
pass
def fromBytecode(self, bytecode):
- self.bytecode = array.array("B")
- self.bytecode.fromstring(bytecode)
+ self.bytecode = array.array("B", bytecode)
+ if hasattr(self, "assembly"):
+ del self.assembly
def fromAssembly(self, assembly):
self.assembly = assembly
+ if hasattr(self, "bytecode"):
+ del self.bytecode
def getBytecode(self):
if not hasattr(self, "bytecode"):
@@ -191,8 +216,121 @@
self._disassemble()
return self.assembly
- def _assemble(self):
- xxx
+ def toXML(self, writer, ttFont):
+ if ttFont.disassembleInstructions:
+ assembly = self.getAssembly()
+ writer.begintag("assembly")
+ writer.newline()
+ i = 0
+ nInstr = len(assembly)
+ while i < nInstr:
+ instr = assembly[i]
+ writer.write(instr)
+ writer.newline()
+ m = _pushCountPat.match(instr)
+ i = i + 1
+ if m:
+ nValues = int(m.group(1))
+ line = []
+ j = 0
+ for j in range(nValues):
+ if j and not (j % 25):
+ writer.write(string.join(line, " "))
+ writer.newline()
+ line = []
+ line.append(assembly[i+j])
+ writer.write(string.join(line, " "))
+ writer.newline()
+ i = i + j + 1
+ writer.endtag("assembly")
+ else:
+ writer.begintag("bytecode")
+ writer.newline()
+ writer.dumphex(self.getBytecode())
+ writer.endtag("bytecode")
+
+ def fromXML(self, (name, attrs, content), ttFont):
+ if name == "assembly":
+ self.fromAssembly(content)
+ self._assemble()
+ del self.assembly
+ else:
+ assert name == "bytecode"
+ self.fromBytecode(readHex(content))
+
+ def _assemble(self,
+ skipWhite=_skipWhite, mnemonicDict=mnemonicDict, strip=string.strip,
+ binary2num=binary2num):
+ assembly = self.assembly
+ if type(assembly) == type([]):
+ assembly = string.join(assembly, " ")
+ bytecode = []
+ push = bytecode.append
+ lenAssembly = len(assembly)
+ pos = skipWhite(assembly, 0)
+ while pos < lenAssembly:
+ m = _tokenRE.match(assembly, pos)
+ if m is None:
+ raise tt_instructions_error, "Syntax error in TT program (%s)" % assembly[pos-5:pos+15]
+ dummy, mnemonic, arg, number, comment = m.groups()
+ pos = m.regs[0][1]
+ if comment:
+ continue
+
+ arg = strip(arg)
+ if mnemonic not in ("NPUSHB", "NPUSHW", "PUSHB", "PUSHW"):
+ op, argBits, name = mnemonicDict[mnemonic]
+ if len(arg) <> argBits:
+ raise tt_instructions_error, "Incorrect number of argument bits (%s[%s])" % (mnemonic, arg)
+ if arg:
+ arg = binary2num(arg)
+ push(op + arg)
+ else:
+ push(op)
+ else:
+ args = []
+ while pos < lenAssembly:
+ pos = skipWhite(assembly, pos)
+ m = _tokenRE.match(assembly, pos)
+ if m is None:
+ raise tt_instructions_error, "Syntax error in TT program (%s)" % assembly[pos:pos+15]
+ dummy, mnemonic, arg, number, comment = m.groups()
+ if number is None and comment is None:
+ break
+ pos = m.regs[0][1]
+ if comment is not None:
+ continue
+ args.append(int(number))
+ if max(args) > 255 or min(args) < 0:
+ words = 1
+ mnemonic = "PUSHW"
+ else:
+ words = 0
+ mnemonic = "PUSHB"
+ nArgs = len(args)
+ if nArgs <= 8:
+ op, argBits, name = streamMnemonicDict[mnemonic]
+ op = op + nArgs - 1
+ push(op)
+ elif nArgs < 256:
+ mnemonic = "N" + mnemonic
+ op, argBits, name = streamMnemonicDict[mnemonic]
+ push(op)
+ push(nArgs)
+ else:
+ raise tt_instructions_error, "More than 255 push arguments (%s)" % nArgs
+ if words:
+ for value in args:
+ push((value >> 8) & 0xff)
+ push(value & 0xff)
+ else:
+ for value in args:
+ push(value)
+ pos = skipWhite(assembly, pos)
+
+ if bytecode:
+ assert max(bytecode) < 256 and min(bytecode) >= 0
+ self.bytecode = array.array("B", bytecode)
def _disassemble(self):
assembly = []
@@ -203,47 +341,57 @@
op = bytecode[i]
arg = 0
try:
- mnemonic, argbits, argoffset, name = opcodeDict[op]
+ mnemonic, argBits, argoffset, name = opcodeDict[op]
except KeyError:
try:
- mnemonic, argbits, argoffset, name = streamOpcodeDict[op]
+ mnemonic, argBits, argoffset, name = streamOpcodeDict[op]
except KeyError:
raise tt_instructions_error, "illegal opcode: 0x%.2x" % op
- pushbytes = pushwords = 0
- if argbits:
+ pushBytes = pushWords = 0
+ if argBits:
if mnemonic == "PUSHB":
- pushbytes = op - argoffset + 1
+ pushBytes = op - argoffset + 1
else:
- pushwords = op - argoffset + 1
+ pushWords = op - argoffset + 1
else:
i = i + 1
if mnemonic == "NPUSHB":
- pushbytes = bytecode[i]
+ pushBytes = bytecode[i]
else:
- pushwords = bytecode[i]
+ pushWords = bytecode[i]
i = i + 1
- assembly.append(mnemonic + "[ ]")
- for j in range(pushbytes):
- assembly.append(`bytecode[i]`)
+ nValues = pushBytes or pushWords
+ assert nValues > 0
+ if nValues == 1:
+ assembly.append("%s[ ] /* %s value pushed */" % (mnemonic, nValues))
+ else:
+ assembly.append("%s[ ] /* %s values pushed */" % (mnemonic, nValues))
+ for j in range(pushBytes):
+ value = bytecode[i]
+ assembly.append(`value`)
i = i + 1
- for j in range(0, pushwords, 2):
- assembly.append(`(bytecode[i] << 8) + bytecode[i+1]`)
+ for j in range(pushWords):
+ # cast to signed int16
+ value = (bytecode[i] << 8) | bytecode[i+1]
+ if value >= 0x8000:
+ value = value - 0x10000
+ assembly.append(`value`)
i = i + 2
else:
- if argbits:
- assembly.append(mnemonic + "[%s]" % bitRepr(op - argoffset, argbits))
+ if argBits:
+ assembly.append(mnemonic + "[%s]" % num2binary(op - argoffset, argBits))
else:
assembly.append(mnemonic + "[ ]")
i = i + 1
self.assembly = assembly
- del self.bytecode
-fpgm = '@\01476&%\037\023\022\015\014\005\004\002, \260\003%E#E#ah\212 Eh \212#D`D-,KRXED\033!!Y-, EhD \260\001` E\260Fvh\030\212E`D-,\260\022+\260\002%E\260\002%Ej\260@\213`\260\002%#D!!!-,\260\023+\260\002%E\260\002%Ej\270\377\300\214`\260\002%#D!!!-,\261\000\003%EhTX\260\003%E\260\003%E`h \260\004%#D\260\004%#D\033\260\003% Eh \212#D\260\003%Eh`\260\003%#DY-,\260\003% Eh \212#D\260\003%Eh`\260\003%#D-,KRXED\033!!Y-,F#F`\212\212F# F\212`\212a\270\377\200b# \020#\212\261KK\212pE` \260\000PX\260\001a\270\377\272\213\033\260F\214Y\260\020`h\001:-, E\260\003%FRX?\033!\021Y-,KS#KQZX E\212`D\033!!Y-,KS#KQZX8\033!!Y-'
-gpgm = '@\022\011\003\207@\005\200\004\207\000\010\007\202\001\010\004\202\000\010\000\020\320\355\020\336\355\001\020\336\375\032}\336\032\030\375\31610'
-
-p = Program()
-p.fromBytecode(fpgm)
-for line in p.getAssembly():
- print line
+if __name__ == "__main__":
+ bc = """@;:9876543210/.-,+*)(\'&%$#"! \037\036\035\034\033\032\031\030\027\026\025\024\023\022\021\020\017\016\015\014\013\012\011\010\007\006\005\004\003\002\001\000,\001\260\030CXEj\260\031C`\260F#D#\020 \260FN\360M/\260\000\022\033!#\0213Y-,\001\260\030CX\260\005+\260\000\023K\260\024PX\261\000@8Y\260\006+\033!#\0213Y-,\001\260\030CXN\260\003%\020\362!\260\000\022M\033 E\260\004%\260\004%#Jad\260(RX!#\020\326\033\260\003%\020\362!\260\000\022YY-,\260\032CX!!\033\260\002%\260\002%I\260\003%\260\003%Ja d\260\020PX!!!\033\260\003%\260\003%I\260\000PX\260\000PX\270\377\3428!\033\260\0208!Y\033\260\000RX\260\0368!\033\270\377\3608!YYYY-,\001\260\030CX\260\005+\260\000\023K\260\024PX\271\000\000\377\3008Y\260\006+\033!#\0213Y-,N\001\212\020\261F\031CD\260\000\024\261\000F\342\260\000\025\271\000\000\377\3608\000\260\000<\260(+\260\002%\020\260\000<-,\001\030\260\000/\260\001\024\362\260\001\023\260\001\025M\260\000\022-,\001\260\030CX\260\005+\260\000\023\271\000\000\377\3408\260\006+\033!#\0213Y-,\001\260\030CXEdj#Edi\260\031Cd``\260F#D#\020 \260F\360/\260\000\022\033!! \212 \212RX\0213\033!!YY-,\001\261\013\012C#Ce\012-,\000\261\012\013C#C\013-,\000\260F#p\261\001F>\001\260F#p\261\002FE:\261\002\000\010\015-,\260\022+\260\002%E\260\002%Ej\260@\213`\260\002%#D!!!-,\260\023+\260\002%E\260\002%Ej\270\377\300\214`\260\002%#D!!!-,\260\000\260\022+!!!-,\260\000\260\023+!!!-,\001\260\006C\260\007Ce\012-, i\260@a\260\000\213 \261,\300\212\214\270\020\000b`+\014d#da\\X\260\003aY-,\261\000\003%EhT\260\034KPZX\260\003%E\260\003%E`h \260\004%#D\260\004%#D\033\260\003% Eh \212#D\260\003%Eh`\260\003%#DY-,\260\003% Eh \212#D\260\003%Edhe`\260\004%\260\001`#D-,\260\011CX\207!\300\033\260\022CX\207E\260\021+\260G#D\260Gz\344\033\003\212E\030i \260G#D\212\212\207 \260\240QX\260\021+\260G#D\260Gz\344\033!\260Gz\344YYY\030-, \212E#Eh`D-,EjB-,\001\030/-,\001\260\030CX\260\004%\260\004%Id#Edi\260@\213a \260\200bj\260\002%\260\002%a\214\260\031C`\260F#D!\212\020\260F\366!\033!!!!Y-,\001\260\030CX\260\002%E\260\002%Ed`j\260\003%Eja \260\004%Ej \212\213e\260\004%#D\214\260\003%#D!!\033 EjD EjDY-,\001 E\260\000U\260\030CZXEh#Ei\260@\213a \260\200bj \212#a \260\003%\213e\260\004%#D\214\260\003%#D!!\033!!\260\031+Y-,\001\212\212Ed#EdadB-,\260\004%\260\004%\260\031+\260\030CX\260\004%\260\004%\260\003%\260\033+\001\260\002%C\260@T\260\002%C\260\000TZX\260\003% E\260@aDY\260\002%C\260\000T\260\002%C\260@TZX\260\004% E\260@`DYY!!!!-,\001KRXC\260\002%E#aD\033!!Y-,\001KRXC\260\002%E#`D\033!!Y-,KRXED\033!!Y-,\001 \260\003%#I\260@`\260 c \260\000RX#\260\002%8#\260\002%e8\000\212c8\033!!!!!Y\001-,KPXED\033!!Y-,\001\260\005%\020# \212\365\000\260\001`#\355\354-,\001\260\005%\020# \212\365\000\260\001a#\355\354-,\001\260\006%\020\365\000\355\354-,F#F`\212\212F# F\212`\212a\270\377\200b# \020#\212\261KK\212pE` \260\000PX\260\001a\270\377\272\213\033\260F\214Y\260\020`h\001:-, E\260\003%FRX\260\002%F ha\260\003%\260\003%?#!8\033!\021Y-, E\260\003%FPX\260\002%F ha\260\003%\260\003%?#!8\033!\021Y-,\000\260\007C\260\006C\013-,\212\020\354-,\260\014CX!\033 F\260\000RX\270\377\3608\033\260\0208YY-, \260\000UX\270\020\000c\260\003%Ed\260\003%Eda\260\000SX\260\002\033\260@a\260\003Y%EiSXED\033!!Y\033!\260\002%E\260\002%Ead\260(QXED\033!!YY-,!!\014d#d\213\270@\000b-,!\260\200QX\014d#d\213\270 \000b\033\262\000@/+Y\260\002`-,!\260\300QX\014d#d\213\270\025Ub\033\262\000\200/+Y\260\002`-,\014d#d\213\270@\000b`#!-,KSX\260\004%\260\004%Id#Edi\260@\213a \260\200bj\260\002%\260\002%a\214\260F#D!\212\020\260F\366!\033!\212\021#\022 9/Y-,\260\002%\260\002%Id\260\300TX\270\377\3708\260\0108\033!!Y-,\260\023CX\003\033\002Y-,\260\023CX\002\033\003Y-,\260\012+#\020 <\260\027+-,\260\002%\270\377\3608\260(+\212\020# \320#\260\020+\260\005CX\300\033<Y \020\021\260\000\022\001-,KS#KQZX8\033!!Y-,\001\260\002%\020\320#\311\001\260\001\023\260\000\024\020\260\001<\260\001\026-,\001\260\000\023\260\001\260\003%I\260\003\0278\260\001\023-,KS#KQZX E\212`D\033!!Y-, 9/-"""
+
+ p = Program()
+ p.fromBytecode(bc)
+ as = p.getAssembly()
+ p.fromAssembly(as)
+ print bc == p.getBytecode()