- added assembler: we've got a full round trip now!
- added toXML() and fromXML() methods


git-svn-id: svn://svn.code.sf.net/p/fonttools/code/trunk@74 4cde692c-a291-49d1-8350-778aa11640f8
diff --git a/Lib/fontTools/ttLib/tables/ttProgram.py b/Lib/fontTools/ttLib/tables/ttProgram.py
index 8dd9faa..917c5e6 100644
--- a/Lib/fontTools/ttLib/tables/ttProgram.py
+++ b/Lib/fontTools/ttLib/tables/ttProgram.py
@@ -1,13 +1,14 @@
 """ttLib.tables.ttProgram.py -- Assembler/disassembler for TrueType bytecode programs."""
 
 import array
-
+import re, string
+from fontTools.misc.textTools import num2binary, binary2num, readHex
 
 # first, the list of instructions that eat bytes or words from the instruction stream
 
 streamInstructions = [
 #	------  -----------  -----  ------------------------ ---  ------  ----------------------------------  --------------
-#	opcode     mnemonic argbits         descriptive name pops pushes        eats from instruction stream          pushes
+#	opcode     mnemonic argBits         descriptive name pops pushes        eats from instruction stream          pushes
 #	------  -----------  -----  ------------------------ ---  ------  ----------------------------------  --------------
 	(0x40,    'NPUSHB',     0,             'PushNBytes',  0, -1), #                      n, b1, b2,...bn      b1,b2...bn
 	(0x41,    'NPUSHW',     0,             'PushNWords',  0, -1), #                       n, w1, w2,...w      w1,w2...wn
@@ -21,7 +22,7 @@
 
 instructions = [
 #	------  -----------  -----  ------------------------ ---  ------  ----------------------------------  --------------
-#	opcode     mnemonic  argbits        descriptive name pops pushes                                pops          pushes
+#	opcode     mnemonic  argBits        descriptive name pops pushes                                pops          pushes
 #	------  -----------  -----  ------------------------ ---  ------  ----------------------------------  --------------
 	(0x7f,        'AA',     0,            'AdjustAngle',  1,  0), #                                    p               -
 	(0x64,       'ABS',     0,               'Absolute',  1,  1), #                                    n             |n|
@@ -152,34 +153,58 @@
 		value = value >> 1
 	return s
 
-def makeOpcodeDict(instructionList):
+
+_mnemonicPat = re.compile("[A-Z][A-Z0-9]*$")
+
+def _makeDict(instructionList):
 	opcodeDict = {}
-	for op, mnemonic, argbits, name, pops, pushes in instructionList:
-		if argbits:
+	mnemonicDict = {}
+	for op, mnemonic, argBits, name, pops, pushes in instructionList:
+		assert _mnemonicPat.match(mnemonic)
+		mnemonicDict[mnemonic] = op, argBits, name
+		if argBits:
 			argoffset = op
-			for i in range(1 << argbits):
-				opcodeDict[op+i] = mnemonic, argbits, argoffset, name
+			for i in range(1 << argBits):
+				opcodeDict[op+i] = mnemonic, argBits, argoffset, name
 		else:
 				opcodeDict[op] = mnemonic, 0, 0, name
-	return opcodeDict
+	return opcodeDict, mnemonicDict
 
-streamOpcodeDict = makeOpcodeDict(streamInstructions)
-opcodeDict = makeOpcodeDict(instructions)
+streamOpcodeDict, streamMnemonicDict = _makeDict(streamInstructions)
+opcodeDict, mnemonicDict = _makeDict(instructions)
 
 tt_instructions_error = "TT instructions error"
 
 
+_comment = r"/\*.*?\*/"  # C-style comment; no DOTALL flag, so it cannot span lines
+_instruction = r"([A-Z][A-Z0-9]*)\s*\[(.*?)\]"  # mnemonic, then the [argument bits]
+_number = r"-?[0-9]+"  # decimal integer, optionally negative
+_token = "(%s)|(%s)|(%s)" % (_instruction, _number, _comment)  # groups: instruction, mnemonic, arg, number, comment
+
+_tokenRE = re.compile(_token)
+_whiteRE = re.compile(r"\s*")  # can match the empty string, so match() always succeeds
+
+def _skipWhite(data, pos, _whiteRE=_whiteRE):
+	# Index of the first non-whitespace character of data at or after pos.
+	end = _whiteRE.match(data, pos).end()
+	assert end >= pos
+	return end
+
+
 class Program:
 	
 	def __init__(self):
 		pass
 	
 	def fromBytecode(self, bytecode):
-		self.bytecode = array.array("B")
-		self.bytecode.fromstring(bytecode)
+		self.bytecode = array.array("B", bytecode)
+		if hasattr(self, "assembly"):
+			del self.assembly
 	
 	def fromAssembly(self, assembly):
 		self.assembly = assembly
+		if hasattr(self, "bytecode"):
+			del self.bytecode
 	
 	def getBytecode(self):
 		if not hasattr(self, "bytecode"):
@@ -191,8 +216,121 @@
 			self._disassemble()
 		return self.assembly
 	
-	def _assemble(self):
-		xxx
+	def toXML(self, writer, ttFont):
+		"""Write the program to writer: as an <assembly> element if
+		ttFont.disassembleInstructions is true, else as a <bytecode> hex dump."""
+		if ttFont.disassembleInstructions:
+			# Recognizes the push headers emitted by _disassemble(), e.g.
+			# "PUSHB[ ]  /* 3 values pushed */", and captures the value count.
+			pushCountPat = re.compile(r"N?PUSH[BW]\s*\[.*?\]\s*/\* ([0-9]+) values? pushed \*/")
+			assembly = self.getAssembly()
+			writer.begintag("assembly")
+			writer.newline()
+			i = 0
+			nInstr = len(assembly)
+			while i < nInstr:
+				instr = assembly[i]
+				writer.write(instr)
+				writer.newline()
+				m = pushCountPat.match(instr)
+				i = i + 1
+				if m:
+					# The next nValues items are the pushed values: 25 per line.
+					nValues = int(m.group(1))
+					for j in range(0, nValues, 25):
+						writer.write(string.join(assembly[i+j:i+min(j+25, nValues)], " "))
+						writer.newline()
+					i = i + nValues
+			writer.endtag("assembly")
+		else:
+			# No disassembly requested: dump the raw bytecode as hex.
+			writer.begintag("bytecode")
+			writer.newline()
+			writer.dumphex(self.getBytecode())
+			writer.endtag("bytecode")
+	
+	def fromXML(self, (name, attrs, content), ttFont):  # element triple, presumably from the XML reader -- confirm against caller
+		if name == "assembly":
+			self.fromAssembly(content)
+			self._assemble()  # build self.bytecode right away
+			del self.assembly  # keep only the bytecode form
+		else:
+			assert name == "bytecode"
+			self.fromBytecode(readHex(content))  # readHex is imported from fontTools.misc.textTools
+	
+	def _assemble(self, 
+			skipWhite=_skipWhite, mnemonicDict=mnemonicDict, strip=string.strip,
+			binary2num=binary2num):
+		"""Assemble self.assembly (a string, or a list of strings that is joined
+		with spaces) into self.bytecode; raises tt_instructions_error on bad syntax."""
+		assembly = self.assembly
+		if type(assembly) == type([]):
+			assembly = string.join(assembly, " ")
+		bytecode = []
+		push = bytecode.append
+		lenAssembly = len(assembly)
+		pos = skipWhite(assembly, 0)
+		while pos < lenAssembly:
+			m = _tokenRE.match(assembly, pos)
+			if m is None:
+				raise tt_instructions_error, "Syntax error in TT program (%s)" % assembly[max(pos-5, 0):pos+15]
+			dummy, mnemonic, arg, number, comment = m.groups()
+			# Skip the whitespace after the token right away, so 'continue' is safe too.
+			pos = skipWhite(assembly, m.regs[0][1])
+			if comment is not None:
+				continue
+			if mnemonic is None:
+				raise tt_instructions_error, "Push value without push instruction (%s)" % number
+			arg = strip(arg)
+			if mnemonic not in ("NPUSHB", "NPUSHW", "PUSHB", "PUSHW"):
+				op, argBits, name = mnemonicDict[mnemonic]
+				if len(arg) != argBits:
+					raise tt_instructions_error, "Incorrect number of argument bits (%s[%s])" % (mnemonic, arg)
+				if arg:
+					arg = binary2num(arg)
+					push(op + arg)
+				else:
+					push(op)
+			else:
+				args = []
+				while pos < lenAssembly:
+					m = _tokenRE.match(assembly, pos)
+					if m is None:
+						raise tt_instructions_error, "Syntax error in TT program (%s)" % assembly[pos:pos+15]
+					dummy, mnemonic, arg, number, comment = m.groups()
+					if number is None and comment is None:
+						break  # next instruction: leave it for the outer loop
+					pos = skipWhite(assembly, m.regs[0][1])
+					if comment is None:
+						args.append(int(number))
+				if not args:
+					raise tt_instructions_error, "Push instruction without values"
+				# The encoding is chosen from the values, not from the mnemonic used.
+				if max(args) > 255 or min(args) < 0:
+					words = 1
+					mnemonic = "PUSHW"
+				else:
+					words = 0
+					mnemonic = "PUSHB"
+				nArgs = len(args)
+				if nArgs <= 8:
+					op, argBits, name = streamMnemonicDict[mnemonic]
+					op = op + nArgs - 1
+					push(op)
+				elif nArgs < 256:
+					mnemonic = "N" + mnemonic
+					op, argBits, name = streamMnemonicDict[mnemonic]
+					push(op)
+					push(nArgs)
+				else:
+					raise tt_instructions_error, "More than 255 push arguments (%s)" % nArgs
+				for value in args:
+					if words:
+						push((value >> 8) & 0xff)  # high byte first
+					push(value & 0xff)
+		
+		if bytecode:
+			assert max(bytecode) < 256 and min(bytecode) >= 0
+		self.bytecode = array.array("B", bytecode)
 	
 	def _disassemble(self):
 		assembly = []
@@ -203,47 +341,57 @@
 			op = bytecode[i]
 			arg = 0
 			try:
-				mnemonic, argbits, argoffset, name = opcodeDict[op]
+				mnemonic, argBits, argoffset, name = opcodeDict[op]
 			except KeyError:
 				try:
-					mnemonic, argbits, argoffset, name = streamOpcodeDict[op]
+					mnemonic, argBits, argoffset, name = streamOpcodeDict[op]
 				except KeyError:
 					raise tt_instructions_error, "illegal opcode: 0x%.2x" % op
-				pushbytes = pushwords = 0
-				if argbits:
+				pushBytes = pushWords = 0
+				if argBits:
 					if mnemonic == "PUSHB":
-						pushbytes = op - argoffset + 1
+						pushBytes = op - argoffset + 1
 					else:
-						pushwords = op - argoffset + 1
+						pushWords = op - argoffset + 1
 				else:
 					i = i + 1
 					if mnemonic == "NPUSHB":
-						pushbytes = bytecode[i]
+						pushBytes = bytecode[i]
 					else:
-						pushwords = bytecode[i]
+						pushWords = bytecode[i]
 				i = i + 1
-				assembly.append(mnemonic + "[ ]")
-				for j in range(pushbytes):
-					assembly.append(`bytecode[i]`)
+				nValues = pushBytes or pushWords
+				assert nValues > 0
+				if nValues == 1:
+					assembly.append("%s[ ]  /* %s value pushed */" % (mnemonic, nValues))
+				else:
+					assembly.append("%s[ ]  /* %s values pushed */" % (mnemonic, nValues))
+				for j in range(pushBytes):
+					value = bytecode[i]
+					assembly.append(`value`)
 					i = i + 1
-				for j in range(0, pushwords, 2):
-					assembly.append(`(bytecode[i] << 8) + bytecode[i+1]`)
+				for j in range(pushWords):
+					# cast to signed int16
+					value = (bytecode[i] << 8) | bytecode[i+1]
+					if value >= 0x8000:
+						value = value - 0x10000
+					assembly.append(`value`)
 					i = i + 2
 			else:
-				if argbits:
-					assembly.append(mnemonic + "[%s]" % bitRepr(op - argoffset, argbits))
+				if argBits:
+					assembly.append(mnemonic + "[%s]" % num2binary(op - argoffset, argBits))
 				else:
 					assembly.append(mnemonic + "[ ]")
 				i = i + 1
 		self.assembly = assembly
-		del self.bytecode
 
 
-fpgm = '@\01476&%\037\023\022\015\014\005\004\002, \260\003%E#E#ah\212 Eh \212#D`D-,KRXED\033!!Y-,  EhD \260\001` E\260Fvh\030\212E`D-,\260\022+\260\002%E\260\002%Ej\260@\213`\260\002%#D!!!-,\260\023+\260\002%E\260\002%Ej\270\377\300\214`\260\002%#D!!!-,\261\000\003%EhTX\260\003%E\260\003%E`h \260\004%#D\260\004%#D\033\260\003% Eh \212#D\260\003%Eh`\260\003%#DY-,\260\003% Eh \212#D\260\003%Eh`\260\003%#D-,KRXED\033!!Y-,F#F`\212\212F# F\212`\212a\270\377\200b# \020#\212\261KK\212pE` \260\000PX\260\001a\270\377\272\213\033\260F\214Y\260\020`h\001:-, E\260\003%FRX?\033!\021Y-,KS#KQZX E\212`D\033!!Y-,KS#KQZX8\033!!Y-'
-gpgm = '@\022\011\003\207@\005\200\004\207\000\010\007\202\001\010\004\202\000\010\000\020\320\355\020\336\355\001\020\336\375\032}\336\032\030\375\31610'
-
-p = Program()
-p.fromBytecode(fpgm)
-for line in p.getAssembly():
-	print line
+if __name__ == "__main__":
+	bc = """@;:9876543210/.-,+*)(\'&%$#"! \037\036\035\034\033\032\031\030\027\026\025\024\023\022\021\020\017\016\015\014\013\012\011\010\007\006\005\004\003\002\001\000,\001\260\030CXEj\260\031C`\260F#D#\020 \260FN\360M/\260\000\022\033!#\0213Y-,\001\260\030CX\260\005+\260\000\023K\260\024PX\261\000@8Y\260\006+\033!#\0213Y-,\001\260\030CXN\260\003%\020\362!\260\000\022M\033 E\260\004%\260\004%#Jad\260(RX!#\020\326\033\260\003%\020\362!\260\000\022YY-,\260\032CX!!\033\260\002%\260\002%I\260\003%\260\003%Ja d\260\020PX!!!\033\260\003%\260\003%I\260\000PX\260\000PX\270\377\3428!\033\260\0208!Y\033\260\000RX\260\0368!\033\270\377\3608!YYYY-,\001\260\030CX\260\005+\260\000\023K\260\024PX\271\000\000\377\3008Y\260\006+\033!#\0213Y-,N\001\212\020\261F\031CD\260\000\024\261\000F\342\260\000\025\271\000\000\377\3608\000\260\000<\260(+\260\002%\020\260\000<-,\001\030\260\000/\260\001\024\362\260\001\023\260\001\025M\260\000\022-,\001\260\030CX\260\005+\260\000\023\271\000\000\377\3408\260\006+\033!#\0213Y-,\001\260\030CXEdj#Edi\260\031Cd``\260F#D#\020 \260F\360/\260\000\022\033!! 
\212 \212RX\0213\033!!YY-,\001\261\013\012C#Ce\012-,\000\261\012\013C#C\013-,\000\260F#p\261\001F>\001\260F#p\261\002FE:\261\002\000\010\015-,\260\022+\260\002%E\260\002%Ej\260@\213`\260\002%#D!!!-,\260\023+\260\002%E\260\002%Ej\270\377\300\214`\260\002%#D!!!-,\260\000\260\022+!!!-,\260\000\260\023+!!!-,\001\260\006C\260\007Ce\012-, i\260@a\260\000\213 \261,\300\212\214\270\020\000b`+\014d#da\\X\260\003aY-,\261\000\003%EhT\260\034KPZX\260\003%E\260\003%E`h \260\004%#D\260\004%#D\033\260\003% Eh \212#D\260\003%Eh`\260\003%#DY-,\260\003% Eh \212#D\260\003%Edhe`\260\004%\260\001`#D-,\260\011CX\207!\300\033\260\022CX\207E\260\021+\260G#D\260Gz\344\033\003\212E\030i \260G#D\212\212\207 \260\240QX\260\021+\260G#D\260Gz\344\033!\260Gz\344YYY\030-, \212E#Eh`D-,EjB-,\001\030/-,\001\260\030CX\260\004%\260\004%Id#Edi\260@\213a \260\200bj\260\002%\260\002%a\214\260\031C`\260F#D!\212\020\260F\366!\033!!!!Y-,\001\260\030CX\260\002%E\260\002%Ed`j\260\003%Eja \260\004%Ej \212\213e\260\004%#D\214\260\003%#D!!\033 EjD EjDY-,\001 E\260\000U\260\030CZXEh#Ei\260@\213a \260\200bj \212#a \260\003%\213e\260\004%#D\214\260\003%#D!!\033!!\260\031+Y-,\001\212\212Ed#EdadB-,\260\004%\260\004%\260\031+\260\030CX\260\004%\260\004%\260\003%\260\033+\001\260\002%C\260@T\260\002%C\260\000TZX\260\003% E\260@aDY\260\002%C\260\000T\260\002%C\260@TZX\260\004% E\260@`DYY!!!!-,\001KRXC\260\002%E#aD\033!!Y-,\001KRXC\260\002%E#`D\033!!Y-,KRXED\033!!Y-,\001 \260\003%#I\260@`\260 c \260\000RX#\260\002%8#\260\002%e8\000\212c8\033!!!!!Y\001-,KPXED\033!!Y-,\001\260\005%\020# \212\365\000\260\001`#\355\354-,\001\260\005%\020# \212\365\000\260\001a#\355\354-,\001\260\006%\020\365\000\355\354-,F#F`\212\212F# F\212`\212a\270\377\200b# \020#\212\261KK\212pE` \260\000PX\260\001a\270\377\272\213\033\260F\214Y\260\020`h\001:-, E\260\003%FRX\260\002%F ha\260\003%\260\003%?#!8\033!\021Y-, E\260\003%FPX\260\002%F ha\260\003%\260\003%?#!8\033!\021Y-,\000\260\007C\260\006C\013-,\212\020\354-,\260\014CX!\033 
F\260\000RX\270\377\3608\033\260\0208YY-, \260\000UX\270\020\000c\260\003%Ed\260\003%Eda\260\000SX\260\002\033\260@a\260\003Y%EiSXED\033!!Y\033!\260\002%E\260\002%Ead\260(QXED\033!!YY-,!!\014d#d\213\270@\000b-,!\260\200QX\014d#d\213\270 \000b\033\262\000@/+Y\260\002`-,!\260\300QX\014d#d\213\270\025Ub\033\262\000\200/+Y\260\002`-,\014d#d\213\270@\000b`#!-,KSX\260\004%\260\004%Id#Edi\260@\213a \260\200bj\260\002%\260\002%a\214\260F#D!\212\020\260F\366!\033!\212\021#\022 9/Y-,\260\002%\260\002%Id\260\300TX\270\377\3708\260\0108\033!!Y-,\260\023CX\003\033\002Y-,\260\023CX\002\033\003Y-,\260\012+#\020 <\260\027+-,\260\002%\270\377\3608\260(+\212\020# \320#\260\020+\260\005CX\300\033<Y \020\021\260\000\022\001-,KS#KQZX8\033!!Y-,\001\260\002%\020\320#\311\001\260\001\023\260\000\024\020\260\001<\260\001\026-,\001\260\000\023\260\001\260\003%I\260\003\0278\260\001\023-,KS#KQZX E\212`D\033!!Y-, 9/-"""
+	
+	p = Program()
+	p.fromBytecode(bc)
+	asm = p.getAssembly()  # not named 'as': that is a reserved word from Python 2.6 on
+	p.fromAssembly(asm)
+	print bc == p.getBytecode()  # round trip: disassemble, reassemble, compare