Blame - Lib/compiler/pyassem.py - platform/external/python/cpython3

blob: 2200dec13abcabf204c6b399ecc0708a445a343c [file] [log] [blame]

Jeremy Hylton	a505812	2000-02-14 14:14:29 +0000	[diff] [blame]	1	"""Assembler for Python bytecode
				2
				3	The new module is used to create the code object. The following
				4	attribute definitions are included from the reference manual:
				5
				6	co_name gives the function name
				7	co_argcount is the number of positional arguments (including
				8	arguments with default values)
				9	co_nlocals is the number of local variables used by the function
				10	(including arguments)
				11	co_varnames is a tuple containing the names of the local variables
				12	(starting with the argument names)
				13	co_code is a string representing the sequence of bytecode instructions
				14	co_consts is a tuple containing the literals used by the bytecode
				15	co_names is a tuple containing the names used by the bytecode
				16	co_filename is the filename from which the code was compiled
				17	co_firstlineno is the first line number of the function
				18	co_lnotab is a string encoding the mapping from byte code offsets
				19	to line numbers. see LineAddrTable below.
				20	co_stacksize is the required stack size (including local variables)
				21	co_flags is an integer encoding a number of flags for the
				22	interpreter. There are four flags:
				23	CO_OPTIMIZED -- uses load fast
				24	CO_NEWLOCALS -- everything?
				25	CO_VARARGS -- use *args
				26	CO_VARKEYWORDS -- uses **args
				27
				28	If a code object represents a function, the first item in co_consts is
				29	the documentation string of the function, or None if undefined.
				30	"""
				31
				32	import sys
				33	import dis
				34	import new
				35	import string
				36
				37	import misc
				38
				39	# flags for code objects
				40	CO_OPTIMIZED = 0x0001
				41	CO_NEWLOCALS = 0x0002
				42	CO_VARARGS = 0x0004
				43	CO_VARKEYWORDS = 0x0008
				44
				45	class PyAssembler:
				46	"""Creates Python code objects
				47	"""
				48
				49	# XXX this class needs to major refactoring
				50
				51	def __init__(self, args=(), name='?', filename='<?>',
				52	docstring=None):
				53	# XXX why is the default value for flags 3?
				54	self.insts = []
				55	# used by makeCodeObject
				56	self.argcount = len(args)
				57	self.code = ''
				58	self.consts = [docstring]
				59	self.filename = filename
				60	self.flags = CO_NEWLOCALS
				61	self.name = name
				62	self.names = []
				63	self.varnames = list(args) or []
				64	# lnotab support
				65	self.firstlineno = 0
				66	self.lastlineno = 0
				67	self.last_addr = 0
				68	self.lnotab = ''
				69
				70	def __repr__(self):
				71	return "<bytecode: %d instrs>" % len(self.insts)
				72
				73	def setFlags(self, val):
				74	"""XXX for module's function"""
				75	self.flags = val
				76
				77	def setOptimized(self):
				78	self.flags = self.flags \| CO_OPTIMIZED
				79
				80	def setVarArgs(self):
				81	self.flags = self.flags \| CO_VARARGS
				82
				83	def setKWArgs(self):
				84	self.flags = self.flags \| CO_VARKEYWORDS
				85
				86	def getCurInst(self):
				87	return len(self.insts)
				88
				89	def getNextInst(self):
				90	return len(self.insts) + 1
				91
				92	def dump(self, io=sys.stdout):
				93	i = 0
				94	for inst in self.insts:
				95	if inst[0] == 'SET_LINENO':
				96	io.write("\n")
				97	io.write(" %3d " % i)
				98	if len(inst) == 1:
				99	io.write("%s\n" % inst)
				100	else:
				101	io.write("%-15.15s\t%s\n" % inst)
				102	i = i + 1
				103
				104	def makeCodeObject(self):
				105	"""Make a Python code object
				106
				107	This creates a Python code object using the new module. This
				108	seems simpler than reverse-engineering the way marshal dumps
				109	code objects into .pyc files. One of the key difficulties is
				110	figuring out how to layout references to code objects that
				111	appear on the VM stack; e.g.
				112	3 SET_LINENO 1
				113	6 LOAD_CONST 0 (<code object fact at 8115878 [...]
				114	9 MAKE_FUNCTION 0
				115	12 STORE_NAME 0 (fact)
				116	"""
				117
				118	self._findOffsets()
				119	lnotab = LineAddrTable()
				120	for t in self.insts:
				121	opname = t[0]
				122	if len(t) == 1:
				123	lnotab.addCode(chr(self.opnum[opname]))
				124	elif len(t) == 2:
				125	oparg = self._convertArg(opname, t[1])
				126	if opname == 'SET_LINENO':
				127	lnotab.nextLine(oparg)
				128	try:
				129	hi, lo = divmod(oparg, 256)
				130	except TypeError:
				131	raise TypeError, "untranslated arg: %s, %s" % (opname, oparg)
				132	lnotab.addCode(chr(self.opnum[opname]) + chr(lo) +
				133	chr(hi))
				134	# why is a module a special case?
				135	if self.flags == 0:
				136	nlocals = 0
				137	else:
				138	nlocals = len(self.varnames)
				139	# XXX danger! can't pass through here twice
				140	if self.flags & CO_VARKEYWORDS:
				141	self.argcount = self.argcount - 1
				142	stacksize = findDepth(self.insts)
Jeremy Hylton	410e840	2000-02-15 21:59:50 +0000	[diff] [blame^]	143	try:
				144	co = new.code(self.argcount, nlocals, stacksize,
				145	self.flags, lnotab.getCode(), self._getConsts(),
				146	tuple(self.names), tuple(self.varnames),
				147	self.filename, self.name, self.firstlineno,
				148	lnotab.getTable())
				149	except SystemError, err:
				150	print err
				151	print repr(self.argcount)
				152	print repr(nlocals)
				153	print repr(stacksize)
				154	print repr(self.flags)
				155	print repr(lnotab.getCode())
				156	print repr(self._getConsts())
				157	print repr(self.names)
				158	print repr(self.varnames)
				159	print repr(self.filename)
				160	print repr(self.name)
				161	print repr(self.firstlineno)
				162	print repr(lnotab.getTable())
				163	raise
Jeremy Hylton	a505812	2000-02-14 14:14:29 +0000	[diff] [blame]	164	return co
				165
				166	def _getConsts(self):
				167	"""Return a tuple for the const slot of a code object
				168
				169	Converts PythonVMCode objects to code objects
				170	"""
				171	l = []
				172	for elt in self.consts:
				173	# XXX might be clearer to just as isinstance(CodeGen)
				174	if hasattr(elt, 'asConst'):
				175	l.append(elt.asConst())
				176	else:
				177	l.append(elt)
				178	return tuple(l)
				179
				180	def _findOffsets(self):
				181	"""Find offsets for use in resolving StackRefs"""
				182	self.offsets = []
				183	cur = 0
				184	for t in self.insts:
				185	self.offsets.append(cur)
				186	l = len(t)
				187	if l == 1:
				188	cur = cur + 1
				189	elif l == 2:
				190	cur = cur + 3
				191	arg = t[1]
				192	# XXX this is a total hack: for a reference used
				193	# multiple times, we create a list of offsets and
				194	# expect that we when we pass through the code again
				195	# to actually generate the offsets, we'll pass in the
				196	# same order.
				197	if isinstance(arg, StackRef):
				198	try:
				199	arg.__offset.append(cur)
				200	except AttributeError:
				201	arg.__offset = [cur]
				202
				203	def _convertArg(self, op, arg):
				204	"""Convert the string representation of an arg to a number
				205
				206	The specific handling depends on the opcode.
				207
				208	XXX This first implementation isn't going to be very
				209	efficient.
				210	"""
				211	if op == 'SET_LINENO':
				212	return arg
				213	if op == 'LOAD_CONST':
				214	return self._lookupName(arg, self.consts)
				215	if op in self.localOps:
				216	# make sure it's in self.names, but use the bytecode offset
				217	self._lookupName(arg, self.names)
				218	return self._lookupName(arg, self.varnames)
				219	if op in self.globalOps:
				220	return self._lookupName(arg, self.names)
				221	if op in self.nameOps:
				222	return self._lookupName(arg, self.names)
				223	if op == 'COMPARE_OP':
				224	return self.cmp_op.index(arg)
				225	if self.hasjrel.has_elt(op):
				226	offset = arg.__offset[0]
				227	del arg.__offset[0]
				228	return self.offsets[arg.resolve()] - offset
				229	if self.hasjabs.has_elt(op):
				230	return self.offsets[arg.resolve()]
				231	return arg
				232
				233	nameOps = ('STORE_NAME', 'IMPORT_NAME', 'IMPORT_FROM',
				234	'STORE_ATTR', 'LOAD_ATTR', 'LOAD_NAME', 'DELETE_NAME')
				235	localOps = ('LOAD_FAST', 'STORE_FAST', 'DELETE_FAST')
				236	globalOps = ('LOAD_GLOBAL', 'STORE_GLOBAL', 'DELETE_GLOBAL')
				237
				238	def _lookupName(self, name, list, list2=None):
				239	"""Return index of name in list, appending if necessary
				240
				241	Yicky hack: Second list can be used for lookup of local names
				242	where the name needs to be added to varnames and names.
				243	"""
				244	if name in list:
				245	return list.index(name)
				246	else:
				247	end = len(list)
				248	list.append(name)
				249	if list2 is not None:
				250	list2.append(name)
				251	return end
				252
				253	# Convert some stuff from the dis module for local use
				254
				255	cmp_op = list(dis.cmp_op)
				256	hasjrel = misc.Set()
				257	for i in dis.hasjrel:
				258	hasjrel.add(dis.opname[i])
				259	hasjabs = misc.Set()
				260	for i in dis.hasjabs:
				261	hasjabs.add(dis.opname[i])
				262
				263	opnum = {}
				264	for num in range(len(dis.opname)):
				265	opnum[dis.opname[num]] = num
				266
				267	# this version of emit + arbitrary hooks might work, but it's damn
				268	# messy.
				269
				270	def emit(self, *args):
				271	self._emitDispatch(args[0], args[1:])
				272	self.insts.append(args)
				273
				274	def _emitDispatch(self, type, args):
				275	for func in self._emit_hooks.get(type, []):
				276	func(self, args)
				277
				278	_emit_hooks = {}
				279
				280	class LineAddrTable:
				281	"""lnotab
				282
				283	This class builds the lnotab, which is undocumented but described
				284	by com_set_lineno in compile.c. Here's an attempt at explanation:
				285
				286	For each SET_LINENO instruction after the first one, two bytes are
				287	added to lnotab. (In some cases, multiple two-byte entries are
				288	added.) The first byte is the distance in bytes between the
				289	instruction for the last SET_LINENO and the current SET_LINENO.
				290	The second byte is offset in line numbers. If either offset is
				291	greater than 255, multiple two-byte entries are added -- one entry
				292	for each factor of 255.
				293	"""
				294
				295	def __init__(self):
				296	self.code = []
				297	self.codeOffset = 0
				298	self.firstline = 0
				299	self.lastline = 0
				300	self.lastoff = 0
				301	self.lnotab = []
				302
				303	def addCode(self, code):
				304	self.code.append(code)
				305	self.codeOffset = self.codeOffset + len(code)
				306
				307	def nextLine(self, lineno):
				308	if self.firstline == 0:
				309	self.firstline = lineno
				310	self.lastline = lineno
				311	else:
				312	# compute deltas
				313	addr = self.codeOffset - self.lastoff
				314	line = lineno - self.lastline
				315	while addr > 0 or line > 0:
				316	# write the values in 1-byte chunks that sum
				317	# to desired value
				318	trunc_addr = addr
				319	trunc_line = line
				320	if trunc_addr > 255:
				321	trunc_addr = 255
				322	if trunc_line > 255:
				323	trunc_line = 255
				324	self.lnotab.append(trunc_addr)
				325	self.lnotab.append(trunc_line)
				326	addr = addr - trunc_addr
				327	line = line - trunc_line
				328	self.lastline = lineno
				329	self.lastoff = self.codeOffset
				330
				331	def getCode(self):
				332	return string.join(self.code, '')
				333
				334	def getTable(self):
				335	return string.join(map(chr, self.lnotab), '')
				336
				337	class StackRef:
				338	"""Manage stack locations for jumps, loops, etc."""
				339	count = 0
				340
				341	def __init__(self, id=None, val=None):
				342	if id is None:
				343	id = StackRef.count
				344	StackRef.count = StackRef.count + 1
				345	self.id = id
				346	self.val = val
				347
				348	def __repr__(self):
				349	if self.val:
				350	return "StackRef(val=%d)" % self.val
				351	else:
				352	return "StackRef(id=%d)" % self.id
				353
				354	def bind(self, inst):
				355	self.val = inst
				356
				357	def resolve(self):
				358	if self.val is None:
				359	print "UNRESOLVE REF", self
				360	return 0
				361	return self.val
				362
				363	class StackDepthTracker:
				364	# XXX need to keep track of stack depth on jumps
				365
				366	def findDepth(self, insts):
				367	depth = 0
				368	maxDepth = 0
				369	for i in insts:
				370	opname = i[0]
				371	delta = self.effect.get(opname, 0)
				372	if delta > 1:
				373	depth = depth + delta
				374	elif delta < 0:
				375	if depth > maxDepth:
				376	maxDepth = depth
				377	depth = depth + delta
				378	else:
				379	if depth > maxDepth:
				380	maxDepth = depth
				381	# now check patterns
				382	for pat, delta in self.patterns:
				383	if opname[:len(pat)] == pat:
				384	depth = depth + delta
				385	break
				386	# if we still haven't found a match
				387	if delta == 0:
				388	meth = getattr(self, opname)
				389	depth = depth + meth(i[1])
				390	if depth < 0:
				391	depth = 0
				392	return maxDepth
				393
				394	effect = {
				395	'POP_TOP': -1,
				396	'DUP_TOP': 1,
				397	'SLICE+1': -1,
				398	'SLICE+2': -1,
				399	'SLICE+3': -2,
				400	'STORE_SLICE+0': -1,
				401	'STORE_SLICE+1': -2,
				402	'STORE_SLICE+2': -2,
				403	'STORE_SLICE+3': -3,
				404	'DELETE_SLICE+0': -1,
				405	'DELETE_SLICE+1': -2,
				406	'DELETE_SLICE+2': -2,
				407	'DELETE_SLICE+3': -3,
				408	'STORE_SUBSCR': -3,
				409	'DELETE_SUBSCR': -2,
				410	# PRINT_EXPR?
				411	'PRINT_ITEM': -1,
				412	'LOAD_LOCALS': 1,
				413	'RETURN_VALUE': -1,
				414	'EXEC_STMT': -2,
				415	'BUILD_CLASS': -2,
				416	'STORE_NAME': -1,
				417	'STORE_ATTR': -2,
				418	'DELETE_ATTR': -1,
				419	'STORE_GLOBAL': -1,
				420	'BUILD_MAP': 1,
				421	'COMPARE_OP': -1,
				422	'STORE_FAST': -1,
				423	}
				424	# use pattern match
				425	patterns = [
				426	('BINARY_', -1),
				427	('LOAD_', 1),
				428	('IMPORT_', 1),
				429	]
				430	# special cases
				431
				432	#: UNPACK_TUPLE, UNPACK_LIST, BUILD_TUPLE,
				433	# BUILD_LIST, CALL_FUNCTION, MAKE_FUNCTION, BUILD_SLICE
				434	def UNPACK_TUPLE(self, count):
				435	return count
				436	def UNPACK_LIST(self, count):
				437	return count
				438	def BUILD_TUPLE(self, count):
				439	return -count
				440	def BUILD_LIST(self, count):
				441	return -count
				442	def CALL_FUNCTION(self, argc):
				443	hi, lo = divmod(argc, 256)
				444	return lo + hi * 2
				445	def MAKE_FUNCTION(self, argc):
				446	return -argc
				447	def BUILD_SLICE(self, argc):
				448	if argc == 2:
				449	return -1
				450	elif argc == 3:
				451	return -2
				452
				453	findDepth = StackDepthTracker().findDepth