blob: 4cb910c3e6ad9951a4b770ec671a8adc6b5c7b6e [file] [log] [blame]
"""Assembler for Python bytecode

The new module is used to create the code object.  The following
attribute definitions are included from the reference manual:

co_name gives the function name
co_argcount is the number of positional arguments (including
    arguments with default values)
co_nlocals is the number of local variables used by the function
    (including arguments)
co_varnames is a tuple containing the names of the local variables
    (starting with the argument names)
co_code is a string representing the sequence of bytecode instructions
co_consts is a tuple containing the literals used by the bytecode
co_names is a tuple containing the names used by the bytecode
co_filename is the filename from which the code was compiled
co_firstlineno is the first line number of the function
co_lnotab is a string encoding the mapping from bytecode offsets
    to line numbers.  See LineAddrTable below.
co_stacksize is the required stack size (including local variables)
co_flags is an integer encoding a number of flags for the
    interpreter.  There are four flags:
        CO_OPTIMIZED -- uses fast locals (LOAD_FAST and friends)
        CO_NEWLOCALS -- executes with a fresh local namespace
        CO_VARARGS -- uses *args
        CO_VARKEYWORDS -- uses **kwargs

If a code object represents a function, the first item in co_consts is
the documentation string of the function, or None if undefined.
"""
31
32import sys
33import dis
34import new
35import string
36
37import misc
38
# Bit flags stored in the flags field of a code object; each flag
# occupies one bit so they can be combined with "|" and tested with "&".
CO_OPTIMIZED = 1 << 0
CO_NEWLOCALS = 1 << 1
CO_VARARGS = 1 << 2
CO_VARKEYWORDS = 1 << 3
44
class TupleArg:
    """Argument-list entry that stands for a group of names.

    count -- number used to build the synthetic name of the entry
    names -- the names grouped under this entry

    getName() exposes the entry under the name ".nested<count>".
    """

    def __init__(self, count, names):
        self.count, self.names = count, names

    def __repr__(self):
        shown = (self.count, self.names)
        return "TupleArg(%s, %s)" % shown

    def getName(self):
        # synthetic variable name derived from the count
        index = self.count
        return ".nested%d" % index
53
class PyAssembler:
    """Creates Python code objects

    Instructions are collected with emit() as tuples of the form
    (opname,) or (opname, arg).  makeCodeObject() resolves the
    symbolic arguments (names, constants, StackRefs) to numbers and
    builds a code object with the new module.
    """

    # XXX this class needs to major refactoring

    def __init__(self, args=(), name='?', filename='<?>',
                 docstring=None):
        # XXX why is the default value for flags 3?
        self.insts = []
        # used by makeCodeObject
        self._getArgCount(args)
        self.code = ''
        # slot 0 of the constants is always the docstring (or None)
        self.consts = [docstring]
        self.filename = filename
        self.flags = CO_NEWLOCALS
        self.name = name
        self.names = []
        self.varnames = list(args) or []
        # TupleArg entries are replaced by their synthetic names
        for i in range(len(self.varnames)):
            var = self.varnames[i]
            if isinstance(var, TupleArg):
                self.varnames[i] = var.getName()
        # lnotab support
        # NOTE(review): firstlineno is never updated inside this class;
        # presumably callers set it before makeCodeObject -- confirm.
        self.firstlineno = 0
        self.lastlineno = 0
        self.last_addr = 0
        self.lnotab = ''

    def _getArgCount(self, args):
        """Set self.argcount from args.

        Starts from len(args) and, for every TupleArg entry, subtracts
        the number of names it flattens to.
        """
        self.argcount = len(args)
        if args:
            for arg in args:
                if isinstance(arg, TupleArg):
                    numNames = len(misc.flatten(arg.names))
                    self.argcount = self.argcount - numNames

    def __repr__(self):
        return "<bytecode: %d instrs>" % len(self.insts)

    def setFlags(self, val):
        """XXX for module's function"""
        self.flags = val

    def setOptimized(self):
        self.flags = self.flags | CO_OPTIMIZED

    def setVarArgs(self):
        # guarded so that repeated calls only adjust argcount once
        if not self.flags & CO_VARARGS:
            self.flags = self.flags | CO_VARARGS
            self.argcount = self.argcount - 1

    def setKWArgs(self):
        self.flags = self.flags | CO_VARKEYWORDS

    def getCurInst(self):
        return len(self.insts)

    def getNextInst(self):
        return len(self.insts) + 1

    def dump(self, io=sys.stdout):
        """Write a listing of the collected instructions to io.

        A blank line is written before every SET_LINENO instruction.
        """
        i = 0
        for inst in self.insts:
            if inst[0] == 'SET_LINENO':
                io.write("\n")
            io.write("    %3d " % i)
            if len(inst) == 1:
                io.write("%s\n" % inst)
            else:
                io.write("%-15.15s\t%s\n" % inst)
            i = i + 1

    def makeCodeObject(self):
        """Make a Python code object

        This creates a Python code object using the new module.  This
        seems simpler than reverse-engineering the way marshal dumps
        code objects into .pyc files.  One of the key difficulties is
        figuring out how to layout references to code objects that
        appear on the VM stack; e.g.
          3 SET_LINENO          1
          6 LOAD_CONST          0 (<code object fact at 8115878 [...]
          9 MAKE_FUNCTION       0
         12 STORE_NAME          0 (fact)

        The assembler's own argcount is left untouched, so the method
        does not compound the keyword-argument adjustment when it is
        called more than once.
        """

        self._findOffsets()
        lnotab = LineAddrTable()
        for t in self.insts:
            opname = t[0]
            if len(t) == 1:
                lnotab.addCode(self.opnum[opname])
            elif len(t) == 2:
                if opname == 'SET_LINENO':
                    oparg = t[1]
                    lnotab.nextLine(oparg)
                else:
                    oparg = self._convertArg(opname, t[1])
                try:
                    # arguments are stored low byte first
                    hi, lo = divmod(oparg, 256)
                except TypeError:
                    raise TypeError("untranslated arg: %s, %s"
                                    % (opname, oparg))
                lnotab.addCode(self.opnum[opname], lo, hi)

        # why is a module a special case?
        if self.flags == 0:
            nlocals = 0
        else:
            nlocals = len(self.varnames)
        # The **kwargs slot is not counted in the code object's argcount.
        # Work on a local copy: the original decremented self.argcount
        # here, which made a second call produce a wrong count.
        argcount = self.argcount
        if self.flags & CO_VARKEYWORDS:
            argcount = argcount - 1
        stacksize = findDepth(self.insts)
        try:
            co = new.code(argcount, nlocals, stacksize,
                          self.flags, lnotab.getCode(), self._getConsts(),
                          tuple(self.names), tuple(self.varnames),
                          self.filename, self.name, self.firstlineno,
                          lnotab.getTable())
        except SystemError:
            # dump every argument to help diagnose the failure, then
            # let the exception propagate
            err = sys.exc_info()[1]
            sys.stdout.write("%s\n" % (err,))
            for value in (argcount, nlocals, stacksize, self.flags,
                          lnotab.getCode(), self._getConsts(),
                          self.names, self.varnames, self.filename,
                          self.name, self.firstlineno,
                          lnotab.getTable()):
                sys.stdout.write("%s\n" % repr(value))
            raise
        return co

    def _getConsts(self):
        """Return a tuple for the const slot of a code object

        Converts PythonVMCode objects to code objects
        """
        l = []
        for elt in self.consts:
            # XXX might be clearer to just as isinstance(CodeGen)
            if hasattr(elt, 'asConst'):
                l.append(elt.asConst())
            else:
                l.append(elt)
        return tuple(l)

    def _findOffsets(self):
        """Find offsets for use in resolving StackRefs

        Fills self.offsets with the byte offset of every instruction
        (1 byte without an argument, 3 bytes with one).  For each
        relative jump whose argument is a StackRef, the offset of the
        following instruction is queued on the StackRef; _convertArg
        consumes the queue in the same order.
        """
        self.offsets = []
        cur = 0
        for t in self.insts:
            self.offsets.append(cur)
            l = len(t)
            if l == 1:
                cur = cur + 1
            elif l == 2:
                cur = cur + 3
                arg = t[1]
                # XXX this is a total hack: for a reference used
                # multiple times, we create a list of offsets and
                # expect that we when we pass through the code again
                # to actually generate the offsets, we'll pass in the
                # same order.
                #
                # Only relative jumps consume an entry in _convertArg,
                # so only they may add one; otherwise a reference that
                # is also the target of an absolute jump would hand the
                # wrong base offset to a later relative jump.
                # (Inside the class body "__offset" is name-mangled, so
                # the attribute is private to PyAssembler.)
                if isinstance(arg, StackRef) and self.hasjrel.has_elt(t[0]):
                    try:
                        arg.__offset.append(cur)
                    except AttributeError:
                        arg.__offset = [cur]

    def _convertArg(self, op, arg):
        """Convert the string representation of an arg to a number

        The specific handling depends on the opcode.

        XXX This first implementation isn't going to be very
        efficient.
        """
        if op == 'SET_LINENO':
            return arg
        if op == 'LOAD_CONST':
            return self._lookupName(arg, self.consts)
        if op in self.localOps:
            # make sure it's in self.names, but use the bytecode offset
            self._lookupName(arg, self.names)
            return self._lookupName(arg, self.varnames)
        if op in self.globalOps:
            return self._lookupName(arg, self.names)
        if op in self.nameOps:
            return self._lookupName(arg, self.names)
        if op == 'COMPARE_OP':
            return self.cmp_op.index(arg)
        if self.hasjrel.has_elt(op):
            # relative jump: distance from the instruction after the
            # jump (queued by _findOffsets) to the target
            offset = arg.__offset[0]
            del arg.__offset[0]
            return self.offsets[arg.resolve()] - offset
        if self.hasjabs.has_elt(op):
            return self.offsets[arg.resolve()]
        return arg

    nameOps = ('STORE_NAME', 'IMPORT_NAME', 'IMPORT_FROM',
               'STORE_ATTR', 'LOAD_ATTR', 'LOAD_NAME', 'DELETE_NAME',
               'DELETE_ATTR')
    localOps = ('LOAD_FAST', 'STORE_FAST', 'DELETE_FAST')
    globalOps = ('LOAD_GLOBAL', 'STORE_GLOBAL', 'DELETE_GLOBAL')

    def _lookupName(self, name, list):
        """Return index of name in list, appending if necessary"""
        if name in list:
            i = list.index(name)
            # this is cheap, but incorrect in some cases, e.g 2 vs. 2L
            if type(name) == type(list[i]):
                return i
            # equal value but different type: fall back to a scan that
            # also compares the types
            for i in range(len(list)):
                elt = list[i]
                if type(elt) == type(name) and elt == name:
                    return i
        end = len(list)
        list.append(name)
        return end

    # Convert some stuff from the dis module for local use

    cmp_op = list(dis.cmp_op)
    hasjrel = misc.Set()
    for i in dis.hasjrel:
        hasjrel.add(dis.opname[i])
    hasjabs = misc.Set()
    for i in dis.hasjabs:
        hasjabs.add(dis.opname[i])

    # maps opcode name -> opcode number
    opnum = {}
    for num in range(len(dis.opname)):
        opnum[dis.opname[num]] = num

    # this version of emit + arbitrary hooks might work, but it's damn
    # messy.

    def emit(self, *args):
        """Append the instruction args = (opname[, arg]).

        Hooks registered for the opname run before the instruction is
        stored.
        """
        self._emitDispatch(args[0], args[1:])
        self.insts.append(args)

    def _emitDispatch(self, type, args):
        for func in self._emit_hooks.get(type, []):
            func(self, args)

    # maps opcode name -> list of functions called as func(self, args)
    _emit_hooks = {}
305
class LineAddrTable:
    """lnotab

    This class builds the lnotab, which is undocumented but described
    by com_set_lineno in compile.c.  Here's an attempt at explanation:

    For each SET_LINENO instruction after the first one, two bytes are
    added to lnotab.  (In some cases, multiple two-byte entries are
    added.)  The first byte is the distance in bytes between the
    instruction for the last SET_LINENO and the current SET_LINENO.
    The second byte is offset in line numbers.  If either offset is
    greater than 255, multiple two-byte entries are added -- one entry
    for each factor of 255.

    When the address distance has to be split, the leading entries
    carry a line offset of 0, so the line number only changes once the
    whole address distance has been accounted for.
    """

    def __init__(self):
        self.code = []          # one 1-character string per code byte
        self.codeOffset = 0     # number of bytes passed to addCode so far
        self.firstline = 0      # first line seen; 0 means "none yet"
        self.lastline = 0       # line of the last recorded entry
        self.lastoff = 0        # code offset of the last recorded entry
        self.lnotab = []        # flat list: addr delta, line delta, ...

    def addCode(self, *args):
        """Append each integer in args to the code as one byte."""
        for arg in args:
            self.code.append(chr(arg))
        self.codeOffset = self.codeOffset + len(args)

    def nextLine(self, lineno):
        """Record that the code added from now on belongs to lineno.

        The first call only remembers the line.  Later calls append
        (address delta, line delta) pairs to the table.  A line number
        lower than the previous one cannot be represented (entries are
        unsigned bytes) and is ignored.
        """
        if self.firstline == 0:
            self.firstline = lineno
            self.lastline = lineno
            return
        # compute deltas
        addr = self.codeOffset - self.lastoff
        line = lineno - self.lastline
        if line < 0:
            # keep the previous position so the next increasing line
            # is measured from the last entry actually written
            return
        push = self.lnotab.append
        # consume the address delta first, without moving the line
        while addr > 255:
            push(255)
            push(0)
            addr = addr - 255
        # then the line delta; only the first chunk carries what is
        # left of the address delta
        while line > 255:
            push(addr)
            push(255)
            line = line - 255
            addr = 0
        if addr > 0 or line > 0:
            push(addr)
            push(line)
        self.lastline = lineno
        self.lastoff = self.codeOffset

    def getCode(self):
        """Return the accumulated code bytes as one string."""
        return ''.join(self.code)

    def getTable(self):
        """Return the line number table as a string, one char per entry."""
        return ''.join(map(chr, self.lnotab))
363
class StackRef:
    """Manage stack locations for jumps, loops, etc.

    A reference starts out unbound (val is None) unless a value is
    given, and is bound later with bind().  resolve() returns the bound
    value.
    """
    count = 0   # next id handed out when none is supplied

    def __init__(self, id=None, val=None):
        if id is None:
            id = StackRef.count
            StackRef.count = StackRef.count + 1
        self.id = id
        self.val = val

    def __repr__(self):
        # test against None: 0 is a legitimate bound value and must not
        # be reported as an unbound reference
        if self.val is not None:
            return "StackRef(val=%d)" % self.val
        return "StackRef(id=%d)" % self.id

    def bind(self, inst):
        """Bind the reference to inst."""
        self.val = inst

    def resolve(self):
        """Return the bound value.

        An unbound reference is reported on stdout and resolves to 0.
        """
        if self.val is None:
            sys.stdout.write("UNRESOLVE REF %s\n" % (self,))
            return 0
        return self.val
Jeremy Hyltona5058122000-02-14 14:14:29 +0000389
class StackDepthTracker:
    """Estimate the stack size needed by a sequence of instructions.

    The effect of an instruction on the stack depth is looked up in
    three places, in this order: the effect table (exact opcode name),
    the patterns list (opcode name prefix), and a method named after
    the opcode that computes the effect from the argument.  Opcodes
    found in none of them are assumed to leave the depth unchanged.
    """
    # XXX need to keep track of stack depth on jumps

    def findDepth(self, insts):
        """Return the maximum stack depth reached by insts.

        insts is a sequence of (opname,) or (opname, arg) tuples.  The
        instructions are scanned linearly; jumps are not followed.
        """
        depth = 0
        maxDepth = 0
        for inst in insts:
            opname = inst[0]
            delta = self.effect.get(opname)
            if delta is None:
                # now check patterns
                for pat, pat_delta in self.patterns:
                    if opname[:len(pat)] == pat:
                        delta = pat_delta
                        break
            if delta is None:
                # if we still haven't found a match, try the
                # special-case method for this opcode
                meth = getattr(self, opname, None)
                if meth is None:
                    delta = 0
                else:
                    delta = meth(inst[1])
            depth = depth + delta
            # jumps are not tracked, so the running depth can drift
            # below zero; clamp it rather than under-count later peaks
            if depth < 0:
                depth = 0
            if depth > maxDepth:
                maxDepth = depth
        return maxDepth

    effect = {
        'POP_TOP': -1,
        'DUP_TOP': 1,
        'SLICE+1': -1,
        'SLICE+2': -1,
        'SLICE+3': -2,
        'STORE_SLICE+0': -1,
        'STORE_SLICE+1': -2,
        'STORE_SLICE+2': -2,
        'STORE_SLICE+3': -3,
        'DELETE_SLICE+0': -1,
        'DELETE_SLICE+1': -2,
        'DELETE_SLICE+2': -2,
        'DELETE_SLICE+3': -3,
        'STORE_SUBSCR': -3,
        'DELETE_SUBSCR': -2,
        # PRINT_EXPR?
        'PRINT_ITEM': -1,
        'LOAD_LOCALS': 1,
        'RETURN_VALUE': -1,
        'EXEC_STMT': -2,
        'BUILD_CLASS': -2,
        'STORE_NAME': -1,
        'STORE_ATTR': -2,
        'DELETE_ATTR': -1,
        'STORE_GLOBAL': -1,
        'BUILD_MAP': 1,
        'COMPARE_OP': -1,
        'STORE_FAST': -1,
        }
    # use pattern match
    patterns = [
        ('BINARY_', -1),
        ('LOAD_', 1),
        ('IMPORT_', 1),
        ]

    # special cases:
    # UNPACK_TUPLE, UNPACK_LIST, BUILD_TUPLE,
    # BUILD_LIST, CALL_FUNCTION, MAKE_FUNCTION, BUILD_SLICE
    def UNPACK_TUPLE(self, count):
        # pops the sequence, pushes count items
        return count - 1
    def UNPACK_LIST(self, count):
        return count - 1
    def BUILD_TUPLE(self, count):
        # pops count items, pushes the new tuple
        return -count + 1
    def BUILD_LIST(self, count):
        return -count + 1
    def CALL_FUNCTION(self, argc):
        # low byte: positional arguments; high byte: keyword arguments,
        # each of which occupies two stack slots.  The callable is
        # replaced by the result, so only the arguments are removed.
        hi, lo = divmod(argc, 256)
        return -(lo + hi * 2)
    def MAKE_FUNCTION(self, argc):
        # pops argc defaults; the code object is replaced by the function
        return -argc
    def BUILD_SLICE(self, argc):
        # pops argc items, pushes the slice: -1 for 2, -2 for 3
        return 1 - argc

findDepth = StackDepthTracker().findDepth