blob: 5d5ac4dccb323a510c78fe076ddfc654cb64b447 [file] [log] [blame]
"""Assembler for Python bytecode

The new module is used to create the code object.  The following
attribute definitions are included from the reference manual:

co_name gives the function name
co_argcount is the number of positional arguments (including
    arguments with default values)
co_nlocals is the number of local variables used by the function
    (including arguments)
co_varnames is a tuple containing the names of the local variables
    (starting with the argument names)
co_code is a string representing the sequence of bytecode instructions
co_consts is a tuple containing the literals used by the bytecode
co_names is a tuple containing the names used by the bytecode
co_filename is the filename from which the code was compiled
co_firstlineno is the first line number of the function
co_lnotab is a string encoding the mapping from bytecode offsets
    to line numbers.  See LineAddrTable below.
co_stacksize is the required stack size (including local variables)
co_flags is an integer encoding a number of flags for the
    interpreter.  There are four flags:
    CO_OPTIMIZED -- uses LOAD_FAST
    CO_NEWLOCALS -- everything?
    CO_VARARGS -- uses *args
    CO_VARKEYWORDS -- uses **kwargs

If a code object represents a function, the first item in co_consts is
the documentation string of the function, or None if undefined.
"""
31
32import sys
33import dis
34import new
35import string
36
37import misc
38
# Bit flags stored in a code object's co_flags (one bit per flag).
CO_OPTIMIZED = 1 << 0
CO_NEWLOCALS = 1 << 1
CO_VARARGS = 1 << 2
CO_VARKEYWORDS = 1 << 3

44
class TupleArg:
    """Stand-in for a nested (tuple) argument in an argument list.

    count -- number used to build the synthetic variable name
    names -- the (possibly nested) names carried by the argument
    """

    def __init__(self, count, names):
        self.names = names
        self.count = count

    def __repr__(self):
        fields = (self.count, self.names)
        return "TupleArg(%s, %s)" % fields

    def getName(self):
        """Return the synthetic variable name, '.nested<count>'."""
        number = self.count
        return ".nested%d" % number

53
class PyAssembler:
    """Creates Python code objects

    Instructions are collected with emit() as tuples of the form
    (opname,) or (opname, arg) and are turned into a code object by
    makeCodeObject().
    """

    # XXX this class needs to major refactoring

    def __init__(self, args=(), name='?', filename='<?>',
                 docstring=None):
        """Set up an empty instruction list for one code object.

        args -- argument names; TupleArg entries are replaced in
                varnames by their synthetic '.nestedN' name
        name -- stored as self.name and passed on to new.code()
        filename -- stored as self.filename and passed on to new.code()
        docstring -- becomes the first entry of the constants list
        """
        # XXX why is the default value for flags 3?
        self.insts = []
        # used by makeCodeObject
        self._getArgCount(args)
        self.code = ''
        self.consts = [docstring]
        self.filename = filename
        self.flags = CO_NEWLOCALS
        self.name = name
        self.names = []
        self.varnames = list(args)
        for i in range(len(self.varnames)):
            var = self.varnames[i]
            if isinstance(var, TupleArg):
                self.varnames[i] = var.getName()
        # lnotab support
        self.firstlineno = 0
        self.lastlineno = 0
        self.last_addr = 0
        self.lnotab = ''

    def _getArgCount(self, args):
        """Set self.argcount from args.

        Starts from len(args) and, for every TupleArg, subtracts the
        number of names it contains (as flattened by misc.flatten).
        """
        self.argcount = len(args)
        for arg in args:
            if isinstance(arg, TupleArg):
                numNames = len(misc.flatten(arg.names))
                self.argcount = self.argcount - numNames

    def __repr__(self):
        return "<bytecode: %d instrs>" % len(self.insts)

    def setFlags(self, val):
        """XXX for module's function"""
        self.flags = val

    def setOptimized(self):
        """Turn on the CO_OPTIMIZED flag."""
        self.flags = self.flags | CO_OPTIMIZED

    def setVarArgs(self):
        """Turn on CO_VARARGS and, the first time only, lower argcount by 1."""
        if not self.flags & CO_VARARGS:
            self.flags = self.flags | CO_VARARGS
            self.argcount = self.argcount - 1

    def setKWArgs(self):
        """Turn on CO_VARKEYWORDS.

        argcount is not touched here; makeCodeObject() accounts for
        the flag when it builds the code object.
        """
        self.flags = self.flags | CO_VARKEYWORDS

    def getCurInst(self):
        """Return the number of instructions emitted so far."""
        return len(self.insts)

    def getNextInst(self):
        """Return the number of instructions emitted so far, plus one."""
        return len(self.insts) + 1

    def dump(self, io=sys.stdout):
        """Write a numbered listing of the instructions to io.

        A blank line is written before every SET_LINENO instruction.
        """
        i = 0
        for inst in self.insts:
            if inst[0] == 'SET_LINENO':
                io.write("\n")
            io.write(" %3d " % i)
            if len(inst) == 1:
                io.write("%s\n" % inst)
            else:
                io.write("%-15.15s\t%s\n" % inst)
            i = i + 1

    def makeCodeObject(self):
        """Make a Python code object

        This creates a Python code object using the new module.  This
        seems simpler than reverse-engineering the way marshal dumps
        code objects into .pyc files.  One of the key difficulties is
        figuring out how to layout references to code objects that
        appear on the VM stack; e.g.
          3 SET_LINENO          1
          6 LOAD_CONST          0 (<code object fact at 8115878 [...]
          9 MAKE_FUNCTION       0
         12 STORE_NAME          0 (fact)

        Raises TypeError if an instruction argument could not be
        translated to a number.  A SystemError raised by new.code() is
        re-raised after the arguments have been printed.
        """

        self._findOffsets()
        lnotab = LineAddrTable()
        for t in self.insts:
            opname = t[0]
            if len(t) == 1:
                lnotab.addCode(chr(self.opnum[opname]))
            elif len(t) == 2:
                oparg = self._convertArg(opname, t[1])
                if opname == 'SET_LINENO':
                    lnotab.nextLine(oparg)
                try:
                    hi, lo = divmod(oparg, 256)
                except TypeError:
                    raise TypeError("untranslated arg: %s, %s"
                                    % (opname, oparg))
                # the argument is stored low byte first
                lnotab.addCode(chr(self.opnum[opname]) + chr(lo) +
                               chr(hi))
            # NOTE: tuples of any other length are skipped, both here
            # and in _findOffsets
        # why is a module a special case?
        if self.flags == 0:
            nlocals = 0
        else:
            nlocals = len(self.varnames)
        # Work on a local copy so that self.argcount is left alone and
        # this method can safely be called more than once.
        argcount = self.argcount
        if self.flags & CO_VARKEYWORDS:
            argcount = argcount - 1
        stacksize = findDepth(self.insts)
        # The lnotab deltas are relative to the first SET_LINENO line,
        # so use it as co_firstlineno unless one was set explicitly.
        firstlineno = self.firstlineno or lnotab.firstline
        try:
            co = new.code(argcount, nlocals, stacksize,
                          self.flags, lnotab.getCode(), self._getConsts(),
                          tuple(self.names), tuple(self.varnames),
                          self.filename, self.name, firstlineno,
                          lnotab.getTable())
        except SystemError:
            # show what was passed to new.code() before re-raising
            err = sys.exc_info()[1]
            report = (err,
                      repr(argcount),
                      repr(nlocals),
                      repr(stacksize),
                      repr(self.flags),
                      repr(lnotab.getCode()),
                      repr(self._getConsts()),
                      repr(self.names),
                      repr(self.varnames),
                      repr(self.filename),
                      repr(self.name),
                      repr(firstlineno),
                      repr(lnotab.getTable()))
            for item in report:
                print(item)
            raise
        return co

    def _getConsts(self):
        """Return a tuple for the const slot of a code object

        Elements that have an asConst attribute are replaced by the
        result of calling it; everything else is used unchanged.
        """
        l = []
        for elt in self.consts:
            # XXX might be clearer to just as isinstance(CodeGen)
            if hasattr(elt, 'asConst'):
                l.append(elt.asConst())
            else:
                l.append(elt)
        return tuple(l)

    def _findOffsets(self):
        """Find offsets for use in resolving StackRefs

        Fills self.offsets with the byte offset of every instruction:
        one-element tuples take 1 byte, two-element tuples take 3.
        """
        self.offsets = []
        cur = 0
        for t in self.insts:
            self.offsets.append(cur)
            l = len(t)
            if l == 1:
                cur = cur + 1
            elif l == 2:
                cur = cur + 3
                arg = t[1]
                # XXX this is a total hack: for a reference used
                # multiple times, we create a list of offsets and
                # expect that we when we pass through the code again
                # to actually generate the offsets, we'll pass in the
                # same order.
                #
                # Only relative jumps consume an entry in _convertArg,
                # so only they may add one; otherwise a reference that
                # is also the target of an absolute jump would leave
                # stale entries behind and shift every later lookup.
                if isinstance(arg, StackRef) and self.hasjrel.has_elt(t[0]):
                    try:
                        arg.__offset.append(cur)
                    except AttributeError:
                        arg.__offset = [cur]

    def _convertArg(self, op, arg):
        """Convert the string representation of an arg to a number

        The specific handling depends on the opcode:

        SET_LINENO -- arg is returned unchanged
        LOAD_CONST -- index of arg in self.consts
        local ops -- index of arg in self.varnames (arg is also
                     entered in self.names)
        global and name ops -- index of arg in self.names
        COMPARE_OP -- index of arg in cmp_op
        relative jumps -- byte distance from the end of the jump
                          instruction to the target
        absolute jumps -- byte offset of the target
        anything else -- arg is returned unchanged

        XXX This first implementation isn't going to be very
        efficient.
        """
        if op == 'SET_LINENO':
            return arg
        if op == 'LOAD_CONST':
            return self._lookupName(arg, self.consts)
        if op in self.localOps:
            # make sure it's in self.names, but use the bytecode offset
            self._lookupName(arg, self.names)
            return self._lookupName(arg, self.varnames)
        if op in self.globalOps:
            return self._lookupName(arg, self.names)
        if op in self.nameOps:
            return self._lookupName(arg, self.names)
        if op == 'COMPARE_OP':
            return self.cmp_op.index(arg)
        if self.hasjrel.has_elt(op):
            # take the next offset queued up by _findOffsets
            offset = arg.__offset[0]
            del arg.__offset[0]
            return self.offsets[arg.resolve()] - offset
        if self.hasjabs.has_elt(op):
            return self.offsets[arg.resolve()]
        return arg

    nameOps = ('STORE_NAME', 'IMPORT_NAME', 'IMPORT_FROM',
               'STORE_ATTR', 'LOAD_ATTR', 'LOAD_NAME', 'DELETE_NAME',
               'DELETE_ATTR')
    localOps = ('LOAD_FAST', 'STORE_FAST', 'DELETE_FAST')
    globalOps = ('LOAD_GLOBAL', 'STORE_GLOBAL', 'DELETE_GLOBAL')

    def _lookupName(self, name, list):
        """Return index of name in list, appending if necessary

        An existing element only counts as a match when it has the
        same type as name and compares equal to it, so that e.g. 2 and
        2L get separate slots.
        """
        for i in range(len(list)):
            elt = list[i]
            if type(elt) == type(name) and elt == name:
                return i
        end = len(list)
        list.append(name)
        return end

    # Convert some stuff from the dis module for local use

    cmp_op = list(dis.cmp_op)
    hasjrel = misc.Set()
    for i in dis.hasjrel:
        hasjrel.add(dis.opname[i])
    hasjabs = misc.Set()
    for i in dis.hasjabs:
        hasjabs.add(dis.opname[i])

    # maps an opcode name to its number
    opnum = {}
    for num in range(len(dis.opname)):
        opnum[dis.opname[num]] = num

    # this version of emit + arbitrary hooks might work, but it's damn
    # messy.

    def emit(self, *args):
        """Append an instruction tuple (opname[, arg]) to self.insts.

        Hooks registered for the opname in _emit_hooks are called
        before the instruction is appended.
        """
        self._emitDispatch(args[0], args[1:])
        self.insts.append(args)

    def _emitDispatch(self, type, args):
        """Call every hook registered for type with (self, args)."""
        for func in self._emit_hooks.get(type, []):
            func(self, args)

    # maps an opcode name to a list of hook functions
    _emit_hooks = {}

303
class LineAddrTable:
    """lnotab

    This class builds the lnotab, which is undocumented but described
    by com_set_lineno in compile.c.  Here's an attempt at explanation:

    For each SET_LINENO instruction after the first one, two bytes are
    added to lnotab.  (In some cases, multiple two-byte entries are
    added.)  The first byte is the distance in bytes between the
    instruction for the last SET_LINENO and the current SET_LINENO.
    The second byte is offset in line numbers.  If either offset is
    greater than 255, multiple two-byte entries are added: the address
    offset is written first, in (255, 0) entries, and only then the
    line offset, so that no address is credited to the new line before
    the whole address distance has been covered.

    Both bytes are unsigned, so a SET_LINENO whose line number is lower
    than the previous one cannot be encoded; it is left out and its
    code stays attributed to the previously recorded line.
    """

    def __init__(self):
        self.code = []          # pieces of bytecode, in order
        self.codeOffset = 0     # total length of the pieces so far
        self.firstline = 0      # line of the first SET_LINENO; 0 = none yet
        self.lastline = 0       # line of the last entry written
        self.lastoff = 0        # code offset of the last entry written
        self.lnotab = []        # flat list of byte values, two per entry

    def addCode(self, code):
        """Append a piece of bytecode and advance the current offset."""
        self.code.append(code)
        self.codeOffset = self.codeOffset + len(code)

    def nextLine(self, lineno):
        """Record that the code added from now on belongs to lineno."""
        if self.firstline == 0:
            self.firstline = lineno
            self.lastline = lineno
            return
        # compute deltas
        addr = self.codeOffset - self.lastoff
        line = lineno - self.lastline
        if line < 0:
            # not representable in an unsigned byte; see class docstring
            return
        push = self.lnotab.append
        while addr > 255:
            push(255)
            push(0)
            addr = addr - 255
        while line > 255:
            push(addr)
            push(255)
            line = line - 255
            addr = 0
        if addr > 0 or line > 0:
            push(addr)
            push(line)
        self.lastline = lineno
        self.lastoff = self.codeOffset

    def getCode(self):
        """Return all code added so far as a single string."""
        return ''.join(self.code)

    def getTable(self):
        """Return the line number table as a string, one char per byte."""
        return ''.join(map(chr, self.lnotab))

360
class StackRef:
    """Manage stack locations for jumps, loops, etc.

    A reference starts out unbound (val is None) and is later bound to
    an instruction index with bind().  References created without an
    explicit id are numbered from the class-wide counter.
    """
    count = 0

    def __init__(self, id=None, val=None):
        if id is None:
            id = StackRef.count
            StackRef.count = StackRef.count + 1
        self.id = id
        self.val = val

    def __repr__(self):
        # Compare against None, as resolve() does: instruction 0 is a
        # valid target and must not be reported as unbound.
        if self.val is not None:
            return "StackRef(val=%d)" % self.val
        else:
            return "StackRef(id=%d)" % self.id

    def bind(self, inst):
        """Bind the reference to the instruction index inst."""
        self.val = inst

    def resolve(self):
        """Return the bound instruction index.

        An unbound reference is reported on stdout and resolves to 0.
        """
        if self.val is None:
            print("UNRESOLVE REF %s" % (self,))
            return 0
        return self.val

386
class StackDepthTracker:
    """Estimate the stack size needed by a list of instructions.

    The effect of an instruction on the stack depth is looked up in
    three places, in this order: the effect table (exact opcode name),
    the patterns list (opcode name prefix), and a method named after
    the opcode that computes the effect from the argument.  Opcodes
    found in none of them are taken to leave the depth unchanged.
    """
    # XXX need to keep track of stack depth on jumps

    def findDepth(self, insts):
        """Return the largest depth reached while scanning insts in order.

        insts is a sequence of (opname,) or (opname, arg) tuples.  The
        instructions are scanned linearly; jumps are not followed.
        """
        depth = 0
        maxDepth = 0
        for i in insts:
            opname = i[0]
            # None (rather than 0) marks "no entry", so that the
            # fallbacks below are only used for unlisted opcodes
            delta = self.effect.get(opname)
            if delta is None:
                # now check patterns
                for pat, pat_delta in self.patterns:
                    if opname[:len(pat)] == pat:
                        delta = pat_delta
                        break
            if delta is None:
                # if we still haven't found a match
                meth = getattr(self, opname, None)
                if meth is not None:
                    delta = meth(i[1])
                else:
                    delta = 0
            depth = depth + delta
            if depth < 0:
                depth = 0
            # sample after the effect so a trailing push is counted too
            if depth > maxDepth:
                maxDepth = depth
        return maxDepth

    effect = {
        'POP_TOP': -1,
        'DUP_TOP': 1,
        'SLICE+1': -1,
        'SLICE+2': -1,
        'SLICE+3': -2,
        'STORE_SLICE+0': -1,
        'STORE_SLICE+1': -2,
        'STORE_SLICE+2': -2,
        'STORE_SLICE+3': -3,
        'DELETE_SLICE+0': -1,
        'DELETE_SLICE+1': -2,
        'DELETE_SLICE+2': -2,
        'DELETE_SLICE+3': -3,
        'STORE_SUBSCR': -3,
        'DELETE_SUBSCR': -2,
        # PRINT_EXPR?
        'PRINT_ITEM': -1,
        'LOAD_LOCALS': 1,
        'RETURN_VALUE': -1,
        'EXEC_STMT': -2,
        'BUILD_CLASS': -2,
        'STORE_NAME': -1,
        'STORE_ATTR': -2,
        'DELETE_ATTR': -1,
        'STORE_GLOBAL': -1,
        'BUILD_MAP': 1,
        'COMPARE_OP': -1,
        'STORE_FAST': -1,
        }
    # use pattern match
    patterns = [
        ('BINARY_', -1),
        ('LOAD_', 1),
        ('IMPORT_', 1),
        ]
    # special cases

    #: UNPACK_TUPLE, UNPACK_LIST, BUILD_TUPLE,
    # BUILD_LIST, CALL_FUNCTION, MAKE_FUNCTION, BUILD_SLICE
    def UNPACK_TUPLE(self, count):
        # pops the sequence, pushes count items
        return count - 1
    def UNPACK_LIST(self, count):
        # pops the sequence, pushes count items
        return count - 1
    def BUILD_TUPLE(self, count):
        # pops count items, pushes the new tuple
        return 1 - count
    def BUILD_LIST(self, count):
        # pops count items, pushes the new list
        return 1 - count
    def CALL_FUNCTION(self, argc):
        # low byte: positional args (one slot each); high byte: keyword
        # args (two slots each).  The callable is replaced by the result.
        hi, lo = divmod(argc, 256)
        return -(lo + hi * 2)
    def MAKE_FUNCTION(self, argc):
        # pops argc defaults; the code object is replaced by the function
        return -argc
    def BUILD_SLICE(self, argc):
        # pops argc items, pushes the slice; only 2 and 3 are handled
        if argc == 2:
            return -1
        elif argc == 3:
            return -2

findDepth = StackDepthTracker().findDepth