blob: 7efc4ab470e14135f718c5766536a7b10575bfa3 [file] [log] [blame]
"""Assembler for Python bytecode

The new module is used to create the code object.  The following
attribute definitions are included from the reference manual:

co_name gives the function name
co_argcount is the number of positional arguments (including
    arguments with default values)
co_nlocals is the number of local variables used by the function
    (including arguments)
co_varnames is a tuple containing the names of the local variables
    (starting with the argument names)
co_code is a string representing the sequence of bytecode instructions
co_consts is a tuple containing the literals used by the bytecode
co_names is a tuple containing the names used by the bytecode
co_filename is the filename from which the code was compiled
co_firstlineno is the first line number of the function
co_lnotab is a string encoding the mapping from bytecode offsets
    to line numbers.  See LineAddrTable below.
co_stacksize is the required stack size (including local variables)
co_flags is an integer encoding a number of flags for the
    interpreter.  There are four flags:
    CO_OPTIMIZED -- uses load fast
    CO_NEWLOCALS -- everything?
    CO_VARARGS -- uses *args
    CO_VARKEYWORDS -- uses **args

If a code object represents a function, the first item in co_consts is
the documentation string of the function, or None if undefined.
"""
31
32import sys
33import dis
34import new
35import string
36
37import misc
38
# flags for code objects: bit masks that are OR-ed together into the
# co_flags value handed to new.code() by PyAssembler.makeCodeObject()
CO_OPTIMIZED = 0x0001    # "uses load fast"; set by PyAssembler.setOptimized()
CO_NEWLOCALS = 0x0002    # initial flags value of every PyAssembler
CO_VARARGS = 0x0004      # function takes *args; set by setVarArgs()
CO_VARKEYWORDS = 0x0008  # function takes **args; set by setKWArgs()
44
class TupleArg:
    """Stand-in for a nested tuple parameter in an argument list.

    count -- number interpolated into the synthetic name (see getName)
    names -- the names bound by the tuple; PyAssembler flattens this
             with misc.flatten() when it computes the argument count
    """

    def __init__(self, count, names):
        self.count, self.names = count, names

    def __repr__(self):
        fields = (self.count, self.names)
        return "TupleArg(%s, %s)" % fields

    def getName(self):
        """Return the synthetic variable name used for this argument."""
        template = ".nested%d"
        return template % self.count
53
class PyAssembler:
    """Creates Python code objects

    Instructions are collected with emit() as tuples whose first item is
    the opcode name, optionally followed by a single argument.
    makeCodeObject() translates the symbolic arguments to numbers and
    hands everything to new.code().
    """

    # XXX this class needs major refactoring

    def __init__(self, args=(), name='?', filename='<?>',
                 docstring=None):
        """Start an empty code object.

        args      -- sequence of argument names; TupleArg entries stand
                     for nested tuple parameters
        name      -- value used for co_name
        filename  -- value used for co_filename
        docstring -- stored as the first constant (None if undefined)
        """
        # XXX why is the default value for flags 3?
        self.insts = []
        # used by makeCodeObject
        self._getArgCount(args)
        self.code = ''
        self.consts = [docstring]
        self.filename = filename
        self.flags = CO_NEWLOCALS
        self.name = name
        self.names = []
        self.varnames = list(args)
        # a tuple argument occupies one slot under a synthetic name
        for i in range(len(self.varnames)):
            var = self.varnames[i]
            if isinstance(var, TupleArg):
                self.varnames[i] = var.getName()
        # lnotab support
        self.firstlineno = 0
        self.lastlineno = 0
        self.last_addr = 0
        self.lnotab = ''

    def _getArgCount(self, args):
        """Set self.argcount from the argument list.

        Starts from len(args) and subtracts, for every TupleArg, the
        number of names it contains once flattened.
        """
        self.argcount = len(args)
        for arg in args:
            if isinstance(arg, TupleArg):
                numNames = len(misc.flatten(arg.names))
                self.argcount = self.argcount - numNames

    def __repr__(self):
        return "<bytecode: %d instrs>" % len(self.insts)

    def setFlags(self, val):
        """Replace the flags value outright (XXX for module's function)"""
        self.flags = val

    def setOptimized(self):
        """Turn on CO_OPTIMIZED."""
        self.flags = self.flags | CO_OPTIMIZED

    def setVarArgs(self):
        """Turn on CO_VARARGS, removing the *args slot from argcount.

        The adjustment is applied only the first time the flag is set.
        """
        if not self.flags & CO_VARARGS:
            self.flags = self.flags | CO_VARARGS
            self.argcount = self.argcount - 1

    def setKWArgs(self):
        """Turn on CO_VARKEYWORDS.

        The matching argcount adjustment is made in makeCodeObject().
        """
        self.flags = self.flags | CO_VARKEYWORDS

    def getCurInst(self):
        """Return the number of instructions emitted so far."""
        return len(self.insts)

    def getNextInst(self):
        """Return the number of instructions emitted so far, plus one."""
        return len(self.insts) + 1

    def dump(self, io=sys.stdout):
        """Write a listing of the instructions to io.

        Each line shows the instruction index; a blank line precedes
        every SET_LINENO.
        """
        i = 0
        for inst in self.insts:
            if inst[0] == 'SET_LINENO':
                io.write("\n")
            io.write(" %3d " % i)
            if len(inst) == 1:
                io.write("%s\n" % inst)
            else:
                io.write("%-15.15s\t%s\n" % inst)
            i = i + 1

    def makeCodeObject(self):
        """Make a Python code object

        This creates a Python code object using the new module.  This
        seems simpler than reverse-engineering the way marshal dumps
        code objects into .pyc files.  One of the key difficulties is
        figuring out how to layout references to code objects that
        appear on the VM stack; e.g.
          3 SET_LINENO 1
          6 LOAD_CONST 0 (<code object fact at 8115878 [...]
          9 MAKE_FUNCTION 0
         12 STORE_NAME 0 (fact)

        Raises TypeError if an instruction argument could not be
        translated to a number.  If new.code() raises SystemError, all
        arguments passed to it are written to sys.stdout before the
        error is re-raised.
        """

        self._findOffsets()
        lnotab = LineAddrTable()
        for t in self.insts:
            opname = t[0]
            if len(t) == 1:
                lnotab.addCode(chr(self.opnum[opname]))
            elif len(t) == 2:
                oparg = self._convertArg(opname, t[1])
                if opname == 'SET_LINENO':
                    lnotab.nextLine(oparg)
                try:
                    hi, lo = divmod(oparg, 256)
                except TypeError:
                    raise TypeError("untranslated arg: %s, %s"
                                    % (opname, oparg))
                # the 16-bit argument is stored low byte first
                lnotab.addCode(chr(self.opnum[opname]) + chr(lo) +
                               chr(hi))
        # why is a module a special case?
        if self.flags == 0:
            nlocals = 0
        else:
            nlocals = len(self.varnames)
        # Adjust a local copy rather than self.argcount, so calling this
        # method more than once does not keep shrinking the count.
        argcount = self.argcount
        if self.flags & CO_VARKEYWORDS:
            argcount = argcount - 1
        stacksize = findDepth(self.insts)
        code = lnotab.getCode()
        consts = self._getConsts()
        table = lnotab.getTable()
        # NOTE(review): self.firstlineno is only ever set to 0 in this
        # class, while the lnotab is relative to the first SET_LINENO
        # line (lnotab.firstline) -- confirm which value is intended.
        try:
            co = new.code(argcount, nlocals, stacksize,
                          self.flags, code, consts,
                          tuple(self.names), tuple(self.varnames),
                          self.filename, self.name, self.firstlineno,
                          table)
        except SystemError:
            # Dump every argument to help diagnose the failure, then let
            # the error propagate.
            err = sys.exc_info()[1]
            out = sys.stdout
            out.write("%s\n" % (err,))
            for value in (argcount, nlocals, stacksize, self.flags,
                          code, consts, self.names, self.varnames,
                          self.filename, self.name, self.firstlineno,
                          table):
                out.write("%s\n" % repr(value))
            raise
        return co

    def _getConsts(self):
        """Return a tuple for the const slot of a code object

        Converts PythonVMCode objects to code objects: any constant that
        has an asConst() method is replaced by the result of calling it.
        """
        l = []
        for elt in self.consts:
            # XXX might be clearer to just ask isinstance(CodeGen)
            if hasattr(elt, 'asConst'):
                l.append(elt.asConst())
            else:
                l.append(elt)
        return tuple(l)

    def _findOffsets(self):
        """Find offsets for use in resolving StackRefs

        Fills self.offsets with the byte offset of every instruction
        (1 byte without an argument, 3 bytes with one).  For every
        relative jump whose argument is a StackRef, the offset of the
        following instruction is queued on the ref; _convertArg()
        consumes the queue in the same order.
        """
        self.offsets = []
        cur = 0
        seen = {}
        for t in self.insts:
            self.offsets.append(cur)
            l = len(t)
            if l == 1:
                cur = cur + 1
            elif l == 2:
                cur = cur + 3
                arg = t[1]
                # XXX this is a total hack: for a reference used
                # multiple times, we create a list of offsets and
                # expect that when we pass through the code again
                # to actually generate the offsets, we'll pass in the
                # same order.
                #
                # Only relative jumps consume an entry, so only they
                # may add one; the list is rebuilt on every pass so
                # stale entries from an earlier or aborted pass cannot
                # be picked up.
                if isinstance(arg, StackRef) and \
                   self.hasjrel.has_elt(t[0]):
                    key = id(arg)
                    if seen.get(key):
                        arg.__offset.append(cur)
                    else:
                        seen[key] = 1
                        arg.__offset = [cur]

    def _convertArg(self, op, arg):
        """Convert the string representation of an arg to a number

        The specific handling depends on the opcode.  Arguments of
        opcodes not handled here are returned unchanged.

        XXX This first implementation isn't going to be very
        efficient.
        """
        if op == 'SET_LINENO':
            return arg
        if op == 'LOAD_CONST':
            return self._lookupName(arg, self.consts)
        if op in self.localOps:
            # make sure it's in self.names, but use the bytecode offset
            self._lookupName(arg, self.names)
            return self._lookupName(arg, self.varnames)
        if op in self.globalOps:
            return self._lookupName(arg, self.names)
        if op in self.nameOps:
            return self._lookupName(arg, self.names)
        if op == 'COMPARE_OP':
            return self.cmp_op.index(arg)
        if self.hasjrel.has_elt(op):
            # relative jump: distance from the end of this instruction
            # (queued by _findOffsets) to the target instruction
            offset = arg.__offset[0]
            del arg.__offset[0]
            return self.offsets[arg.resolve()] - offset
        if self.hasjabs.has_elt(op):
            return self.offsets[arg.resolve()]
        return arg

    nameOps = ('STORE_NAME', 'IMPORT_NAME', 'IMPORT_FROM',
               'STORE_ATTR', 'LOAD_ATTR', 'LOAD_NAME', 'DELETE_NAME',
               'DELETE_ATTR')
    localOps = ('LOAD_FAST', 'STORE_FAST', 'DELETE_FAST')
    globalOps = ('LOAD_GLOBAL', 'STORE_GLOBAL', 'DELETE_GLOBAL')

    def _lookupName(self, name, list, list2=None):
        """Return index of name in list, appending if necessary

        An existing entry matches only if it has the same type as name,
        so equal-comparing constants of different types (1 and 1.0, for
        example) get separate slots.

        Yicky hack: Second list can be used for lookup of local names
        where the name needs to be added to varnames and names.
        """
        t = type(name)
        for i in range(len(list)):
            elt = list[i]
            if type(elt) is t and (elt is name or elt == name):
                return i
        end = len(list)
        list.append(name)
        if list2 is not None:
            list2.append(name)
        return end

    # Convert some stuff from the dis module for local use

    cmp_op = list(dis.cmp_op)
    # names of the opcodes that take a relative jump target
    hasjrel = misc.Set()
    for i in dis.hasjrel:
        hasjrel.add(dis.opname[i])
    # names of the opcodes that take an absolute jump target
    hasjabs = misc.Set()
    for i in dis.hasjabs:
        hasjabs.add(dis.opname[i])

    # opcode name -> opcode number
    opnum = {}
    for num in range(len(dis.opname)):
        opnum[dis.opname[num]] = num

    # this version of emit + arbitrary hooks might work, but it's damn
    # messy.

    def emit(self, *args):
        """Append one instruction: emit(opname) or emit(opname, arg).

        Hooks registered for the opcode name run before the instruction
        is stored.
        """
        self._emitDispatch(args[0], args[1:])
        self.insts.append(args)

    def _emitDispatch(self, type, args):
        """Call every hook registered for type as func(self, args)."""
        for func in self._emit_hooks.get(type, []):
            func(self, args)

    # opcode name -> list of hook functions used by _emitDispatch
    _emit_hooks = {}
304
class LineAddrTable:
    """lnotab

    This class builds the lnotab, which is undocumented but described
    by com_set_lineno in compile.c.  Here's an attempt at explanation:

    For each SET_LINENO instruction after the first one, one or more
    two-byte entries are added to lnotab.  The first byte is the
    distance in bytes between the instruction for the last SET_LINENO
    and the current SET_LINENO.  The second byte is the offset in line
    numbers.  Neither byte can exceed 255, so a larger delta is split
    over several entries: first (255, 0) entries until the remaining
    address delta fits, then (addr, 255) entries until the remaining
    line delta fits, then one final entry.  The address increment is
    thus always complete before any line increment is recorded.
    """

    def __init__(self):
        self.code = []        # chunks of bytecode, joined by getCode()
        self.codeOffset = 0   # total length of the chunks added so far
        self.firstline = 0    # line of the first SET_LINENO (0: none yet)
        self.lastline = 0     # line of the last recorded SET_LINENO
        self.lastoff = 0      # code offset of the last recorded SET_LINENO
        self.lnotab = []      # flat list of (addr delta, line delta) bytes

    def addCode(self, code):
        """Append a chunk of bytecode and advance the code offset."""
        self.code.append(code)
        self.codeOffset = self.codeOffset + len(code)

    def nextLine(self, lineno):
        """Record that the code added from now on belongs to lineno.

        The first call only remembers the starting line.  Later calls
        append the address/line deltas since the last recorded line.
        A lineno lower than the last recorded one is ignored, because
        the table stores unsigned bytes and cannot express a negative
        delta; the bytes are then attributed to the next increasing
        line.
        """
        if self.firstline == 0:
            self.firstline = lineno
            self.lastline = lineno
            return
        # compute deltas
        addr = self.codeOffset - self.lastoff
        line = lineno - self.lastline
        if line < 0:
            return
        push = self.lnotab.append
        # write the values in 1-byte chunks that sum to the desired
        # value, finishing the address delta before the line delta
        while addr > 255:
            push(255)
            push(0)
            addr = addr - 255
        while line > 255:
            push(addr)
            push(255)
            line = line - 255
            addr = 0
        if addr > 0 or line > 0:
            push(addr)
            push(line)
        self.lastline = lineno
        self.lastoff = self.codeOffset

    def getCode(self):
        """Return all bytecode added so far as one string."""
        return string.join(self.code, '')

    def getTable(self):
        """Return the lnotab as a string, one character per byte."""
        return string.join(map(chr, self.lnotab), '')
361
class StackRef:
    """Manage stack locations for jumps, loops, etc.

    A StackRef is a symbolic jump target.  It starts out unbound
    (val is None) and is later bound to an instruction index with
    bind(); PyAssembler turns that index into a byte offset.
    """
    # source of default ids; incremented for every ref created without one
    count = 0

    def __init__(self, id=None, val=None):
        """Create a ref; id defaults to the next value of StackRef.count."""
        if id is None:
            id = StackRef.count
            StackRef.count = StackRef.count + 1
        self.id = id
        self.val = val

    def __repr__(self):
        # Compare against None: a ref bound to instruction 0 is bound.
        if self.val is not None:
            return "StackRef(val=%d)" % self.val
        else:
            return "StackRef(id=%d)" % self.id

    def bind(self, inst):
        """Bind the ref to the instruction index inst."""
        self.val = inst

    def resolve(self):
        """Return the bound instruction index.

        An unbound ref is reported on sys.stdout and resolves to 0.
        """
        if self.val is None:
            sys.stdout.write("UNRESOLVE REF %s\n" % (self,))
            return 0
        return self.val
387
class StackDepthTracker:
    """Estimate the VM stack depth needed by a list of instructions.

    The net stack effect of an opcode is looked up in three steps: the
    effect table (exact opcode name), the patterns list (opcode name
    prefix), and finally a method named after the opcode that computes
    the effect from the instruction argument.  Opcodes found by none of
    the three are assumed to leave the depth unchanged.
    """
    # XXX need to keep track of stack depth on jumps

    def findDepth(self, insts):
        """Return the maximum stack depth reached by insts.

        insts is a sequence of (opname,) or (opname, arg) tuples.  The
        instructions are scanned in order, without following jumps; the
        running depth is never allowed to drop below zero.
        """
        depth = 0
        maxDepth = 0
        for inst in insts:
            opname = inst[0]
            delta = self.effect.get(opname)
            if delta is None:
                # now check patterns
                for pat, patDelta in self.patterns:
                    if opname[:len(pat)] == pat:
                        delta = patDelta
                        break
            if delta is None:
                # if we still haven't found a match, look for a special
                # case method that needs the argument
                if hasattr(self, opname):
                    delta = getattr(self, opname)(inst[1])
                else:
                    delta = 0
            depth = depth + delta
            # sample after the effect is applied so pushes are counted
            if depth > maxDepth:
                maxDepth = depth
            if depth < 0:
                depth = 0
        return maxDepth

    # net stack effect of opcodes whose effect is fixed
    effect = {
        'POP_TOP': -1,
        'DUP_TOP': 1,
        'SLICE+1': -1,
        'SLICE+2': -1,
        'SLICE+3': -2,
        'STORE_SLICE+0': -1,
        'STORE_SLICE+1': -2,
        'STORE_SLICE+2': -2,
        'STORE_SLICE+3': -3,
        'DELETE_SLICE+0': -1,
        'DELETE_SLICE+1': -2,
        'DELETE_SLICE+2': -2,
        'DELETE_SLICE+3': -3,
        'STORE_SUBSCR': -3,
        'DELETE_SUBSCR': -2,
        # PRINT_EXPR?
        'PRINT_ITEM': -1,
        'LOAD_LOCALS': 1,
        'RETURN_VALUE': -1,
        'EXEC_STMT': -2,
        'BUILD_CLASS': -2,
        'STORE_NAME': -1,
        'STORE_ATTR': -2,
        'DELETE_ATTR': -1,
        'STORE_GLOBAL': -1,
        'BUILD_MAP': 1,
        'COMPARE_OP': -1,
        'STORE_FAST': -1,
        # FOR_LOOP leaves the current item on top of sequence and index
        'FOR_LOOP': 1,
        # a handler is entered with three extra values on the stack;
        # counting them at block setup is close enough for a linear scan
        'SETUP_EXCEPT': 3,
        'SETUP_FINALLY': 3,
        }
    # use pattern match: (opcode name prefix, net stack effect)
    patterns = [
        ('BINARY_', -1),
        ('LOAD_', 1),
        ('IMPORT_', 1),
        ]
    # special cases

    #: UNPACK_TUPLE, UNPACK_LIST, BUILD_TUPLE,
    # BUILD_LIST, CALL_FUNCTION, MAKE_FUNCTION, BUILD_SLICE
    # Each method returns the net stack effect for its argument.
    def UNPACK_TUPLE(self, count):
        # pops the sequence, pushes count items
        return count - 1
    def UNPACK_LIST(self, count):
        return count - 1
    def BUILD_TUPLE(self, count):
        # pops count items, pushes the new object
        return 1 - count
    def BUILD_LIST(self, count):
        return 1 - count
    def CALL_FUNCTION(self, argc):
        # low byte: positional arguments; high byte: keyword arguments,
        # each taking two stack slots.  The callable that is popped and
        # the result that is pushed cancel out.
        hi, lo = divmod(argc, 256)
        return -(lo + hi * 2)
    def MAKE_FUNCTION(self, argc):
        # pops argc defaults and the code object, pushes the function
        return -argc
    def BUILD_SLICE(self, argc):
        # pops argc items (2 or 3), pushes the slice object
        return 1 - argc
477
# Module-level shortcut: findDepth is the bound method of one shared
# StackDepthTracker instance, so it can be called as a plain function
# (PyAssembler.makeCodeObject does so to compute the stack size).
findDepth = StackDepthTracker().findDepth