blob: 2200dec13abcabf204c6b399ecc0708a445a343c [file] [log] [blame]
Jeremy Hyltona5058122000-02-14 14:14:29 +00001"""Assembler for Python bytecode
2
3The new module is used to create the code object. The following
4attribute definitions are included from the reference manual:
5
6co_name gives the function name
7co_argcount is the number of positional arguments (including
8 arguments with default values)
9co_nlocals is the number of local variables used by the function
10 (including arguments)
11co_varnames is a tuple containing the names of the local variables
12 (starting with the argument names)
13co_code is a string representing the sequence of bytecode instructions
14co_consts is a tuple containing the literals used by the bytecode
15co_names is a tuple containing the names used by the bytecode
16co_filename is the filename from which the code was compiled
17co_firstlineno is the first line number of the function
18co_lnotab is a string encoding the mapping from byte code offsets
19 to line numbers. see LineAddrTable below.
20co_stacksize is the required stack size (including local variables)
21co_flags is an integer encoding a number of flags for the
22 interpreter. There are four flags:
23 CO_OPTIMIZED -- uses load fast
24 CO_NEWLOCALS -- everything?
25 CO_VARARGS -- use *args
26 CO_VARKEYWORDS -- uses **args
27
28If a code object represents a function, the first item in co_consts is
29the documentation string of the function, or None if undefined.
30"""
31
32import sys
33import dis
34import new
35import string
36
37import misc
38
# flags for code objects (bit values that are or-ed together into co_flags)
CO_OPTIMIZED = 0x0001    # uses load fast
CO_NEWLOCALS = 0x0002    # the initial flags value of a PyAssembler
CO_VARARGS = 0x0004      # function uses *args
CO_VARKEYWORDS = 0x0008  # function uses **args
44
class PyAssembler:
    """Creates Python code objects

    Instructions are collected with emit() as tuples of the form
    (opname,) or (opname, arg).  makeCodeObject() then translates the
    symbolic arguments into numbers, assembles the bytecode string and
    the line number table, and builds the code object.
    """

    # XXX this class needs major refactoring

    def __init__(self, args=(), name='?', filename='<?>',
                 docstring=None):
        """Set up an empty instruction list for one code object

        args is the sequence of argument names; they become the first
        entries of varnames.  docstring is stored as the first constant.
        """
        # XXX why is the default value for flags 3?
        self.insts = []
        # used by makeCodeObject
        self.argcount = len(args)
        self.code = ''
        self.consts = [docstring]
        self.filename = filename
        self.flags = CO_NEWLOCALS
        self.name = name
        self.names = []
        self.varnames = list(args)
        # lnotab support
        self.firstlineno = 0
        self.lastlineno = 0
        self.last_addr = 0
        self.lnotab = ''

    def __repr__(self):
        return "<bytecode: %d instrs>" % len(self.insts)

    def setFlags(self, val):
        """XXX for module's function"""
        self.flags = val

    def setOptimized(self):
        self.flags = self.flags | CO_OPTIMIZED

    def setVarArgs(self):
        self.flags = self.flags | CO_VARARGS

    def setKWArgs(self):
        self.flags = self.flags | CO_VARKEYWORDS

    def getCurInst(self):
        """Return the index the next emitted instruction will get"""
        return len(self.insts)

    def getNextInst(self):
        """Return the index of the instruction after the next one"""
        return len(self.insts) + 1

    def dump(self, io=sys.stdout):
        """Write a listing of the instructions to io

        A blank line is written before every SET_LINENO instruction.
        """
        i = 0
        for inst in self.insts:
            if inst[0] == 'SET_LINENO':
                io.write("\n")
            io.write(" %3d " % i)
            if len(inst) == 1:
                io.write("%s\n" % inst)
            else:
                io.write("%-15.15s\t%s\n" % inst)
            i = i + 1

    def makeCodeObject(self):
        """Make a Python code object

        This creates a Python code object using the new module. This
        seems simpler than reverse-engineering the way marshal dumps
        code objects into .pyc files. One of the key difficulties is
        figuring out how to layout references to code objects that
        appear on the VM stack; e.g.
        3 SET_LINENO 1
        6 LOAD_CONST 0 (<code object fact at 8115878 [...]
        9 MAKE_FUNCTION 0
        12 STORE_NAME 0 (fact)

        The assembler's own attributes are left untouched, so the
        method may be called more than once.  If new.code() raises
        SystemError, every argument is written to stdout before the
        exception is re-raised.
        """

        self._findOffsets()
        lnotab = LineAddrTable()
        for t in self.insts:
            opname = t[0]
            if len(t) == 1:
                lnotab.addCode(chr(self.opnum[opname]))
            elif len(t) == 2:
                oparg = self._convertArg(opname, t[1])
                if opname == 'SET_LINENO':
                    lnotab.nextLine(oparg)
                try:
                    hi, lo = divmod(oparg, 256)
                except TypeError:
                    raise TypeError("untranslated arg: %s, %s"
                                    % (opname, oparg))
                # the argument is stored low byte first
                lnotab.addCode(chr(self.opnum[opname]) + chr(lo) +
                               chr(hi))
        # why is a module a special case?
        if self.flags == 0:
            nlocals = 0
        else:
            nlocals = len(self.varnames)
        # Work on a local copy: adjusting self.argcount in place made
        # a second pass through here produce a wrong argument count.
        argcount = self.argcount
        if self.flags & CO_VARKEYWORDS:
            argcount = argcount - 1
        stacksize = findDepth(self.insts)
        # The lnotab deltas are relative to the line of the first
        # SET_LINENO, so use that line unless one was set explicitly.
        firstlineno = self.firstlineno or lnotab.firstline
        try:
            co = new.code(argcount, nlocals, stacksize,
                          self.flags, lnotab.getCode(), self._getConsts(),
                          tuple(self.names), tuple(self.varnames),
                          self.filename, self.name, firstlineno,
                          lnotab.getTable())
        except SystemError:
            # dump everything that was passed to new.code, then re-raise
            err = sys.exc_info()[1]
            sys.stdout.write("%s\n" % (err,))
            for value in (argcount, nlocals, stacksize, self.flags,
                          lnotab.getCode(), self._getConsts(),
                          self.names, self.varnames, self.filename,
                          self.name, firstlineno, lnotab.getTable()):
                sys.stdout.write("%s\n" % repr(value))
            raise
        return co

    def _getConsts(self):
        """Return a tuple for the const slot of a code object

        Converts PythonVMCode objects to code objects
        """
        l = []
        for elt in self.consts:
            # XXX might be clearer to just ask isinstance(CodeGen)
            if hasattr(elt, 'asConst'):
                l.append(elt.asConst())
            else:
                l.append(elt)
        return tuple(l)

    def _findOffsets(self):
        """Find offsets for use in resolving StackRefs

        Fills self.offsets with the byte offset of every instruction.
        An instruction without an argument takes one byte, one with an
        argument takes three.
        """
        self.offsets = []
        cur = 0
        for t in self.insts:
            self.offsets.append(cur)
            l = len(t)
            if l == 1:
                cur = cur + 1
            elif l == 2:
                cur = cur + 3
                arg = t[1]
                # XXX this is a total hack: for a reference used
                # multiple times, we create a list of offsets and
                # expect that when we pass through the code again
                # to actually generate the offsets, we'll pass in the
                # same order.
                # Only relative jumps consume an entry in _convertArg,
                # so only they may add one; otherwise a reference that
                # is also the target of an absolute jump would hand out
                # the wrong offset.
                if isinstance(arg, StackRef) and self.hasjrel.has_elt(t[0]):
                    try:
                        arg.__offset.append(cur)
                    except AttributeError:
                        arg.__offset = [cur]

    def _convertArg(self, op, arg):
        """Convert the string representation of an arg to a number

        The specific handling depends on the opcode.

        XXX This first implementation isn't going to be very
        efficient.
        """
        if op == 'SET_LINENO':
            return arg
        if op == 'LOAD_CONST':
            return self._lookupName(arg, self.consts)
        if op in self.localOps:
            # make sure it's in self.names, but use the bytecode offset
            self._lookupName(arg, self.names)
            return self._lookupName(arg, self.varnames)
        if op in self.globalOps:
            return self._lookupName(arg, self.names)
        if op in self.nameOps:
            return self._lookupName(arg, self.names)
        if op == 'COMPARE_OP':
            return self.cmp_op.index(arg)
        if self.hasjrel.has_elt(op):
            # relative to the offset recorded by _findOffsets, i.e. the
            # offset just past this instruction
            offset = arg.__offset[0]
            del arg.__offset[0]
            return self.offsets[arg.resolve()] - offset
        if self.hasjabs.has_elt(op):
            return self.offsets[arg.resolve()]
        return arg

    nameOps = ('STORE_NAME', 'IMPORT_NAME', 'IMPORT_FROM',
               'STORE_ATTR', 'LOAD_ATTR', 'LOAD_NAME', 'DELETE_NAME')
    localOps = ('LOAD_FAST', 'STORE_FAST', 'DELETE_FAST')
    globalOps = ('LOAD_GLOBAL', 'STORE_GLOBAL', 'DELETE_GLOBAL')

    def _lookupName(self, name, list, list2=None):
        """Return index of name in list, appending if necessary

        Entries only match when they have the same type as well as the
        same value, so that equal constants of different types (e.g. 1
        and 1.0) each get their own slot.

        Yicky hack: Second list can be used for lookup of local names
        where the name needs to be added to varnames and names.
        """
        kind = type(name)
        for i in range(len(list)):
            if type(list[i]) is kind and list[i] == name:
                return i
        end = len(list)
        list.append(name)
        if list2 is not None:
            list2.append(name)
        return end

    # Convert some stuff from the dis module for local use

    cmp_op = list(dis.cmp_op)
    hasjrel = misc.Set()
    for i in dis.hasjrel:
        hasjrel.add(dis.opname[i])
    hasjabs = misc.Set()
    for i in dis.hasjabs:
        hasjabs.add(dis.opname[i])

    # maps an opcode name to its number
    opnum = {}
    for num in range(len(dis.opname)):
        opnum[dis.opname[num]] = num

    # this version of emit + arbitrary hooks might work, but it's damn
    # messy.

    def emit(self, *args):
        """Append the instruction args, running its emit hooks first"""
        self._emitDispatch(args[0], args[1:])
        self.insts.append(args)

    def _emitDispatch(self, type, args):
        for func in self._emit_hooks.get(type, []):
            func(self, args)

    _emit_hooks = {}
279
class LineAddrTable:
    """lnotab

    Collects the pieces of the bytecode string together with the
    matching lnotab.  The lnotab is undocumented, but com_set_lineno
    in compile.c describes it.  In short:

    Every SET_LINENO instruction except the first one contributes two
    bytes to lnotab (sometimes several two-byte entries).  The first
    byte of an entry is the number of code bytes between the previous
    SET_LINENO and the current one; the second byte is the line number
    increment.  When either value exceeds 255 it is spread over as
    many two-byte entries as needed, each byte carrying at most 255.
    """

    def __init__(self):
        # pieces of bytecode and their total length so far
        self.code = []
        self.codeOffset = 0
        # line numbers seen; 0 means no line has been recorded yet
        self.firstline = 0
        self.lastline = 0
        # code offset at the most recent recorded line change
        self.lastoff = 0
        # the table itself, kept as a flat list of small integers
        self.lnotab = []

    def addCode(self, code):
        """Append a piece of bytecode and advance the code offset"""
        self.code.append(code)
        self.codeOffset = self.codeOffset + len(code)

    def nextLine(self, lineno):
        """Record that the code added from now on belongs to lineno"""
        if self.firstline == 0:
            # the very first line only sets the base, no table entry
            self.firstline = self.lastline = lineno
            return
        addr_left = self.codeOffset - self.lastoff
        line_left = lineno - self.lastline
        # emit (addr, line) byte pairs until both deltas are used up
        while addr_left > 0 or line_left > 0:
            addr_step = min(addr_left, 255)
            line_step = min(line_left, 255)
            self.lnotab.append(addr_step)
            self.lnotab.append(line_step)
            addr_left = addr_left - addr_step
            line_left = line_left - line_step
        self.lastline = lineno
        self.lastoff = self.codeOffset

    def getCode(self):
        """Return the collected bytecode as one string"""
        pieces = self.code
        return string.join(pieces, '')

    def getTable(self):
        """Return the lnotab as a string, one character per entry"""
        chars = map(chr, self.lnotab)
        return string.join(chars, '')
336
class StackRef:
    """Manage stack locations for jumps, loops, etc.

    A reference starts out unbound (val is None) and is later bound
    with bind() to the index of the instruction it stands for.  Every
    reference created without an explicit id takes the next value of
    the class-wide counter.
    """
    count = 0

    def __init__(self, id=None, val=None):
        if id is None:
            id = StackRef.count
            StackRef.count = StackRef.count + 1
        self.id = id
        self.val = val

    def __repr__(self):
        # Compare against None: a reference bound to instruction 0 is
        # bound all the same and must not be shown as unresolved.
        if self.val is not None:
            return "StackRef(val=%d)" % self.val
        return "StackRef(id=%d)" % self.id

    def bind(self, inst):
        """Bind the reference to the instruction index inst"""
        self.val = inst

    def resolve(self):
        """Return the bound instruction index

        An unbound reference is reported on stdout and resolves to 0.
        """
        if self.val is None:
            sys.stdout.write("UNRESOLVE REF %s\n" % (self,))
            return 0
        return self.val
362
class StackDepthTracker:
    """Estimate the stack size needed by a list of instructions

    The instructions are scanned once, in order; the effect of each
    one on the stack depth is looked up in three places: the effect
    table (exact opcode name), the patterns list (opcode name prefix)
    and finally a method named after the opcode, which computes the
    effect from the instruction's argument.
    """
    # XXX need to keep track of stack depth on jumps

    def findDepth(self, insts):
        """Return the greatest stack depth reached by insts

        insts is a sequence of (opname,) or (opname, arg) tuples.
        Opcodes that are not known in any of the three places are
        taken to leave the stack depth unchanged.
        """
        depth = 0
        maxDepth = 0
        for inst in insts:
            opname = inst[0]
            delta = self.effect.get(opname)
            if delta is None:
                # now check patterns
                for pat, pat_delta in self.patterns:
                    if opname[:len(pat)] == pat:
                        delta = pat_delta
                        break
            if delta is None:
                # if we still haven't found a match, look for a
                # special-case method that needs the argument
                meth = getattr(self, opname, None)
                if meth is not None and len(inst) > 1:
                    delta = meth(inst[1])
                else:
                    delta = 0
            depth = depth + delta
            # jumps are not followed, so the linear scan can dip below
            # zero; never let that hide a later peak
            if depth < 0:
                depth = 0
            # sample the depth after the instruction has taken effect,
            # so that the result of a push is counted
            if depth > maxDepth:
                maxDepth = depth
        return maxDepth

    effect = {
        'POP_TOP': -1,
        'DUP_TOP': 1,
        'SLICE+1': -1,
        'SLICE+2': -1,
        'SLICE+3': -2,
        'STORE_SLICE+0': -1,
        'STORE_SLICE+1': -2,
        'STORE_SLICE+2': -2,
        'STORE_SLICE+3': -3,
        'DELETE_SLICE+0': -1,
        'DELETE_SLICE+1': -2,
        'DELETE_SLICE+2': -2,
        'DELETE_SLICE+3': -3,
        'STORE_SUBSCR': -3,
        'DELETE_SUBSCR': -2,
        # PRINT_EXPR?
        'PRINT_ITEM': -1,
        'LOAD_LOCALS': 1,
        'RETURN_VALUE': -1,
        'EXEC_STMT': -2,
        'BUILD_CLASS': -2,
        'STORE_NAME': -1,
        'STORE_ATTR': -2,
        'DELETE_ATTR': -1,
        'STORE_GLOBAL': -1,
        'BUILD_MAP': 1,
        'COMPARE_OP': -1,
        'STORE_FAST': -1,
        }
    # use pattern match
    patterns = [
        ('BINARY_', -1),
        ('LOAD_', 1),
        ('IMPORT_', 1),
        ]
    # special cases

    #: UNPACK_TUPLE, UNPACK_LIST, BUILD_TUPLE,
    # BUILD_LIST, CALL_FUNCTION, MAKE_FUNCTION, BUILD_SLICE
    def UNPACK_TUPLE(self, count):
        # pops the sequence, pushes its count items
        return count - 1
    def UNPACK_LIST(self, count):
        # pops the sequence, pushes its count items
        return count - 1
    def BUILD_TUPLE(self, count):
        # pops count items, pushes the new tuple
        return 1 - count
    def BUILD_LIST(self, count):
        # pops count items, pushes the new list
        return 1 - count
    def CALL_FUNCTION(self, argc):
        # low byte: positional arguments, one stack slot each;
        # high byte: keyword arguments, two slots each.  The function
        # object is popped as well and the result pushed in its place.
        hi, lo = divmod(argc, 256)
        return -(lo + hi * 2)
    def MAKE_FUNCTION(self, argc):
        # pops argc default values and the code object, pushes the
        # function
        return -argc
    def BUILD_SLICE(self, argc):
        # pops argc (2 or 3) items, pushes the slice object
        return 1 - argc

findDepth = StackDepthTracker().findDepth