blob: 047836b13d5985337de1ec3f309f976d7ee7e3b9 [file] [log] [blame]
"""Assembler for Python bytecode

The new module is used to create the code object.  The following
attribute definitions are included from the reference manual:

co_name gives the function name
co_argcount is the number of positional arguments (including
    arguments with default values)
co_nlocals is the number of local variables used by the function
    (including arguments)
co_varnames is a tuple containing the names of the local variables
    (starting with the argument names)
co_code is a string representing the sequence of bytecode instructions
co_consts is a tuple containing the literals used by the bytecode
co_names is a tuple containing the names used by the bytecode
co_filename is the filename from which the code was compiled
co_firstlineno is the first line number of the function
co_lnotab is a string encoding the mapping from byte code offsets
    to line numbers.  See LineAddrTable below.
co_stacksize is the required stack size (including local variables)
co_flags is an integer encoding a number of flags for the
    interpreter.  There are four flags:
        CO_OPTIMIZED -- uses load fast
        CO_NEWLOCALS -- everything?
        CO_VARARGS -- uses *args
        CO_VARKEYWORDS -- uses **args

If a code object represents a function, the first item in co_consts is
the documentation string of the function, or None if undefined.
"""
31
32import sys
33import dis
34import new
35import string
36
37import misc
38
# Bit flags stored in the co_flags field of a code object.
CO_OPTIMIZED = 1 << 0       # uses load fast
CO_NEWLOCALS = 1 << 1       # default flag value set by PyAssembler.__init__
CO_VARARGS = 1 << 2         # function accepts *args
CO_VARKEYWORDS = 1 << 3     # function accepts **args
44
class PyAssembler:
    """Creates Python code objects

    Instructions are collected by emit() as tuples of the form
    (opname,) or (opname, arg).  makeCodeObject() translates the
    symbolic arguments into numbers, assembles the bytecode string and
    line-number table, and builds the code object with the new module.
    """

    # XXX this class needs major refactoring

    def __init__(self, args=(), name='?', filename='<?>',
                 docstring=None):
        """Start an empty instruction list for one code object.

        args -- sequence of argument names; they seed varnames and
                their count becomes argcount
        name, filename -- stored and later passed to new.code()
        docstring -- becomes the first entry of the constants list
        """
        # XXX why is the default value for flags 3?
        self.insts = []
        # used by makeCodeObject
        self.argcount = len(args)
        self.code = ''
        self.consts = [docstring]
        self.filename = filename
        self.flags = CO_NEWLOCALS
        self.name = name
        self.names = []
        self.varnames = list(args) or []
        # lnotab support
        self.firstlineno = 0
        self.lastlineno = 0
        self.last_addr = 0
        self.lnotab = ''

    def __repr__(self):
        return "<bytecode: %d instrs>" % len(self.insts)

    def setFlags(self, val):
        """XXX for module's function"""
        self.flags = val

    def setOptimized(self):
        """Turn on the CO_OPTIMIZED flag bit."""
        self.flags = self.flags | CO_OPTIMIZED

    def setVarArgs(self):
        """Turn on the CO_VARARGS flag bit."""
        self.flags = self.flags | CO_VARARGS

    def setKWArgs(self):
        """Turn on the CO_VARKEYWORDS flag bit."""
        self.flags = self.flags | CO_VARKEYWORDS

    def getCurInst(self):
        """Return the number of instructions emitted so far."""
        return len(self.insts)

    def getNextInst(self):
        """Return the number of instructions emitted so far, plus one."""
        return len(self.insts) + 1

    def dump(self, io=sys.stdout):
        """Write a numbered listing of the instructions to io.

        A blank line is written before every SET_LINENO instruction.
        """
        i = 0
        for inst in self.insts:
            if inst[0] == 'SET_LINENO':
                io.write("\n")
            io.write(" %3d " % i)
            if len(inst) == 1:
                io.write("%s\n" % inst)
            else:
                io.write("%-15.15s\t%s\n" % inst)
            i = i + 1

    def makeCodeObject(self):
        """Make a Python code object

        This creates a Python code object using the new module.  This
        seems simpler than reverse-engineering the way marshal dumps
        code objects into .pyc files.  One of the key difficulties is
        figuring out how to layout references to code objects that
        appear on the VM stack; e.g.
          3 SET_LINENO          1
          6 LOAD_CONST          0 (<code object fact at 8115878 [...]
          9 MAKE_FUNCTION       0
         12 STORE_NAME          0 (fact)

        Raises TypeError if an instruction argument could not be
        translated into a number.
        """

        self._findOffsets()
        lnotab = LineAddrTable()
        for t in self.insts:
            opname = t[0]
            if len(t) == 1:
                lnotab.addCode(chr(self.opnum[opname]))
            elif len(t) == 2:
                oparg = self._convertArg(opname, t[1])
                if opname == 'SET_LINENO':
                    lnotab.nextLine(oparg)
                try:
                    hi, lo = divmod(oparg, 256)
                except TypeError:
                    raise TypeError("untranslated arg: %s, %s"
                                    % (opname, oparg))
                # the argument is stored low byte first
                lnotab.addCode(chr(self.opnum[opname]) + chr(lo) +
                               chr(hi))
        # why is a module a special case?
        if self.flags == 0:
            nlocals = 0
        else:
            nlocals = len(self.varnames)
        # Adjust a local copy so that self.argcount is left untouched
        # and this method can safely be called more than once.
        # NOTE(review): the decrement presumably drops the **kwargs
        # name from the positional count -- confirm against callers.
        argcount = self.argcount
        if self.flags & CO_VARKEYWORDS:
            argcount = argcount - 1
        # The lnotab deltas are relative to the first SET_LINENO line,
        # so use that line unless a caller stored an explicit value.
        firstlineno = self.firstlineno or lnotab.firstline
        stacksize = findDepth(self.insts)
        co = new.code(argcount, nlocals, stacksize,
                      self.flags, lnotab.getCode(), self._getConsts(),
                      tuple(self.names), tuple(self.varnames),
                      self.filename, self.name, firstlineno,
                      lnotab.getTable())
        return co

    def _getConsts(self):
        """Return a tuple for the const slot of a code object

        Any constant that has an asConst() method is replaced by the
        result of calling it; all other constants are used unchanged.
        """
        l = []
        for elt in self.consts:
            # XXX might be clearer to just use isinstance(CodeGen)
            if hasattr(elt, 'asConst'):
                l.append(elt.asConst())
            else:
                l.append(elt)
        return tuple(l)

    def _findOffsets(self):
        """Find offsets for use in resolving StackRefs

        Fills self.offsets with the byte offset of every instruction
        (one byte without an argument, three bytes with one).
        """
        self.offsets = []
        cur = 0
        for t in self.insts:
            self.offsets.append(cur)
            l = len(t)
            if l == 1:
                cur = cur + 1
            elif l == 2:
                cur = cur + 3
                arg = t[1]
                # XXX this is a total hack: for a reference used
                # multiple times, we create a list of offsets and
                # expect that when we pass through the code again
                # to actually generate the offsets, we'll pass in the
                # same order.
                # (The attribute name is mangled by the class, so it
                # only matches the uses inside PyAssembler.)
                if isinstance(arg, StackRef):
                    try:
                        arg.__offset.append(cur)
                    except AttributeError:
                        arg.__offset = [cur]

    def _convertArg(self, op, arg):
        """Convert the string representation of an arg to a number

        The specific handling depends on the opcode.  Arguments of
        opcodes that need no translation are returned unchanged.

        XXX This first implementation isn't going to be very
        efficient.
        """
        if op == 'SET_LINENO':
            return arg
        if op == 'LOAD_CONST':
            return self._lookupName(arg, self.consts)
        if op in self.localOps:
            # make sure it's in self.names, but use the bytecode offset
            self._lookupName(arg, self.names)
            return self._lookupName(arg, self.varnames)
        if op in self.globalOps:
            return self._lookupName(arg, self.names)
        if op in self.nameOps:
            return self._lookupName(arg, self.names)
        if op == 'COMPARE_OP':
            return self.cmp_op.index(arg)
        if self.hasjrel.has_elt(op):
            # relative jump: distance from the end of this instruction,
            # using the offsets queued up by _findOffsets
            offset = arg.__offset[0]
            del arg.__offset[0]
            return self.offsets[arg.resolve()] - offset
        if self.hasjabs.has_elt(op):
            return self.offsets[arg.resolve()]
        return arg

    nameOps = ('STORE_NAME', 'IMPORT_NAME', 'IMPORT_FROM',
               'STORE_ATTR', 'LOAD_ATTR', 'LOAD_NAME', 'DELETE_NAME')
    localOps = ('LOAD_FAST', 'STORE_FAST', 'DELETE_FAST')
    globalOps = ('LOAD_GLOBAL', 'STORE_GLOBAL', 'DELETE_GLOBAL')

    def _lookupName(self, name, list, list2=None):
        """Return index of name in list, appending if necessary

        Entries match only when both type and value are equal, so that
        constants such as 1 and 1.0 get separate slots.

        Yicky hack: Second list can be used for lookup of local names
        where the name needs to be added to varnames and names.
        """
        kind = type(name)
        for i in range(len(list)):
            if kind == type(list[i]) and list[i] == name:
                return i
        end = len(list)
        list.append(name)
        if list2 is not None:
            list2.append(name)
        return end

    # Convert some stuff from the dis module for local use

    cmp_op = list(dis.cmp_op)
    hasjrel = misc.Set()
    for i in dis.hasjrel:
        hasjrel.add(dis.opname[i])
    hasjabs = misc.Set()
    for i in dis.hasjabs:
        hasjabs.add(dis.opname[i])

    # map opcode name -> opcode number
    opnum = {}
    for num in range(len(dis.opname)):
        opnum[dis.opname[num]] = num

    # this version of emit + arbitrary hooks might work, but it's damn
    # messy.

    def emit(self, *args):
        """Append the instruction (opname[, arg]) after running its hooks."""
        self._emitDispatch(args[0], args[1:])
        self.insts.append(args)

    def _emitDispatch(self, type, args):
        """Call every hook registered in _emit_hooks for opcode name type."""
        for func in self._emit_hooks.get(type, []):
            func(self, args)

    _emit_hooks = {}
263
class LineAddrTable:
    """lnotab

    This class builds the lnotab, which is undocumented but described
    by com_set_lineno in compile.c.  Here's an attempt at explanation:

    For each SET_LINENO instruction after the first one, two bytes are
    added to lnotab.  (In some cases, multiple two-byte entries are
    added.)  The first byte is the distance in bytes between the
    instruction for the last SET_LINENO and the current SET_LINENO.
    The second byte is offset in line numbers.  If either offset is
    greater than 255, multiple two-byte entries are added -- one entry
    for each factor of 255.

    When the address offset is too large, the extra entries carry a
    line offset of 0 and come first, so that the line number only
    changes at the address of the new SET_LINENO.
    """

    def __init__(self):
        self.code = []          # bytecode fragments, in order
        self.codeOffset = 0     # total length of the fragments so far
        self.firstline = 0      # line of the first SET_LINENO (0: none yet)
        self.lastline = 0       # line of the last recorded entry
        self.lastoff = 0        # code offset of the last recorded entry
        self.lnotab = []        # flat list: addr delta, line delta, ...

    def addCode(self, code):
        """Append a bytecode fragment and advance the code offset."""
        self.code.append(code)
        self.codeOffset = self.codeOffset + len(code)

    def nextLine(self, lineno):
        """Record that the code added from now on belongs to lineno.

        The first call only remembers the starting line.  A line number
        lower than the previous one is ignored, because the table can
        only hold non-negative one-byte deltas.
        """
        if self.firstline == 0:
            self.firstline = lineno
            self.lastline = lineno
            return
        # compute deltas
        addr = self.codeOffset - self.lastoff
        line = lineno - self.lastline
        if line < 0:
            # Keep the previous reference point so that the next
            # forward step is still measured from a recorded entry.
            return
        push = self.lnotab.append
        # Use up an oversized address delta first, without moving the
        # line number ...
        while addr > 255:
            push(255)
            push(0)
            addr = addr - 255
        # ... then an oversized line delta; the remaining address delta
        # goes with the first of these entries.
        while line > 255:
            push(addr)
            push(255)
            line = line - 255
            addr = 0
        if addr > 0 or line > 0:
            push(addr)
            push(line)
        self.lastline = lineno
        self.lastoff = self.codeOffset

    def getCode(self):
        """Return all bytecode fragments joined into one string."""
        return string.join(self.code, '')

    def getTable(self):
        """Return the line-number table as a string of characters."""
        return string.join(map(chr, self.lnotab), '')
320
class StackRef:
    """Manage stack locations for jumps, loops, etc.

    A reference starts out unbound (val is None) and is later bound to
    a value with bind().  References created without an explicit id
    are numbered from the class-wide counter.
    """
    count = 0

    def __init__(self, id=None, val=None):
        if id is None:
            id = StackRef.count
            StackRef.count = StackRef.count + 1
        self.id = id
        self.val = val

    def __repr__(self):
        # Compare with None: a reference bound to 0 is still bound.
        if self.val is not None:
            return "StackRef(val=%d)" % self.val
        else:
            return "StackRef(id=%d)" % self.id

    def bind(self, inst):
        """Bind the reference to inst."""
        self.val = inst

    def resolve(self):
        """Return the bound value.

        An unbound reference is reported on standard output and
        resolves to 0.
        """
        if self.val is None:
            sys.stdout.write("UNRESOLVE REF %s\n" % (self,))
            return 0
        return self.val
346
class StackDepthTracker:
    """Estimate the stack depth needed by a list of instructions.

    The effect of an opcode on the stack depth is looked up in three
    places, in this order: the effect table (exact opcode name), the
    patterns list (opcode name prefix), and a method named after the
    opcode that computes the effect from the instruction's argument.
    Opcodes found in none of them are taken to leave the depth
    unchanged.
    """
    # XXX need to keep track of stack depth on jumps

    def findDepth(self, insts):
        """Return the largest depth reached while scanning insts.

        insts is a sequence of (opname,) or (opname, arg) tuples.  The
        instructions are scanned in order; jumps are not followed.
        """
        depth = 0
        maxDepth = 0
        for inst in insts:
            opname = inst[0]
            delta = self.effect.get(opname)
            if delta is None:
                # now check patterns
                for pat, pat_delta in self.patterns:
                    if opname[:len(pat)] == pat:
                        delta = pat_delta
                        break
            if delta is None:
                # if we still haven't found a match
                meth = getattr(self, opname, None)
                if meth is not None:
                    delta = meth(inst[1])
                else:
                    delta = 0
            depth = depth + delta
            # sample after every instruction so a trailing push counts
            if depth > maxDepth:
                maxDepth = depth
            if depth < 0:
                depth = 0
        return maxDepth

    effect = {
        'POP_TOP': -1,
        'DUP_TOP': 1,
        'SLICE+1': -1,
        'SLICE+2': -1,
        'SLICE+3': -2,
        'STORE_SLICE+0': -1,
        'STORE_SLICE+1': -2,
        'STORE_SLICE+2': -2,
        'STORE_SLICE+3': -3,
        'DELETE_SLICE+0': -1,
        'DELETE_SLICE+1': -2,
        'DELETE_SLICE+2': -2,
        'DELETE_SLICE+3': -3,
        'STORE_SUBSCR': -3,
        'DELETE_SUBSCR': -2,
        # PRINT_EXPR?
        'PRINT_ITEM': -1,
        'LOAD_LOCALS': 1,
        'RETURN_VALUE': -1,
        'EXEC_STMT': -2,
        'BUILD_CLASS': -2,
        'STORE_NAME': -1,
        'STORE_ATTR': -2,
        'DELETE_ATTR': -1,
        'STORE_GLOBAL': -1,
        'BUILD_MAP': 1,
        'COMPARE_OP': -1,
        'STORE_FAST': -1,
        }
    # use pattern match
    patterns = [
        ('BINARY_', -1),
        ('LOAD_', 1),
        ('IMPORT_', 1),
        ]
    # special cases

    #: UNPACK_TUPLE, UNPACK_LIST, BUILD_TUPLE,
    # BUILD_LIST, CALL_FUNCTION, MAKE_FUNCTION, BUILD_SLICE
    def UNPACK_TUPLE(self, count):
        # pops the sequence, pushes count items
        return count - 1
    def UNPACK_LIST(self, count):
        # pops the sequence, pushes count items
        return count - 1
    def BUILD_TUPLE(self, count):
        # pops count items, pushes the tuple
        return 1 - count
    def BUILD_LIST(self, count):
        # pops count items, pushes the list
        return 1 - count
    def CALL_FUNCTION(self, argc):
        # low byte: positional arguments; high byte: keyword arguments
        # (two stack items each).  The callable is replaced by the
        # result, so only the arguments come off the stack.
        hi, lo = divmod(argc, 256)
        return -(lo + hi * 2)
    def MAKE_FUNCTION(self, argc):
        # pops argc defaults; the code object is replaced by the function
        return -argc
    def BUILD_SLICE(self, argc):
        # pops argc items, pushes the slice: -1 for 2, -2 for 3
        return 1 - argc

findDepth = StackDepthTracker().findDepth