blob: 5ae7294a2dad1868a91bf6c470ee43b7673668d9 [file] [log] [blame]
Jeremy Hyltona5058122000-02-14 14:14:29 +00001"""Assembler for Python bytecode
2
3The new module is used to create the code object. The following
4attribute definitions are included from the reference manual:
5
6co_name gives the function name
7co_argcount is the number of positional arguments (including
8 arguments with default values)
9co_nlocals is the number of local variables used by the function
10 (including arguments)
11co_varnames is a tuple containing the names of the local variables
12 (starting with the argument names)
13co_code is a string representing the sequence of bytecode instructions
14co_consts is a tuple containing the literals used by the bytecode
15co_names is a tuple containing the names used by the bytecode
16co_filename is the filename from which the code was compiled
17co_firstlineno is the first line number of the function
18co_lnotab is a string encoding the mapping from byte code offsets
19 to line numbers. see LineAddrTable below.
20co_stacksize is the required stack size (including local variables)
21co_flags is an integer encoding a number of flags for the
22 interpreter. There are four flags:
23 CO_OPTIMIZED -- uses load fast
24 CO_NEWLOCALS -- everything?
25 CO_VARARGS -- use *args
26 CO_VARKEYWORDS -- uses **args
27
28If a code object represents a function, the first item in co_consts is
29the documentation string of the function, or None if undefined.
30"""
31
32import sys
33import dis
34import new
35import string
36
37import misc
38
# Flag bits stored in a code object's co_flags field; each flag is a
# distinct bit so they can be combined with "|".
CO_OPTIMIZED = 1 << 0      # uses load fast
CO_NEWLOCALS = 1 << 1
CO_VARARGS = 1 << 2        # function takes *args
CO_VARKEYWORDS = 1 << 3    # function takes **args
44
class PyAssembler:
    """Creates Python code objects

    Instructions are collected with emit() as tuples of the form
    (opname,) or (opname, arg).  makeCodeObject() translates the
    collected instructions into a bytecode string and a line-number
    table and passes them to new.code().
    """

    # XXX this class needs major refactoring

    def __init__(self, args=(), name='?', filename='<?>',
                 docstring=None):
        """Start an empty code object description.

        args -- sequence of argument names; they become the first
                entries of varnames
        name -- name recorded in the code object
        filename -- filename recorded in the code object
        docstring -- stored as the first entry of consts
        """
        # XXX why is the default value for flags 3?
        self.insts = []
        # used by makeCodeObject
        self._getArgCount(args)
        self.code = ''
        self.consts = [docstring]
        self.filename = filename
        self.flags = CO_NEWLOCALS
        self.name = name
        self.names = []
        self.varnames = list(args)
        # lnotab support
        self.firstlineno = 0
        self.lastlineno = 0
        self.last_addr = 0
        self.lnotab = ''

    def _getArgCount(self, args):
        """Set self.argcount from the sequence of argument names.

        When the first name starts with '.', the count is the index of
        the last name starting with '.', plus one.  Otherwise it is
        len(args).
        """
        # NOTE(review): '.'-prefixed names are presumably synthetic
        # names generated for tuple-unpacking parameters -- confirm
        # against the caller.
        if args and args[0][0] == '.':
            for i in range(len(args)):
                if args[i][0] == '.':
                    num = i
            self.argcount = num + 1
        else:
            self.argcount = len(args)

    def __repr__(self):
        return "<bytecode: %d instrs>" % len(self.insts)

    def setFlags(self, val):
        """XXX for module's function"""
        self.flags = val

    def setOptimized(self):
        """Turn on the CO_OPTIMIZED flag."""
        self.flags = self.flags | CO_OPTIMIZED

    def setVarArgs(self):
        """Turn on the CO_VARARGS flag."""
        self.flags = self.flags | CO_VARARGS

    def setKWArgs(self):
        """Turn on the CO_VARKEYWORDS flag."""
        self.flags = self.flags | CO_VARKEYWORDS

    def getCurInst(self):
        """Return the number of instructions emitted so far."""
        return len(self.insts)

    def getNextInst(self):
        """Return the number of instructions emitted so far, plus one."""
        return len(self.insts) + 1

    def dump(self, io=sys.stdout):
        """Write a listing of the emitted instructions to io.

        Each instruction is prefixed with its index in self.insts; a
        blank line is written before every SET_LINENO instruction.
        """
        i = 0
        for inst in self.insts:
            if inst[0] == 'SET_LINENO':
                io.write("\n")
            io.write(" %3d " % i)
            if len(inst) == 1:
                io.write("%s\n" % inst)
            else:
                io.write("%-15.15s\t%s\n" % inst)
            i = i + 1

    def makeCodeObject(self):
        """Make a Python code object

        This creates a Python code object using the new module.  This
        seems simpler than reverse-engineering the way marshal dumps
        code objects into .pyc files.  One of the key difficulties is
        figuring out how to layout references to code objects that
        appear on the VM stack; e.g.
          3 SET_LINENO 1
          6 LOAD_CONST 0 (<code object fact at 8115878 [...]
          9 MAKE_FUNCTION 0
         12 STORE_NAME 0 (fact)

        The assembler's own state is not modified by the argument
        count adjustment, so this method can be called repeatedly.
        Raises TypeError when an instruction argument could not be
        translated to an integer; re-raises SystemError from
        new.code() after printing the arguments that were passed.
        """

        self._findOffsets()
        lnotab = LineAddrTable()
        for t in self.insts:
            opname = t[0]
            if len(t) == 1:
                lnotab.addCode(chr(self.opnum[opname]))
            elif len(t) == 2:
                oparg = self._convertArg(opname, t[1])
                if opname == 'SET_LINENO':
                    # record the line before adding the instruction so
                    # the new line starts at SET_LINENO's own offset
                    lnotab.nextLine(oparg)
                try:
                    hi, lo = divmod(oparg, 256)
                except TypeError:
                    raise TypeError("untranslated arg: %s, %s"
                                    % (opname, oparg))
                # the 16-bit argument is stored low byte first
                lnotab.addCode(chr(self.opnum[opname]) + chr(lo) +
                               chr(hi))
        # why is a module a special case?
        if self.flags == 0:
            nlocals = 0
        else:
            nlocals = len(self.varnames)
        # Adjust a local copy: decrementing self.argcount in place
        # made a second call produce a different (wrong) count.
        argcount = self.argcount
        if self.flags & CO_VARKEYWORDS:
            argcount = argcount - 1
        # self.firstlineno is never set by this class; fall back to
        # the first line seen by the line table when it is still 0.
        firstlineno = self.firstlineno or lnotab.firstline
        stacksize = findDepth(self.insts)
        try:
            co = new.code(argcount, nlocals, stacksize,
                          self.flags, lnotab.getCode(), self._getConsts(),
                          tuple(self.names), tuple(self.varnames),
                          self.filename, self.name, firstlineno,
                          lnotab.getTable())
        except SystemError:
            # new.code() rejected its arguments: print the error and
            # each argument, in call order, before re-raising.
            err = sys.exc_info()[1]
            out = sys.stdout.write
            out("%s\n" % (err,))
            out("%s\n" % repr(argcount))
            out("%s\n" % repr(nlocals))
            out("%s\n" % repr(stacksize))
            out("%s\n" % repr(self.flags))
            out("%s\n" % repr(lnotab.getCode()))
            out("%s\n" % repr(self._getConsts()))
            out("%s\n" % repr(self.names))
            out("%s\n" % repr(self.varnames))
            out("%s\n" % repr(self.filename))
            out("%s\n" % repr(self.name))
            out("%s\n" % repr(firstlineno))
            out("%s\n" % repr(lnotab.getTable()))
            raise
        return co

    def _getConsts(self):
        """Return a tuple for the const slot of a code object

        Any constant that has an asConst() method is replaced by the
        result of calling it; everything else is passed through.
        """
        l = []
        for elt in self.consts:
            # XXX might be clearer to just ask isinstance(CodeGen)
            if hasattr(elt, 'asConst'):
                l.append(elt.asConst())
            else:
                l.append(elt)
        return tuple(l)

    def _findOffsets(self):
        """Find offsets for use in resolving StackRefs

        Fills self.offsets with the byte offset at which each
        instruction starts (1 byte without an argument, 3 bytes with
        one).  For every relative jump whose argument is a StackRef,
        the offset just past the jump is queued on the StackRef for
        _convertArg to consume.
        """
        self.offsets = []
        cur = 0
        for t in self.insts:
            self.offsets.append(cur)
            l = len(t)
            if l == 1:
                cur = cur + 1
            elif l == 2:
                cur = cur + 3
                arg = t[1]
                # XXX this is a total hack: for a reference used
                # multiple times, we create a list of offsets and
                # expect that when we pass through the code again
                # to actually generate the offsets, we'll pass in the
                # same order.
                # Only relative jumps consume a queued offset, so only
                # they may queue one; otherwise a label shared with an
                # absolute jump would desynchronise the queue.
                if isinstance(arg, StackRef) and self.hasjrel.has_elt(t[0]):
                    try:
                        arg.__offset.append(cur)
                    except AttributeError:
                        arg.__offset = [cur]

    def _convertArg(self, op, arg):
        """Convert the string representation of an arg to a number

        The specific handling depends on the opcode:
        SET_LINENO args are returned unchanged; LOAD_CONST args become
        an index into consts; local, global and name ops become an
        index into varnames or names; COMPARE_OP args become an index
        into cmp_op; jump targets become byte offsets (relative to the
        end of the jump instruction for relative jumps).  Anything
        else is returned unchanged.

        XXX This first implementation isn't going to be very
        efficient.
        """
        if op == 'SET_LINENO':
            return arg
        if op == 'LOAD_CONST':
            return self._lookupName(arg, self.consts)
        if op in self.localOps:
            # make sure it's in self.names, but use the bytecode offset
            self._lookupName(arg, self.names)
            return self._lookupName(arg, self.varnames)
        if op in self.globalOps:
            return self._lookupName(arg, self.names)
        if op in self.nameOps:
            return self._lookupName(arg, self.names)
        if op == 'COMPARE_OP':
            return self.cmp_op.index(arg)
        if self.hasjrel.has_elt(op):
            # consume the offset queued for this jump by _findOffsets
            offset = arg.__offset[0]
            del arg.__offset[0]
            return self.offsets[arg.resolve()] - offset
        if self.hasjabs.has_elt(op):
            return self.offsets[arg.resolve()]
        return arg

    nameOps = ('STORE_NAME', 'IMPORT_NAME', 'IMPORT_FROM',
               'STORE_ATTR', 'LOAD_ATTR', 'LOAD_NAME', 'DELETE_NAME',
               'DELETE_ATTR')
    localOps = ('LOAD_FAST', 'STORE_FAST', 'DELETE_FAST')
    globalOps = ('LOAD_GLOBAL', 'STORE_GLOBAL', 'DELETE_GLOBAL')

    def _lookupName(self, name, list, list2=None):
        """Return index of name in list, appending if necessary

        An existing entry matches only when it has the same type as
        name and compares equal to it.  Comparing values alone would
        merge constants such as 1 and 1.0 into a single slot.

        Yicky hack: Second list can be used for lookup of local names
        where the name needs to be added to varnames and names.
        """
        t = type(name)
        for i in range(len(list)):
            if type(list[i]) is t and list[i] == name:
                return i
        end = len(list)
        list.append(name)
        if list2 is not None:
            list2.append(name)
        return end

    # Convert some stuff from the dis module for local use

    cmp_op = list(dis.cmp_op)
    hasjrel = misc.Set()
    for i in dis.hasjrel:
        hasjrel.add(dis.opname[i])
    hasjabs = misc.Set()
    for i in dis.hasjabs:
        hasjabs.add(dis.opname[i])

    # maps opcode name -> opcode number
    opnum = {}
    for num in range(len(dis.opname)):
        opnum[dis.opname[num]] = num

    # this version of emit + arbitrary hooks might work, but it's damn
    # messy.

    def emit(self, *args):
        """Append an instruction: emit(opname) or emit(opname, arg).

        Hooks registered for the opcode in _emit_hooks run before the
        instruction is appended to self.insts.
        """
        self._emitDispatch(args[0], args[1:])
        self.insts.append(args)

    def _emitDispatch(self, type, args):
        """Call every hook registered for opcode name type."""
        for func in self._emit_hooks.get(type, []):
            func(self, args)

    # maps opcode name -> list of hook functions called as
    # func(assembler, args)
    _emit_hooks = {}
290
class LineAddrTable:
    """lnotab

    This class builds the lnotab, which is undocumented but described
    by com_set_lineno in compile.c.  Here's an attempt at explanation:

    For each SET_LINENO instruction after the first one, two bytes are
    added to lnotab.  (In some cases, multiple two-byte entries are
    added.)  The first byte is the distance in bytes between the
    instruction for the last SET_LINENO and the current SET_LINENO.
    The second byte is offset in line numbers.  If either offset is
    greater than 255, multiple two-byte entries are added: first
    (255, 0) entries until the remaining byte distance fits, then
    (distance, 255) entries until the remaining line offset fits, and
    finally one entry with whatever is left.  Emitting the address
    part first keeps the line change attached to the correct offset.

    The table bytes are unsigned, so a line number lower than the
    previous one cannot be encoded; such calls add no entry.
    """

    def __init__(self):
        self.code = []          # pieces of the bytecode string
        self.codeOffset = 0     # total length of the pieces so far
        self.firstline = 0      # first line number seen (0: none yet)
        self.lastline = 0       # line number of the last entry
        self.lastoff = 0        # code offset of the last entry
        self.lnotab = []        # table as a flat list of small ints

    def addCode(self, code):
        """Append a piece of bytecode and advance the code offset."""
        self.code.append(code)
        self.codeOffset = self.codeOffset + len(code)

    def nextLine(self, lineno):
        """Record that line lineno starts at the current code offset.

        The first call only remembers lineno as the first line.  Later
        calls append the (byte delta, line delta) entries described in
        the class docstring.  A lineno lower than the previous line is
        ignored, leaving the code that follows attributed to the
        previous line.
        """
        if self.firstline == 0:
            self.firstline = lineno
            self.lastline = lineno
            return
        # compute deltas
        addr = self.codeOffset - self.lastoff
        line = lineno - self.lastline
        if line < 0:
            # would need a negative byte, which chr() cannot produce
            return
        push = self.lnotab.append
        while addr > 255:
            push(255)
            push(0)
            addr = addr - 255
        while line > 255:
            push(addr)
            push(255)
            line = line - 255
            addr = 0
        if addr > 0 or line > 0:
            push(addr)
            push(line)
        self.lastline = lineno
        self.lastoff = self.codeOffset

    def getCode(self):
        """Return the accumulated bytecode as a single string."""
        return ''.join(self.code)

    def getTable(self):
        """Return the line-number table as a string of bytes."""
        return ''.join(map(chr, self.lnotab))
347
class StackRef:
    """Manage stack locations for jumps, loops, etc.

    A StackRef is a label: it is created unbound, later bound to a
    value with bind(), and finally turned back into that value with
    resolve().
    """
    # number of ids handed out so far; used to number anonymous refs
    count = 0

    def __init__(self, id=None, val=None):
        """Create a reference.

        id -- identifier used in the repr of an unbound reference; when
              omitted, the next value of the class-wide counter is used
        val -- value the reference is bound to, or None if unbound
        """
        if id is None:
            id = StackRef.count
            StackRef.count = StackRef.count + 1
        self.id = id
        self.val = val

    def __repr__(self):
        # Test against None explicitly: a reference bound to 0 is
        # bound, and must not be reported as unbound.
        if self.val is not None:
            return "StackRef(val=%d)" % self.val
        return "StackRef(id=%d)" % self.id

    def bind(self, inst):
        """Bind the reference to inst."""
        self.val = inst

    def resolve(self):
        """Return the bound value.

        An unbound reference is reported on standard output and
        resolves to 0.
        """
        if self.val is None:
            sys.stdout.write("UNRESOLVE REF %s\n" % (self,))
            return 0
        return self.val
373
class StackDepthTracker:
    """Estimate the stack depth needed by a list of instructions.

    The net stack effect of an instruction is looked up in three
    places, in order: the effect table (exact opcode name), the
    patterns list (opcode name prefix), and a method of this class
    named after the opcode, which is called with the instruction's
    argument.  Opcodes found in none of them are treated as having no
    effect.
    """
    # XXX need to keep track of stack depth on jumps

    def findDepth(self, insts):
        """Return the maximum stack depth reached by insts.

        insts is a sequence of (opname,) or (opname, arg) tuples.  The
        instructions are scanned linearly; jumps are not followed.
        """
        depth = 0
        maxDepth = 0
        for inst in insts:
            opname = inst[0]
            # None means "not found"; 0 is a legitimate effect
            delta = self.effect.get(opname)
            if delta is None:
                # now check patterns
                for pat, pat_delta in self.patterns:
                    if opname[:len(pat)] == pat:
                        delta = pat_delta
                        break
            if delta is None:
                # if we still haven't found a match, try the special
                # cases whose effect depends on the argument
                meth = getattr(self, opname, None)
                if meth is not None:
                    delta = meth(inst[1])
            if delta is not None:
                depth = depth + delta
            if depth < 0:
                depth = 0
            # check after every instruction so that pushes count
            if depth > maxDepth:
                maxDepth = depth
        return maxDepth

    effect = {
        'POP_TOP': -1,
        'DUP_TOP': 1,
        'SLICE+1': -1,
        'SLICE+2': -1,
        'SLICE+3': -2,
        'STORE_SLICE+0': -1,
        'STORE_SLICE+1': -2,
        'STORE_SLICE+2': -2,
        'STORE_SLICE+3': -3,
        'DELETE_SLICE+0': -1,
        'DELETE_SLICE+1': -2,
        'DELETE_SLICE+2': -2,
        'DELETE_SLICE+3': -3,
        'STORE_SUBSCR': -3,
        'DELETE_SUBSCR': -2,
        # PRINT_EXPR?
        'PRINT_ITEM': -1,
        'LOAD_LOCALS': 1,
        'RETURN_VALUE': -1,
        'EXEC_STMT': -2,
        'BUILD_CLASS': -2,
        'STORE_NAME': -1,
        'STORE_ATTR': -2,
        'DELETE_ATTR': -1,
        'STORE_GLOBAL': -1,
        'BUILD_MAP': 1,
        'COMPARE_OP': -1,
        'STORE_FAST': -1,
        }
    # use pattern match
    patterns = [
        ('BINARY_', -1),
        ('LOAD_', 1),
        ('IMPORT_', 1),
        ]
    # special cases

    #: UNPACK_TUPLE, UNPACK_LIST, BUILD_TUPLE,
    # BUILD_LIST, CALL_FUNCTION, MAKE_FUNCTION, BUILD_SLICE
    def UNPACK_TUPLE(self, count):
        # pops the sequence, pushes its count items
        return count - 1
    def UNPACK_LIST(self, count):
        # pops the sequence, pushes its count items
        return count - 1
    def BUILD_TUPLE(self, count):
        # pops count items, pushes the new tuple
        return 1 - count
    def BUILD_LIST(self, count):
        # pops count items, pushes the new list
        return 1 - count
    def CALL_FUNCTION(self, argc):
        # low byte: positional args (one slot each); high byte:
        # keyword args (two slots each).  The callable is popped too
        # and the result pushed, so the net effect is minus the
        # argument slots.
        hi, lo = divmod(argc, 256)
        return -(lo + hi * 2)
    def MAKE_FUNCTION(self, argc):
        # pops the code object and argc defaults, pushes the function
        return -argc
    def BUILD_SLICE(self, argc):
        # pops argc items, pushes the slice object; any other argc
        # yields None, which findDepth treats as no effect
        if argc == 2:
            return -1
        elif argc == 3:
            return -2
463
# One shared tracker instance; its bound findDepth method is exposed
# as a plain module-level function.
_depthTracker = StackDepthTracker()
findDepth = _depthTracker.findDepth