blob: 067ebc44bad38d95a42a33c67ea656a6946465c8 [file] [log] [blame]
Jeremy Hylton8b6323d2000-02-04 00:28:21 +00001"""Python bytecode generator
2
3Currently contains generic ASTVisitor code, a LocalNameFinder, and a
4CodeGenerator. Eventually, this will get split into the ASTVisitor as
5a generic tool and CodeGenerator as a specific tool.
6"""
7
8from p2c import transformer, ast
9import dis
10import misc
Jeremy Hylton0fdffcf2000-02-04 19:37:35 +000011import marshal
12import new
13import string
Jeremy Hylton8b6323d2000-02-04 00:28:21 +000014
15def parse(path):
16 f = open(path)
17 src = f.read()
18 f.close()
19 t = transformer.Transformer()
20 return t.parsesuite(src)
21
22def walk(tree, visitor):
23 w = ASTVisitor()
24 w.preorder(tree, visitor)
25 return w.visitor
26
27class ASTVisitor:
28 """Performs a depth-first walk of the AST
29
30 The ASTVisitor will walk the AST, performing either a preorder or
31 postorder traversal depending on which method is called.
32
33 methods:
34 preorder(tree, visitor)
35 postorder(tree, visitor)
36 tree: an instance of ast.Node
37 visitor: an instance with visitXXX methods
38
39 The ASTVisitor is responsible for walking over the tree in the
40 correct order. For each node, it checks the visitor argument for
41 a method named 'visitNodeType' where NodeType is the name of the
42 node's class, e.g. Classdef. If the method exists, it is called
43 with the node as its sole argument.
44
45 The visitor method for a particular node type can control how
46 child nodes are visited during a preorder walk. (It can't control
47 the order during a postorder walk, because it is called _after_
48 the walk has occurred.) The ASTVisitor modifies the visitor
49 argument by adding a visit method to the visitor; this method can
50 be used to visit a particular child node. If the visitor method
51 returns a true value, the ASTVisitor will not traverse the child
52 nodes.
53
54 XXX The interface for controlling the preorder walk needs to be
55 re-considered. The current interface is convenient for visitors
56 that mostly let the ASTVisitor do everything. For something like
57 a code generator, where you want to walk to occur in a specific
58 order, it's a pain to add "return 1" to the end of each method.
59
60 XXX Perhaps I can use a postorder walk for the code generator?
61 """
62
63 VERBOSE = 0
64
65 def __init__(self):
66 self.node = None
67
68 def preorder(self, tree, visitor):
69 """Do preorder walk of tree using visitor"""
70 self.visitor = visitor
71 visitor.visit = self._preorder
72 self._preorder(tree)
73
74 def _preorder(self, node):
75 stop = self.dispatch(node)
76 if stop:
77 return
78 for child in node.getChildren():
79 if isinstance(child, ast.Node):
80 self._preorder(child)
81
82 def postorder(self, tree, visitor):
83 """Do preorder walk of tree using visitor"""
84 self.visitor = visitor
85 visitor.visit = self._postorder
86 self._postorder(tree)
87
88 def _postorder(self, tree):
89 for child in node.getChildren():
90 if isinstance(child, ast.Node):
91 self._preorder(child)
92 self.dispatch(node)
93
94 def dispatch(self, node):
95 self.node = node
96 className = node.__class__.__name__
97 meth = getattr(self.visitor, 'visit' + className, None)
98 if self.VERBOSE:
99 print "dispatch", className, (meth and meth.__name__ or '')
100 if meth:
101 return meth(node)
102
Jeremy Hylton8b6323d2000-02-04 00:28:21 +0000103class CodeGenerator:
104 def __init__(self):
105 self.code = PythonVMCode()
106 self.locals = misc.Stack()
107
108 def visitDiscard(self, node):
109 return 1
110
111 def visitModule(self, node):
112 lnf = walk(node.node, LocalNameFinder())
113 self.locals.push(lnf.getLocals())
Jeremy Hylton0fdffcf2000-02-04 19:37:35 +0000114 self.visit(node.node)
Jeremy Hylton8b6323d2000-02-04 00:28:21 +0000115 self.code.emit('LOAD_CONST', 'None')
116 self.code.emit('RETURN_VALUE')
Jeremy Hylton0fdffcf2000-02-04 19:37:35 +0000117 return 1
118
119 def visitFunction(self, node):
120 codeBody = NestedCodeGenerator(node.code, node.argnames)
121 walk(node.code, codeBody)
122 self.code.setLineNo(node.lineno)
123 self.code.emit('LOAD_CONST', codeBody.code)
124 self.code.emit('MAKE_FUNCTION')
125 self.code.emit('STORE_NAME', node.name)
126 return 1
Jeremy Hylton8b6323d2000-02-04 00:28:21 +0000127
128 def visitCallFunc(self, node):
129 self.visit(node.node)
130 for arg in node.args:
131 self.visit(arg)
132 self.code.callFunction(len(node.args))
133 return 1
134
135 def visitIf(self, node):
136 after = ForwardRef()
137 for test, suite in node.tests:
138 self.code.setLineNo(test.lineno)
139 self.visit(test)
140 dest = ForwardRef()
141 self.code.jumpIfFalse(dest)
142 self.code.popTop()
143 self.visit(suite)
144 self.code.jumpForward(after)
145 dest.bind(self.code.getCurInst())
146 self.code.popTop()
147 if node.else_:
148 self.visit(node.else_)
149 after.bind(self.code.getCurInst())
150 return 1
151
152 def visitCompare(self, node):
153 """Comment from compile.c follows:
154
155 The following code is generated for all but the last
156 comparison in a chain:
157
158 label: on stack: opcode: jump to:
159
160 a <code to load b>
161 a, b DUP_TOP
162 a, b, b ROT_THREE
163 b, a, b COMPARE_OP
164 b, 0-or-1 JUMP_IF_FALSE L1
165 b, 1 POP_TOP
166 b
167
168 We are now ready to repeat this sequence for the next
169 comparison in the chain.
170
171 For the last we generate:
172
173 b <code to load c>
174 b, c COMPARE_OP
175 0-or-1
176
177 If there were any jumps to L1 (i.e., there was more than one
178 comparison), we generate:
179
180 0-or-1 JUMP_FORWARD L2
181 L1: b, 0 ROT_TWO
182 0, b POP_TOP
183 0
184 L2: 0-or-1
185 """
186 self.visit(node.expr)
187 # if refs are never emitted, subsequent bind call has no effect
188 l1 = ForwardRef()
189 l2 = ForwardRef()
190 for op, code in node.ops[:-1]:
191 # emit every comparison except the last
192 self.visit(code)
193 self.code.dupTop()
194 self.code.rotThree()
195 self.code.compareOp(op)
196 self.code.jumpIfFalse(l1)
197 self.code.popTop()
198 if node.ops:
199 # emit the last comparison
200 op, code = node.ops[-1]
201 self.visit(code)
202 self.code.compareOp(op)
203 if len(node.ops) > 1:
204 self.code.jumpForward(l2)
205 l1.bind(self.code.getCurInst())
206 self.code.rotTwo()
207 self.code.popTop()
208 l2.bind(self.code.getCurInst())
209 return 1
210
211 def binaryOp(self, node, op):
212 self.visit(node.left)
213 self.visit(node.right)
214 self.code.emit(op)
215 return 1
216
217 def visitAdd(self, node):
218 return self.binaryOp(node, 'BINARY_ADD')
219
220 def visitSub(self, node):
221 return self.binaryOp(node, 'BINARY_SUBTRACT')
222
223 def visitMul(self, node):
224 return self.binaryOp(node, 'BINARY_MULTIPLY')
225
226 def visitDiv(self, node):
227 return self.binaryOp(node, 'BINARY_DIVIDE')
228
229 def visitName(self, node):
230 locals = self.locals.top()
231 if locals.has_elt(node.name):
232 self.code.loadFast(node.name)
233 else:
234 self.code.loadGlobal(node.name)
235
236 def visitConst(self, node):
237 self.code.loadConst(node.value)
238
239 def visitReturn(self, node):
240 self.code.setLineNo(node.lineno)
241 self.visit(node.value)
242 self.code.returnValue()
243 return 1
244
245 def visitRaise(self, node):
246 self.code.setLineNo(node.lineno)
247 n = 0
248 if node.expr1:
249 self.visit(node.expr1)
250 n = n + 1
251 if node.expr2:
252 self.visit(node.expr2)
253 n = n + 1
254 if node.expr3:
255 self.visit(node.expr3)
256 n = n + 1
257 self.code.raiseVarargs(n)
258 return 1
259
260 def visitPrint(self, node):
261 self.code.setLineNo(node.lineno)
262 for child in node.nodes:
263 self.visit(child)
264 self.code.emit('PRINT_ITEM')
265 return 1
266
267 def visitPrintnl(self, node):
268 self.visitPrint(node)
269 self.code.emit('PRINT_NEWLINE')
270 return 1
Jeremy Hylton0fdffcf2000-02-04 19:37:35 +0000271
272class NestedCodeGenerator(CodeGenerator):
273 """Generate code for a function object within another scope
274
275 XXX not clear that this subclass is needed
276 """
277 super_init = CodeGenerator.__init__
278
279 def __init__(self, code, args):
280 """code and args of function or class being walked
281
282 XXX need to separately pass to ASTVisitor. the constructor
283 only uses the code object to find the local names
284 """
285 self.super_init()
286 lnf = walk(code, LocalNameFinder(args))
287 self.locals.push(lnf.getLocals())
288
289 def visitFunction(self, node):
290 lnf = walk(node.code, LocalNameFinder(node.argnames))
291 self.locals.push(lnf.getLocals())
292 # XXX need to handle def foo((a, b)):
293 self.code.setLineNo(node.lineno)
294 self.visit(node.code)
295 self.code.emit('LOAD_CONST', 'None')
296 self.code.emit('RETURN_VALUE')
297 return 1
Jeremy Hylton8b6323d2000-02-04 00:28:21 +0000298
299
300class LocalNameFinder:
301 def __init__(self, names=()):
302 self.names = misc.Set()
303 for name in names:
304 self.names.add(name)
305
306 def getLocals(self):
307 return self.names
308
309 def visitFunction(self, node):
310 self.names.add(node.name)
311 return 1
312
313 def visitImport(self, node):
314 for name in node.names:
315 self.names.add(name)
316
317 def visitFrom(self, node):
318 for name in node.names:
319 self.names.add(name)
320
321 def visitClassdef(self, node):
322 self.names.add(node.name)
323 return 1
324
325 def visitAssName(self, node):
326 self.names.add(node.name)
327
328class Label:
329 def __init__(self, num):
330 self.num = num
331 def __repr__(self):
332 return "Label(%d)" % self.num
333
334class ForwardRef:
335 count = 0
336
337 def __init__(self, id=None, val=None):
338 if id is None:
339 id = ForwardRef.count
340 ForwardRef.count = ForwardRef.count + 1
341 self.id = id
342 self.val = val
343
344 def __repr__(self):
345 if self.val:
346 return "ForwardRef(val=%d)" % self.val
347 else:
348 return "ForwardRef(id=%d)" % self.id
349
350 def bind(self, inst):
351 self.val = inst
352
353 def resolve(self):
354 return self.val
Jeremy Hylton0fdffcf2000-02-04 19:37:35 +0000355
356class CompiledModule:
357 """Store the code object for a compiled module
358
359 XXX Not clear how the code objects will be stored. Seems possible
360 that a single code attribute is sufficient, because it will
361 contains references to all the need code objects. That might be
362 messy, though.
363 """
364 MAGIC = (20121 | (ord('\r')<<16) | (ord('\n')<<24))
365
366 def __init__(self):
367 self.code = None
368
369 def addCode(self, code):
370 """addCode(self: SelfType, code: PythonVMCode)"""
371
372 def dump(self, path):
373 """create a .pyc file"""
374 f = open(path, 'wb')
375 f.write(self._pyc_header())
376 marshal.dump(self.code, f)
377 f.close()
378
379 def _pyc_header(self, path):
380 # compile.c uses marshal to write a long directly, with
381 # calling the interface that would also generate a 1-byte code
382 # to indicate the type of the value. simplest way to get the
383 # same effect is to call marshal and then skip the code.
384 buf = marshal.dumps(self.MAGIC)[1:]
385 # skip the mtime for now, since I don't have the write
386 # structure to pass the filename being compiled into this
387 # instance
388 return buf + chr(0) * 4
Jeremy Hylton8b6323d2000-02-04 00:28:21 +0000389
390class PythonVMCode:
Jeremy Hylton0fdffcf2000-02-04 19:37:35 +0000391
Jeremy Hylton8b6323d2000-02-04 00:28:21 +0000392 def __init__(self):
Jeremy Hylton0fdffcf2000-02-04 19:37:35 +0000393 self.insts = []
394 # used by makeCodeObject
395 self.argcount = 0
396 self.code = ''
397 self.consts = []
398 self.filename = ''
399 self.firstlineno = 0
400 self.flags = 0
401 self.lnotab = None
402 self.name = ''
403 self.names = []
404 self.nlocals = 0
405 self.stacksize = 2
406 self.varnames = []
407
408 def __repr__(self):
409 return "<bytecode: %d instrs>" % len(self.insts)
Jeremy Hylton8b6323d2000-02-04 00:28:21 +0000410
411 def emit(self, *args):
412 print "emit", args
413 self.insts.append(args)
414
415 def getCurInst(self):
416 return len(self.insts)
417
418 def getNextInst(self):
419 return len(self.insts) + 1
420
421 def convert(self):
422 """Convert human-readable names to real bytecode"""
423 pass
Jeremy Hylton0fdffcf2000-02-04 19:37:35 +0000424
425 def makeCodeObject(self):
426 """Make a Python code object"""
427 code = []
428 self._findOffsets()
429 for t in self.insts:
430 opname = t[0]
431 if len(t) == 1:
432 code.append(chr(self.opnum[opname]))
433 elif len(t) == 2:
434 oparg = self._convertArg(opname, t[1])
435 hi, lo = divmod(oparg, 256)
436 code.append(chr(self.opnum[opname]) + chr(lo) + chr(hi))
437 return string.join(code, '')
438
439 def _findOffsets(self):
440 """Find offsets for use in resolving ForwardRefs"""
441 self.offsets = []
442 cur = 0
443 for t in self.insts:
444 self.offsets.append(cur)
445 l = len(t)
446 if l == 1:
447 cur = cur + 1
448 elif l == 2:
449 arg = t[1]
450 if isinstance(arg, ForwardRef):
451 arg.__offset = cur
452 cur = cur + 3
453
454 def _convertArg(self, op, arg):
455 """Convert the string representation of an arg to a number
456
457 The specific handling depends on the opcode.
458
459 XXX This first implementation isn't going to be very
460 efficient.
461 """
462 if op == 'SET_LINENO':
463 return arg
464 if op == 'LOAD_CONST':
465 return self._lookupName(arg, self.consts)
466 if op == 'LOAD_FAST':
467 return self._lookupName(arg, self.varnames, self.names)
468 if op == 'LOAD_GLOBAL':
469 return self._lookupName(arg, self.names)
470 if op == 'STORE_NAME':
471 return self._lookupName(arg, self.names)
472 if op == 'COMPARE_OP':
473 return self.cmp_op.index(arg)
474 if self.hasjrel.has_elt(op):
475 return self.offsets[arg.resolve()]
476 if self.hasjabs.has_elt(op):
477 return self.offsets[arg.resolve()] - arg.__offset
478 print op, arg
479 return arg
480
481 def _lookupName(self, name, list, list2=None):
482 """Return index of name in list, appending if necessary
483
484 Yicky hack: Second list can be used for lookup of local names
485 where the name needs to be added to varnames and names.
486 """
487 if name in list:
488 return list.index(name)
489 else:
490 end = len(list)
491 list.append(name)
492 if list2 is not None:
493 list2.append(name)
494 return end
495
496 # Convert some stuff from the dis module for local use
497
498 cmp_op = list(dis.cmp_op)
499 hasjrel = misc.Set()
500 for i in dis.hasjrel:
501 hasjrel.add(dis.opname[i])
502 hasjabs = misc.Set()
503 for i in dis.hasjabs:
504 hasjabs.add(dis.opname[i])
505
Jeremy Hylton8b6323d2000-02-04 00:28:21 +0000506 opnum = {}
507 for num in range(len(dis.opname)):
508 opnum[dis.opname[num]] = num
509
510 # the interface below here seemed good at first. upon real use,
511 # it seems redundant to add a function for each opcode,
512 # particularly because the method and opcode basically have the
513 # same name.
514
515 def setLineNo(self, num):
516 self.emit('SET_LINENO', num)
517
518 def popTop(self):
519 self.emit('POP_TOP')
520
521 def dupTop(self):
522 self.emit('DUP_TOP')
523
524 def rotTwo(self):
525 self.emit('ROT_TWO')
526
527 def rotThree(self):
528 self.emit('ROT_THREE')
529
530 def jumpIfFalse(self, dest):
531 self.emit('JUMP_IF_FALSE', dest)
532
533 def loadFast(self, name):
534 self.emit('LOAD_FAST', name)
535
536 def loadGlobal(self, name):
537 self.emit('LOAD_GLOBAL', name)
538
539 def binaryAdd(self):
540 self.emit('BINARY_ADD')
541
542 def compareOp(self, op):
543 self.emit('COMPARE_OP', op)
544
545 def loadConst(self, val):
546 self.emit('LOAD_CONST', val)
547
548 def returnValue(self):
549 self.emit('RETURN_VALUE')
550
551 def jumpForward(self, dest):
552 self.emit('JUMP_FORWARD', dest)
553
554 def raiseVarargs(self, num):
555 self.emit('RAISE_VARARGS', num)
556
557 def callFunction(self, num):
558 self.emit('CALL_FUNCTION', num)
559
560if __name__ == "__main__":
561 tree = parse('test.py')
562 cg = CodeGenerator()
563 ASTVisitor.VERBOSE = 1
564 w = walk(tree, cg)
565 w.VERBOSE = 1
566 for i in range(len(cg.code.insts)):
567 inst = cg.code.insts[i]
568 if inst[0] == 'SET_LINENO':
569 print
570 print "%4d" % i, inst
Jeremy Hylton0fdffcf2000-02-04 19:37:35 +0000571 code = cg.code.makeCodeObject()