blob: 92cbef67212f75bde0fd5b4b2e3b789fc8eebbf7 [file] [log] [blame]
Jeremy Hylton8b6323d2000-02-04 00:28:21 +00001"""Python bytecode generator
2
3Currently contains generic ASTVisitor code, a LocalNameFinder, and a
4CodeGenerator. Eventually, this will get split into the ASTVisitor as
5a generic tool and CodeGenerator as a specific tool.
6"""
7
8from p2c import transformer, ast
9import dis
10import misc
Jeremy Hylton0fdffcf2000-02-04 19:37:35 +000011import marshal
12import new
13import string
Jeremy Hylton53187f32000-02-08 19:01:29 +000014import sys
15import os
16import stat
17import struct
Jeremy Hylton8b6323d2000-02-04 00:28:21 +000018
19def parse(path):
20 f = open(path)
21 src = f.read()
22 f.close()
23 t = transformer.Transformer()
24 return t.parsesuite(src)
25
Jeremy Hylton5e0ce532000-02-10 00:47:08 +000026def walk(tree, visitor, verbose=None, walker=None):
Jeremy Hylton40245602000-02-08 21:15:48 +000027 print visitor, "start"
Jeremy Hylton5e0ce532000-02-10 00:47:08 +000028 if walker:
29 w = walker()
30 else:
31 w = ASTVisitor()
Jeremy Hylton40245602000-02-08 21:15:48 +000032 if verbose is not None:
33 w.VERBOSE = verbose
Jeremy Hylton8b6323d2000-02-04 00:28:21 +000034 w.preorder(tree, visitor)
Jeremy Hylton40245602000-02-08 21:15:48 +000035 print visitor, "finish"
Jeremy Hylton8b6323d2000-02-04 00:28:21 +000036 return w.visitor
37
Jeremy Hylton5e0ce532000-02-10 00:47:08 +000038def dumpNode(node):
39 print node.__class__
40 for attr in dir(node):
41 if attr[0] != '_':
42 print "\t", "%-10.10s" % attr, getattr(node, attr)
43
Jeremy Hylton8b6323d2000-02-04 00:28:21 +000044class ASTVisitor:
45 """Performs a depth-first walk of the AST
46
47 The ASTVisitor will walk the AST, performing either a preorder or
48 postorder traversal depending on which method is called.
49
50 methods:
51 preorder(tree, visitor)
52 postorder(tree, visitor)
53 tree: an instance of ast.Node
54 visitor: an instance with visitXXX methods
55
56 The ASTVisitor is responsible for walking over the tree in the
57 correct order. For each node, it checks the visitor argument for
58 a method named 'visitNodeType' where NodeType is the name of the
59 node's class, e.g. Classdef. If the method exists, it is called
60 with the node as its sole argument.
61
62 The visitor method for a particular node type can control how
63 child nodes are visited during a preorder walk. (It can't control
64 the order during a postorder walk, because it is called _after_
65 the walk has occurred.) The ASTVisitor modifies the visitor
66 argument by adding a visit method to the visitor; this method can
67 be used to visit a particular child node. If the visitor method
68 returns a true value, the ASTVisitor will not traverse the child
69 nodes.
70
71 XXX The interface for controlling the preorder walk needs to be
72 re-considered. The current interface is convenient for visitors
73 that mostly let the ASTVisitor do everything. For something like
74 a code generator, where you want to walk to occur in a specific
75 order, it's a pain to add "return 1" to the end of each method.
76
77 XXX Perhaps I can use a postorder walk for the code generator?
78 """
79
Jeremy Hylton40245602000-02-08 21:15:48 +000080 VERBOSE = 0
Jeremy Hylton8b6323d2000-02-04 00:28:21 +000081
82 def __init__(self):
83 self.node = None
84
85 def preorder(self, tree, visitor):
86 """Do preorder walk of tree using visitor"""
87 self.visitor = visitor
88 visitor.visit = self._preorder
89 self._preorder(tree)
90
91 def _preorder(self, node):
92 stop = self.dispatch(node)
93 if stop:
94 return
95 for child in node.getChildren():
96 if isinstance(child, ast.Node):
97 self._preorder(child)
98
99 def postorder(self, tree, visitor):
100 """Do preorder walk of tree using visitor"""
101 self.visitor = visitor
102 visitor.visit = self._postorder
103 self._postorder(tree)
104
105 def _postorder(self, tree):
106 for child in node.getChildren():
107 if isinstance(child, ast.Node):
108 self._preorder(child)
109 self.dispatch(node)
110
111 def dispatch(self, node):
112 self.node = node
113 className = node.__class__.__name__
114 meth = getattr(self.visitor, 'visit' + className, None)
Jeremy Hylton40245602000-02-08 21:15:48 +0000115 if self.VERBOSE > 0:
116 if self.VERBOSE == 1:
117 if meth is None:
118 print "dispatch", className
119 else:
120 print "dispatch", className, (meth and meth.__name__ or '')
Jeremy Hylton8b6323d2000-02-04 00:28:21 +0000121 if meth:
122 return meth(node)
123
Jeremy Hylton5e0ce532000-02-10 00:47:08 +0000124class ExampleASTVisitor(ASTVisitor):
125 """Prints examples of the nodes that aren't visited"""
126 examples = {}
127
128 def dispatch(self, node):
129 self.node = node
130 className = node.__class__.__name__
131 meth = getattr(self.visitor, 'visit' + className, None)
132 if self.VERBOSE > 0:
133 if self.VERBOSE == 1:
134 if meth is None:
135 print "dispatch", className
136 else:
137 print "dispatch", className, (meth and meth.__name__ or '')
138 if meth:
139 return meth(node)
140 else:
141 klass = node.__class__
142 if self.VERBOSE < 2:
143 if self.examples.has_key(klass):
144 return
145 self.examples[klass] = klass
146 print
147 print klass
148 for attr in dir(node):
149 if attr[0] != '_':
150 print "\t", "%-12.12s" % attr, getattr(node, attr)
151 print
152
Jeremy Hylton8b6323d2000-02-04 00:28:21 +0000153class CodeGenerator:
Jeremy Hylton3e0910c2000-02-10 17:20:39 +0000154 # XXX this should be combined with PythonVMCode. there is no
155 # clear way to split the functionality into two classes.
156
157 MODULE_NAMESPACE = 1
158 FUNCTION_NAMESPACE = 2
159
Jeremy Hylton53187f32000-02-08 19:01:29 +0000160 def __init__(self, filename=None):
161 self.filename = filename
162 self.code = PythonVMCode(filename=filename)
163 self.code.setFlags(0)
Jeremy Hylton8b6323d2000-02-04 00:28:21 +0000164 self.locals = misc.Stack()
Jeremy Hylton3e0910c2000-02-10 17:20:39 +0000165 self.namespace = self.MODULE_NAMESPACE
Jeremy Hylton53187f32000-02-08 19:01:29 +0000166 self.curStack = 0
167 self.maxStack = 0
168
Jeremy Hylton5e0ce532000-02-10 00:47:08 +0000169 def generateFunctionCode(self, func, filename='<?>'):
170 """Generate code for a function body"""
171 self.name = func.name
172 self.filename = filename
173 args = func.argnames
174 self.code = PythonVMCode(len(args), name=func.name,
Jeremy Hylton3e0910c2000-02-10 17:20:39 +0000175 filename=filename, args=args)
176 self.namespace = self.FUNCTION_NAMESPACE
Jeremy Hylton5e0ce532000-02-10 00:47:08 +0000177 if func.varargs:
178 self.code.setVarArgs()
179 if func.kwargs:
180 self.code.setKWArgs()
181 lnf = walk(func.code, LocalNameFinder(args), 0)
182 self.locals.push(lnf.getLocals())
183## print func.name, "(", func.argnames, ")"
184## print lnf.getLocals().items()
185 self.code.setLineNo(func.lineno)
186 walk(func.code, self)
187 self.code.emit('LOAD_CONST', None)
188 self.code.emit('RETURN_VALUE')
189
Jeremy Hylton53187f32000-02-08 19:01:29 +0000190 def emit(self):
191 """Create a Python code object
192
193 XXX It is confusing that this method isn't related to the
194 method named emit in the PythonVMCode.
195 """
196 return self.code.makeCodeObject(self.maxStack)
197
Jeremy Hylton40245602000-02-08 21:15:48 +0000198 def isLocalName(self, name):
199 return self.locals.top().has_elt(name)
200
Jeremy Hylton3e0910c2000-02-10 17:20:39 +0000201 def _nameOp(self, prefix, name):
202 if self.isLocalName(name):
203 if self.namespace == self.MODULE_NAMESPACE:
204 self.code.emit(prefix + '_NAME', name)
205 else:
206 self.code.emit(prefix + '_FAST', name)
207 else:
208 self.code.emit(prefix + '_GLOBAL', name)
209
210 def storeName(self, name):
211 self._nameOp('STORE', name)
212
213 def loadName(self, name):
214 self._nameOp('LOAD', name)
215
216 def delName(self, name):
217 self._nameOp('DELETE', name)
218
Jeremy Hylton53187f32000-02-08 19:01:29 +0000219 def push(self, n):
220 self.curStack = self.curStack + n
221 if self.curStack > self.maxStack:
222 self.maxStack = self.curStack
223
224 def pop(self, n):
225 if n >= self.curStack:
226 self.curStack = self.curStack - n
227 else:
228 self.curStack = 0
Jeremy Hylton8b6323d2000-02-04 00:28:21 +0000229
Jeremy Hylton40245602000-02-08 21:15:48 +0000230 def assertStackEmpty(self):
231 if self.curStack != 0:
232 print "warning: stack should be empty"
233
Jeremy Hylton5e0ce532000-02-10 00:47:08 +0000234 def visitNULL(self, node):
235 """Method exists only to stop warning in -v mode"""
236 pass
237
238 visitStmt = visitNULL
239 visitGlobal = visitNULL
240
Jeremy Hylton8b6323d2000-02-04 00:28:21 +0000241 def visitDiscard(self, node):
Jeremy Hylton5e0ce532000-02-10 00:47:08 +0000242 self.visit(node.expr)
243 self.code.emit('POP_TOP')
244 self.pop(1)
Jeremy Hylton8b6323d2000-02-04 00:28:21 +0000245 return 1
246
Jeremy Hylton5e0ce532000-02-10 00:47:08 +0000247 def visitPass(self, node):
248 self.code.setLineNo(node.lineno)
249
Jeremy Hylton8b6323d2000-02-04 00:28:21 +0000250 def visitModule(self, node):
Jeremy Hylton40245602000-02-08 21:15:48 +0000251 lnf = walk(node.node, LocalNameFinder(), 0)
Jeremy Hylton8b6323d2000-02-04 00:28:21 +0000252 self.locals.push(lnf.getLocals())
Jeremy Hylton0fdffcf2000-02-04 19:37:35 +0000253 self.visit(node.node)
Jeremy Hylton53187f32000-02-08 19:01:29 +0000254 self.code.emit('LOAD_CONST', None)
Jeremy Hylton8b6323d2000-02-04 00:28:21 +0000255 self.code.emit('RETURN_VALUE')
Jeremy Hylton0fdffcf2000-02-04 19:37:35 +0000256 return 1
257
Jeremy Hylton5e0ce532000-02-10 00:47:08 +0000258 def visitImport(self, node):
Jeremy Hylton0fdffcf2000-02-04 19:37:35 +0000259 self.code.setLineNo(node.lineno)
Jeremy Hylton5e0ce532000-02-10 00:47:08 +0000260 for name in node.names:
261 self.code.emit('IMPORT_NAME', name)
Jeremy Hylton3e0910c2000-02-10 17:20:39 +0000262 self.storeName(name)
Jeremy Hylton5e0ce532000-02-10 00:47:08 +0000263
264 def visitFrom(self, node):
265 self.code.setLineNo(node.lineno)
266 self.code.emit('IMPORT_NAME', node.modname)
267 for name in node.names:
268 self.code.emit('IMPORT_FROM', name)
269 self.code.emit('POP_TOP')
270
271 def visitFunction(self, node):
272 codeBody = CodeGenerator()
273 codeBody.generateFunctionCode(node, filename=self.filename)
274 self.code.setLineNo(node.lineno)
275 for default in node.defaults:
276 self.visit(default)
Jeremy Hylton53187f32000-02-08 19:01:29 +0000277 self.code.emit('LOAD_CONST', codeBody)
Jeremy Hylton5e0ce532000-02-10 00:47:08 +0000278 self.code.emit('MAKE_FUNCTION', len(node.defaults))
Jeremy Hylton3e0910c2000-02-10 17:20:39 +0000279 self.storeName(node.name)
Jeremy Hylton0fdffcf2000-02-04 19:37:35 +0000280 return 1
Jeremy Hylton8b6323d2000-02-04 00:28:21 +0000281
282 def visitCallFunc(self, node):
283 self.visit(node.node)
284 for arg in node.args:
285 self.visit(arg)
286 self.code.callFunction(len(node.args))
287 return 1
288
289 def visitIf(self, node):
Jeremy Hylton40245602000-02-08 21:15:48 +0000290 after = StackRef()
Jeremy Hylton8b6323d2000-02-04 00:28:21 +0000291 for test, suite in node.tests:
Jeremy Hylton40245602000-02-08 21:15:48 +0000292 if hasattr(test, 'lineno'):
293 self.code.setLineNo(test.lineno)
294 else:
295 print "warning", "no line number"
Jeremy Hylton8b6323d2000-02-04 00:28:21 +0000296 self.visit(test)
Jeremy Hylton40245602000-02-08 21:15:48 +0000297 dest = StackRef()
Jeremy Hylton8b6323d2000-02-04 00:28:21 +0000298 self.code.jumpIfFalse(dest)
299 self.code.popTop()
300 self.visit(suite)
301 self.code.jumpForward(after)
302 dest.bind(self.code.getCurInst())
303 self.code.popTop()
304 if node.else_:
305 self.visit(node.else_)
306 after.bind(self.code.getCurInst())
307 return 1
308
Jeremy Hylton40245602000-02-08 21:15:48 +0000309 def visitFor(self, node):
310 # three refs needed
311 start = StackRef()
312 anchor = StackRef()
313 breakAnchor = StackRef()
314
315 self.code.emit('SET_LINENO', node.lineno)
316 self.code.emit('SETUP_LOOP', breakAnchor)
317 self.visit(node.list)
318 self.visit(ast.Const(0))
319 start.bind(self.code.getCurInst())
320 self.code.setLineNo(node.lineno)
321 self.code.emit('FOR_LOOP', anchor)
322 self.push(1)
323 self.visit(node.assign)
324 self.visit(node.body)
325 self.code.emit('JUMP_ABSOLUTE', start)
326 anchor.bind(self.code.getCurInst())
327 self.code.emit('POP_BLOCK')
328 if node.else_:
329 self.visit(node.else_)
330 breakAnchor.bind(self.code.getCurInst())
331 return 1
332
Jeremy Hylton8b6323d2000-02-04 00:28:21 +0000333 def visitCompare(self, node):
334 """Comment from compile.c follows:
335
336 The following code is generated for all but the last
337 comparison in a chain:
338
339 label: on stack: opcode: jump to:
340
341 a <code to load b>
342 a, b DUP_TOP
343 a, b, b ROT_THREE
344 b, a, b COMPARE_OP
345 b, 0-or-1 JUMP_IF_FALSE L1
346 b, 1 POP_TOP
347 b
348
349 We are now ready to repeat this sequence for the next
350 comparison in the chain.
351
352 For the last we generate:
353
354 b <code to load c>
355 b, c COMPARE_OP
356 0-or-1
357
358 If there were any jumps to L1 (i.e., there was more than one
359 comparison), we generate:
360
361 0-or-1 JUMP_FORWARD L2
362 L1: b, 0 ROT_TWO
363 0, b POP_TOP
364 0
365 L2: 0-or-1
366 """
367 self.visit(node.expr)
368 # if refs are never emitted, subsequent bind call has no effect
Jeremy Hylton40245602000-02-08 21:15:48 +0000369 l1 = StackRef()
370 l2 = StackRef()
Jeremy Hylton8b6323d2000-02-04 00:28:21 +0000371 for op, code in node.ops[:-1]:
372 # emit every comparison except the last
373 self.visit(code)
374 self.code.dupTop()
375 self.code.rotThree()
376 self.code.compareOp(op)
Jeremy Hylton40245602000-02-08 21:15:48 +0000377 # dupTop and compareOp cancel stack effect
Jeremy Hylton8b6323d2000-02-04 00:28:21 +0000378 self.code.jumpIfFalse(l1)
379 self.code.popTop()
Jeremy Hylton40245602000-02-08 21:15:48 +0000380 self.pop(1)
Jeremy Hylton8b6323d2000-02-04 00:28:21 +0000381 if node.ops:
382 # emit the last comparison
383 op, code = node.ops[-1]
384 self.visit(code)
385 self.code.compareOp(op)
Jeremy Hylton40245602000-02-08 21:15:48 +0000386 self.pop(1)
Jeremy Hylton8b6323d2000-02-04 00:28:21 +0000387 if len(node.ops) > 1:
388 self.code.jumpForward(l2)
389 l1.bind(self.code.getCurInst())
390 self.code.rotTwo()
391 self.code.popTop()
Jeremy Hylton40245602000-02-08 21:15:48 +0000392 self.pop(1)
Jeremy Hylton8b6323d2000-02-04 00:28:21 +0000393 l2.bind(self.code.getCurInst())
394 return 1
395
Jeremy Hylton5e0ce532000-02-10 00:47:08 +0000396 def visitGetattr(self, node):
397 self.visit(node.expr)
398 self.code.emit('LOAD_ATTR', node.attrname)
399 return 1
400
401 def visitSubscript(self, node):
402 self.visit(node.expr)
403 for sub in node.subs[:-1]:
404 self.visit(sub)
405 self.code.emit('BINARY_SUBSCR')
406 self.visit(node.subs[-1])
407 if node.flags == 'OP_APPLY':
408 self.code.emit('BINARY_SUBSCR')
409 else:
410 self.code.emit('STORE_SUBSCR')
411
412 return 1
413
414 def visitSlice(self, node):
415 self.visit(node.expr)
416 slice = 0
417 if node.lower:
418 self.visit(node.lower)
419 slice = slice | 1
420 self.pop(1)
421 if node.upper:
422 self.visit(node.upper)
423 slice = slice | 2
424 self.pop(1)
425 if node.flags == 'OP_APPLY':
426 self.code.emit('SLICE+%d' % slice)
427 elif node.flags == 'OP_ASSIGN':
428 self.code.emit('STORE_SLICE+%d' % slice)
429 elif node.flags == 'OP_DELETE':
430 self.code.emit('DELETE_SLICE+%d' % slice)
431 else:
432 print node.flags
433 raise
434 return 1
435
Jeremy Hylton40245602000-02-08 21:15:48 +0000436 def visitAssign(self, node):
437 self.code.setLineNo(node.lineno)
Jeremy Hylton40245602000-02-08 21:15:48 +0000438 self.visit(node.expr)
439 for elt in node.nodes:
440 if isinstance(elt, ast.Node):
441 self.visit(elt)
442 return 1
443
444 def visitAssName(self, node):
445 if node.flags != 'OP_ASSIGN':
446 print "oops", node.flags
Jeremy Hylton3e0910c2000-02-10 17:20:39 +0000447 self.storeName(node.name)
Jeremy Hylton40245602000-02-08 21:15:48 +0000448 self.pop(1)
449
Jeremy Hylton5e0ce532000-02-10 00:47:08 +0000450 def visitAssAttr(self, node):
451 if node.flags != 'OP_ASSIGN':
452 print "warning: unexpected flags:", node.flags
453 print node
454 self.visit(node.expr)
455 self.code.emit('STORE_ATTR', node.attrname)
456 return 1
457
458 def visitAssTuple(self, node):
459 self.code.emit('UNPACK_TUPLE', len(node.nodes))
460 for child in node.nodes:
461 self.visit(child)
462 return 1
463
464 visitAssList = visitAssTuple
465
Jeremy Hylton8b6323d2000-02-04 00:28:21 +0000466 def binaryOp(self, node, op):
467 self.visit(node.left)
468 self.visit(node.right)
469 self.code.emit(op)
Jeremy Hylton53187f32000-02-08 19:01:29 +0000470 self.pop(1)
Jeremy Hylton8b6323d2000-02-04 00:28:21 +0000471 return 1
472
Jeremy Hylton40245602000-02-08 21:15:48 +0000473 def unaryOp(self, node, op):
474 self.visit(node.expr)
475 self.code.emit(op)
476 return 1
477
Jeremy Hylton8b6323d2000-02-04 00:28:21 +0000478 def visitAdd(self, node):
479 return self.binaryOp(node, 'BINARY_ADD')
480
481 def visitSub(self, node):
482 return self.binaryOp(node, 'BINARY_SUBTRACT')
483
484 def visitMul(self, node):
485 return self.binaryOp(node, 'BINARY_MULTIPLY')
486
487 def visitDiv(self, node):
488 return self.binaryOp(node, 'BINARY_DIVIDE')
489
Jeremy Hylton5e0ce532000-02-10 00:47:08 +0000490 def visitMod(self, node):
491 return self.binaryOp(node, 'BINARY_MODULO')
492
Jeremy Hylton40245602000-02-08 21:15:48 +0000493 def visitUnarySub(self, node):
494 return self.unaryOp(node, 'UNARY_NEGATIVE')
495
496 def visitUnaryAdd(self, node):
497 return self.unaryOp(node, 'UNARY_POSITIVE')
498
499 def visitUnaryInvert(self, node):
500 return self.unaryOp(node, 'UNARY_INVERT')
501
Jeremy Hylton5e0ce532000-02-10 00:47:08 +0000502 def visitNot(self, node):
503 return self.unaryOp(node, 'UNARY_NOT')
504
Jeremy Hylton40245602000-02-08 21:15:48 +0000505 def visitBackquote(self, node):
506 return self.unaryOp(node, 'UNARY_CONVERT')
507
Jeremy Hylton5e0ce532000-02-10 00:47:08 +0000508 def visitTest(self, node, jump):
509 end = StackRef()
510 for child in node.nodes[:-1]:
511 self.visit(child)
512 self.code.emit(jump, end)
513 self.code.emit('POP_TOP')
514 self.visit(node.nodes[-1])
515 end.bind(self.code.getCurInst())
516 return 1
517
518 def visitAnd(self, node):
519 return self.visitTest(node, 'JUMP_IF_FALSE')
520
521 def visitOr(self, node):
522 return self.visitTest(node, 'JUMP_IF_TRUE')
523
Jeremy Hylton8b6323d2000-02-04 00:28:21 +0000524 def visitName(self, node):
Jeremy Hylton3e0910c2000-02-10 17:20:39 +0000525 self.loadName(node.name)
Jeremy Hylton53187f32000-02-08 19:01:29 +0000526 self.push(1)
Jeremy Hylton8b6323d2000-02-04 00:28:21 +0000527
528 def visitConst(self, node):
529 self.code.loadConst(node.value)
Jeremy Hylton53187f32000-02-08 19:01:29 +0000530 self.push(1)
Jeremy Hylton40245602000-02-08 21:15:48 +0000531 return 1
532
533 def visitTuple(self, node):
534 for elt in node.nodes:
535 self.visit(elt)
536 self.code.emit('BUILD_TUPLE', len(node.nodes))
537 self.pop(len(node.nodes))
538 return 1
Jeremy Hylton8b6323d2000-02-04 00:28:21 +0000539
Jeremy Hylton5e0ce532000-02-10 00:47:08 +0000540 def visitList(self, node):
541 for elt in node.nodes:
542 self.visit(elt)
543 self.code.emit('BUILD_LIST', len(node.nodes))
544 self.pop(len(node.nodes))
545 return 1
546
Jeremy Hylton8b6323d2000-02-04 00:28:21 +0000547 def visitReturn(self, node):
548 self.code.setLineNo(node.lineno)
549 self.visit(node.value)
550 self.code.returnValue()
Jeremy Hylton40245602000-02-08 21:15:48 +0000551 self.pop(1)
552 self.assertStackEmpty()
Jeremy Hylton8b6323d2000-02-04 00:28:21 +0000553 return 1
554
555 def visitRaise(self, node):
556 self.code.setLineNo(node.lineno)
557 n = 0
558 if node.expr1:
559 self.visit(node.expr1)
560 n = n + 1
561 if node.expr2:
562 self.visit(node.expr2)
563 n = n + 1
564 if node.expr3:
565 self.visit(node.expr3)
566 n = n + 1
567 self.code.raiseVarargs(n)
568 return 1
569
570 def visitPrint(self, node):
571 self.code.setLineNo(node.lineno)
572 for child in node.nodes:
573 self.visit(child)
574 self.code.emit('PRINT_ITEM')
Jeremy Hylton53187f32000-02-08 19:01:29 +0000575 self.pop(len(node.nodes))
Jeremy Hylton8b6323d2000-02-04 00:28:21 +0000576 return 1
577
578 def visitPrintnl(self, node):
579 self.visitPrint(node)
580 self.code.emit('PRINT_NEWLINE')
581 return 1
Jeremy Hylton0fdffcf2000-02-04 19:37:35 +0000582
Jeremy Hylton8b6323d2000-02-04 00:28:21 +0000583class LocalNameFinder:
584 def __init__(self, names=()):
585 self.names = misc.Set()
Jeremy Hylton5e0ce532000-02-10 00:47:08 +0000586 self.globals = misc.Set()
Jeremy Hylton8b6323d2000-02-04 00:28:21 +0000587 for name in names:
588 self.names.add(name)
589
590 def getLocals(self):
Jeremy Hylton5e0ce532000-02-10 00:47:08 +0000591 for elt in self.globals.items():
592 if self.names.has_elt(elt):
593 self.names.remove(elt)
Jeremy Hylton8b6323d2000-02-04 00:28:21 +0000594 return self.names
595
Jeremy Hylton5e0ce532000-02-10 00:47:08 +0000596 def visitGlobal(self, node):
597 for name in node.names:
598 self.globals.add(name)
599 return 1
600
Jeremy Hylton8b6323d2000-02-04 00:28:21 +0000601 def visitFunction(self, node):
602 self.names.add(node.name)
603 return 1
604
605 def visitImport(self, node):
606 for name in node.names:
607 self.names.add(name)
608
609 def visitFrom(self, node):
610 for name in node.names:
611 self.names.add(name)
612
613 def visitClassdef(self, node):
614 self.names.add(node.name)
615 return 1
616
617 def visitAssName(self, node):
618 self.names.add(node.name)
619
620class Label:
621 def __init__(self, num):
622 self.num = num
623 def __repr__(self):
624 return "Label(%d)" % self.num
625
Jeremy Hylton40245602000-02-08 21:15:48 +0000626class StackRef:
627 """Manage stack locations for jumps, loops, etc."""
Jeremy Hylton8b6323d2000-02-04 00:28:21 +0000628 count = 0
629
630 def __init__(self, id=None, val=None):
631 if id is None:
Jeremy Hylton40245602000-02-08 21:15:48 +0000632 id = StackRef.count
633 StackRef.count = StackRef.count + 1
Jeremy Hylton8b6323d2000-02-04 00:28:21 +0000634 self.id = id
635 self.val = val
636
637 def __repr__(self):
638 if self.val:
Jeremy Hylton40245602000-02-08 21:15:48 +0000639 return "StackRef(val=%d)" % self.val
Jeremy Hylton8b6323d2000-02-04 00:28:21 +0000640 else:
Jeremy Hylton40245602000-02-08 21:15:48 +0000641 return "StackRef(id=%d)" % self.id
Jeremy Hylton8b6323d2000-02-04 00:28:21 +0000642
643 def bind(self, inst):
644 self.val = inst
645
646 def resolve(self):
647 return self.val
Jeremy Hylton0fdffcf2000-02-04 19:37:35 +0000648
Jeremy Hylton53187f32000-02-08 19:01:29 +0000649def add_hook(hooks, type, meth):
650 """Helper function for PythonVMCode _emit_hooks"""
651 l = hooks.get(type, [])
652 l.append(meth)
653 hooks[type] = l
Jeremy Hylton0fdffcf2000-02-04 19:37:35 +0000654
Jeremy Hylton8b6323d2000-02-04 00:28:21 +0000655class PythonVMCode:
Jeremy Hylton53187f32000-02-08 19:01:29 +0000656 """Creates Python code objects
657
658 The new module is used to create the code object. The following
659 attribute definitions are included from the reference manual:
660
661 co_name gives the function name
662 co_argcount is the number of positional arguments (including
663 arguments with default values)
664 co_nlocals is the number of local variables used by the function
665 (including arguments)
666 co_varnames is a tuple containing the names of the local variables
667 (starting with the argument names)
668 co_code is a string representing the sequence of bytecode instructions
669 co_consts is a tuple containing the literals used by the bytecode
670 co_names is a tuple containing the names used by the bytecode
671 co_filename is the filename from which the code was compiled
672 co_firstlineno is the first line number of the function
673 co_lnotab is a string encoding the mapping from byte code offsets
674 to line numbers (for detais see the source code of the
675 interpreter)
676 see code com_set_lineno and com_add_lnotab
677 it's a string with 2bytes per set_lineno
678
679 co_stacksize is the required stack size (including local variables)
680 co_flags is an integer encoding a number of flags for the
681 interpreter.
Jeremy Hylton0fdffcf2000-02-04 19:37:35 +0000682
Jeremy Hylton53187f32000-02-08 19:01:29 +0000683 The following flag bits are defined for co_flags: bit 2 is set if
684 the function uses the "*arguments" syntax to accept an arbitrary
685 number of positional arguments; bit 3 is set if the function uses
686 the "**keywords" syntax to accept arbitrary keyword arguments;
687 other bits are used internally or reserved for future use.
688
689 If a code object represents a function, the first item in
690 co_consts is the documentation string of the function, or None if
691 undefined.
692 """
693
694 # XXX flag bits
695 VARARGS = 0x04
696 KWARGS = 0x08
697
698 def __init__(self, argcount=0, name='?', filename='<?>',
Jeremy Hylton5e0ce532000-02-10 00:47:08 +0000699 docstring=None, args=()):
Jeremy Hylton53187f32000-02-08 19:01:29 +0000700 # XXX why is the default value for flags 3?
Jeremy Hylton0fdffcf2000-02-04 19:37:35 +0000701 self.insts = []
702 # used by makeCodeObject
Jeremy Hylton53187f32000-02-08 19:01:29 +0000703 self.argcount = argcount
Jeremy Hylton0fdffcf2000-02-04 19:37:35 +0000704 self.code = ''
Jeremy Hylton53187f32000-02-08 19:01:29 +0000705 self.consts = [docstring]
706 self.filename = filename
707 self.flags = 3
708 self.name = name
Jeremy Hylton0fdffcf2000-02-04 19:37:35 +0000709 self.names = []
Jeremy Hylton5e0ce532000-02-10 00:47:08 +0000710 self.varnames = list(args) or []
Jeremy Hylton53187f32000-02-08 19:01:29 +0000711 # lnotab support
712 self.firstlineno = 0
713 self.lastlineno = 0
714 self.last_addr = 0
715 self.lnotab = ''
Jeremy Hylton0fdffcf2000-02-04 19:37:35 +0000716
717 def __repr__(self):
718 return "<bytecode: %d instrs>" % len(self.insts)
Jeremy Hylton8b6323d2000-02-04 00:28:21 +0000719
Jeremy Hylton53187f32000-02-08 19:01:29 +0000720 def setFlags(self, val):
721 """XXX for module's function"""
722 self.flags = 0
723
724 def setVarArgs(self):
725 self.flags = self.flags | self.VARARGS
726
727 def setKWArgs(self):
728 self.flags = self.flags | self.KWARGS
Jeremy Hylton8b6323d2000-02-04 00:28:21 +0000729
730 def getCurInst(self):
731 return len(self.insts)
732
733 def getNextInst(self):
734 return len(self.insts) + 1
735
Jeremy Hylton53187f32000-02-08 19:01:29 +0000736 def dump(self, io=sys.stdout):
737 i = 0
738 for inst in self.insts:
739 if inst[0] == 'SET_LINENO':
740 io.write("\n")
741 io.write(" %3d " % i)
742 if len(inst) == 1:
743 io.write("%s\n" % inst)
744 else:
745 io.write("%-15.15s\t%s\n" % inst)
746 i = i + 1
Jeremy Hylton0fdffcf2000-02-04 19:37:35 +0000747
Jeremy Hylton53187f32000-02-08 19:01:29 +0000748 def makeCodeObject(self, stacksize):
749 """Make a Python code object
750
751 This creates a Python code object using the new module. This
752 seems simpler than reverse-engineering the way marshal dumps
753 code objects into .pyc files. One of the key difficulties is
754 figuring out how to layout references to code objects that
755 appear on the VM stack; e.g.
756 3 SET_LINENO 1
757 6 LOAD_CONST 0 (<code object fact at 8115878 [...]
758 9 MAKE_FUNCTION 0
759 12 STORE_NAME 0 (fact)
Jeremy Hylton53187f32000-02-08 19:01:29 +0000760 """
761
Jeremy Hylton0fdffcf2000-02-04 19:37:35 +0000762 self._findOffsets()
Jeremy Hylton53187f32000-02-08 19:01:29 +0000763 lnotab = LineAddrTable()
Jeremy Hylton0fdffcf2000-02-04 19:37:35 +0000764 for t in self.insts:
765 opname = t[0]
766 if len(t) == 1:
Jeremy Hylton53187f32000-02-08 19:01:29 +0000767 lnotab.addCode(chr(self.opnum[opname]))
Jeremy Hylton0fdffcf2000-02-04 19:37:35 +0000768 elif len(t) == 2:
769 oparg = self._convertArg(opname, t[1])
Jeremy Hylton53187f32000-02-08 19:01:29 +0000770 if opname == 'SET_LINENO':
771 lnotab.nextLine(oparg)
Jeremy Hylton40245602000-02-08 21:15:48 +0000772 try:
773 hi, lo = divmod(oparg, 256)
774 except TypeError:
Jeremy Hylton3e0910c2000-02-10 17:20:39 +0000775 raise TypeError, "untranslated arg: %s, %s" % (opname, oparg)
Jeremy Hylton53187f32000-02-08 19:01:29 +0000776 lnotab.addCode(chr(self.opnum[opname]) + chr(lo) +
777 chr(hi))
778 # why is a module a special case?
779 if self.flags == 0:
780 nlocals = 0
781 else:
782 nlocals = len(self.varnames)
783 co = new.code(self.argcount, nlocals, stacksize,
784 self.flags, lnotab.getCode(), self._getConsts(),
785 tuple(self.names), tuple(self.varnames),
786 self.filename, self.name, self.firstlineno,
787 lnotab.getTable())
788 return co
789
790 def _getConsts(self):
791 """Return a tuple for the const slot of a code object
792
793 Converts PythonVMCode objects to code objects
794 """
795 l = []
796 for elt in self.consts:
797 if isinstance(elt, CodeGenerator):
798 l.append(elt.emit())
799 else:
800 l.append(elt)
801 return tuple(l)
Jeremy Hylton0fdffcf2000-02-04 19:37:35 +0000802
803 def _findOffsets(self):
Jeremy Hylton40245602000-02-08 21:15:48 +0000804 """Find offsets for use in resolving StackRefs"""
Jeremy Hylton0fdffcf2000-02-04 19:37:35 +0000805 self.offsets = []
806 cur = 0
807 for t in self.insts:
808 self.offsets.append(cur)
809 l = len(t)
810 if l == 1:
811 cur = cur + 1
812 elif l == 2:
Jeremy Hylton0fdffcf2000-02-04 19:37:35 +0000813 cur = cur + 3
Jeremy Hylton40245602000-02-08 21:15:48 +0000814 arg = t[1]
815 if isinstance(arg, StackRef):
816 arg.__offset = cur
Jeremy Hylton0fdffcf2000-02-04 19:37:35 +0000817
818 def _convertArg(self, op, arg):
819 """Convert the string representation of an arg to a number
820
821 The specific handling depends on the opcode.
822
823 XXX This first implementation isn't going to be very
824 efficient.
825 """
826 if op == 'SET_LINENO':
827 return arg
828 if op == 'LOAD_CONST':
829 return self._lookupName(arg, self.consts)
Jeremy Hylton40245602000-02-08 21:15:48 +0000830 if op in self.localOps:
Jeremy Hylton53187f32000-02-08 19:01:29 +0000831 if arg in self.names:
832 return self._lookupName(arg, self.varnames)
833 else:
834 return self._lookupName(arg, self.varnames, self.names)
Jeremy Hylton40245602000-02-08 21:15:48 +0000835 if op in self.globalOps:
Jeremy Hylton0fdffcf2000-02-04 19:37:35 +0000836 return self._lookupName(arg, self.names)
Jeremy Hylton5e0ce532000-02-10 00:47:08 +0000837 if op in self.nameOps:
Jeremy Hylton0fdffcf2000-02-04 19:37:35 +0000838 return self._lookupName(arg, self.names)
839 if op == 'COMPARE_OP':
840 return self.cmp_op.index(arg)
841 if self.hasjrel.has_elt(op):
Jeremy Hylton0fdffcf2000-02-04 19:37:35 +0000842 return self.offsets[arg.resolve()] - arg.__offset
Jeremy Hylton40245602000-02-08 21:15:48 +0000843 if self.hasjabs.has_elt(op):
844 return self.offsets[arg.resolve()]
Jeremy Hylton0fdffcf2000-02-04 19:37:35 +0000845 return arg
846
Jeremy Hylton5e0ce532000-02-10 00:47:08 +0000847 nameOps = ('STORE_NAME', 'IMPORT_NAME', 'IMPORT_FROM',
Jeremy Hylton3e0910c2000-02-10 17:20:39 +0000848 'STORE_ATTR', 'LOAD_ATTR', 'LOAD_NAME', 'DELETE_NAME')
849 localOps = ('LOAD_FAST', 'STORE_FAST', 'DELETE_FAST')
850 globalOps = ('LOAD_GLOBAL', 'STORE_GLOBAL', 'DELETE_GLOBAL')
Jeremy Hylton40245602000-02-08 21:15:48 +0000851
Jeremy Hylton0fdffcf2000-02-04 19:37:35 +0000852 def _lookupName(self, name, list, list2=None):
853 """Return index of name in list, appending if necessary
854
855 Yicky hack: Second list can be used for lookup of local names
856 where the name needs to be added to varnames and names.
857 """
858 if name in list:
859 return list.index(name)
860 else:
861 end = len(list)
862 list.append(name)
863 if list2 is not None:
864 list2.append(name)
865 return end
866
867 # Convert some stuff from the dis module for local use
868
869 cmp_op = list(dis.cmp_op)
870 hasjrel = misc.Set()
871 for i in dis.hasjrel:
872 hasjrel.add(dis.opname[i])
873 hasjabs = misc.Set()
874 for i in dis.hasjabs:
875 hasjabs.add(dis.opname[i])
876
Jeremy Hylton8b6323d2000-02-04 00:28:21 +0000877 opnum = {}
878 for num in range(len(dis.opname)):
879 opnum[dis.opname[num]] = num
880
881 # the interface below here seemed good at first. upon real use,
882 # it seems redundant to add a function for each opcode,
883 # particularly because the method and opcode basically have the
884 # same name.
Jeremy Hylton53187f32000-02-08 19:01:29 +0000885 # on the other hand, we need to track things like stack depth in
886 # order to generator code objects. if we wrap instructions in a
887 # method, we get an easy way to track these. a simpler
888 # approach, however, would be to define hooks that can be called
889 # by emit.
Jeremy Hylton8b6323d2000-02-04 00:28:21 +0000890
891 def setLineNo(self, num):
892 self.emit('SET_LINENO', num)
893
894 def popTop(self):
895 self.emit('POP_TOP')
896
897 def dupTop(self):
898 self.emit('DUP_TOP')
899
900 def rotTwo(self):
901 self.emit('ROT_TWO')
902
903 def rotThree(self):
904 self.emit('ROT_THREE')
905
906 def jumpIfFalse(self, dest):
907 self.emit('JUMP_IF_FALSE', dest)
908
909 def loadFast(self, name):
910 self.emit('LOAD_FAST', name)
911
912 def loadGlobal(self, name):
913 self.emit('LOAD_GLOBAL', name)
914
915 def binaryAdd(self):
916 self.emit('BINARY_ADD')
917
918 def compareOp(self, op):
919 self.emit('COMPARE_OP', op)
920
921 def loadConst(self, val):
922 self.emit('LOAD_CONST', val)
923
924 def returnValue(self):
925 self.emit('RETURN_VALUE')
926
927 def jumpForward(self, dest):
928 self.emit('JUMP_FORWARD', dest)
929
930 def raiseVarargs(self, num):
931 self.emit('RAISE_VARARGS', num)
932
933 def callFunction(self, num):
934 self.emit('CALL_FUNCTION', num)
935
Jeremy Hylton53187f32000-02-08 19:01:29 +0000936 # this version of emit + arbitrary hooks might work, but it's damn
937 # messy.
938
939 def emit(self, *args):
940 self._emitDispatch(args[0], args[1:])
941 self.insts.append(args)
942
943 def _emitDispatch(self, type, args):
944 for func in self._emit_hooks.get(type, []):
945 func(self, args)
946
947 _emit_hooks = {}
948
949class LineAddrTable:
950 """lnotab
951
952 This class builds the lnotab, which is undocumented but described
953 by com_set_lineno in compile.c. Here's an attempt at explanation:
954
955 For each SET_LINENO instruction after the first one, two bytes are
956 added to lnotab. (In some cases, multiple two-byte entries are
957 added.) The first byte is the distance in bytes between the
958 instruction for the last SET_LINENO and the current SET_LINENO.
959 The second byte is offset in line numbers. If either offset is
960 greater than 255, multiple two-byte entries are added -- one entry
961 for each factor of 255.
962 """
963
964 def __init__(self):
965 self.code = []
966 self.codeOffset = 0
967 self.firstline = 0
968 self.lastline = 0
969 self.lastoff = 0
970 self.lnotab = []
971
972 def addCode(self, code):
973 self.code.append(code)
974 self.codeOffset = self.codeOffset + len(code)
975
976 def nextLine(self, lineno):
977 if self.firstline == 0:
978 self.firstline = lineno
979 self.lastline = lineno
980 else:
981 # compute deltas
982 addr = self.codeOffset - self.lastoff
983 line = lineno - self.lastline
984 while addr > 0 or line > 0:
985 # write the values in 1-byte chunks that sum
986 # to desired value
987 trunc_addr = addr
988 trunc_line = line
989 if trunc_addr > 255:
990 trunc_addr = 255
991 if trunc_line > 255:
992 trunc_line = 255
993 self.lnotab.append(trunc_addr)
994 self.lnotab.append(trunc_line)
995 addr = addr - trunc_addr
996 line = line - trunc_line
997 self.lastline = lineno
998 self.lastoff = self.codeOffset
999
1000 def getCode(self):
1001 return string.join(self.code, '')
1002
1003 def getTable(self):
1004 return string.join(map(chr, self.lnotab), '')
1005
1006class CompiledModule:
1007 """Store the code object for a compiled module
1008
1009 XXX Not clear how the code objects will be stored. Seems possible
1010 that a single code attribute is sufficient, because it will
1011 contains references to all the need code objects. That might be
1012 messy, though.
1013 """
1014 MAGIC = (20121 | (ord('\r')<<16) | (ord('\n')<<24))
1015
1016 def __init__(self, source, filename):
1017 self.source = source
1018 self.filename = filename
1019
1020 def compile(self):
1021 t = transformer.Transformer()
1022 self.ast = t.parsesuite(self.source)
1023 cg = CodeGenerator(self.filename)
Jeremy Hylton5e0ce532000-02-10 00:47:08 +00001024 walk(self.ast, cg, walker=ExampleASTVisitor)
Jeremy Hylton53187f32000-02-08 19:01:29 +00001025 self.code = cg.emit()
1026
1027 def dump(self, path):
1028 """create a .pyc file"""
1029 f = open(path, 'wb')
1030 f.write(self._pyc_header())
1031 marshal.dump(self.code, f)
1032 f.close()
1033
1034 def _pyc_header(self):
1035 # compile.c uses marshal to write a long directly, with
1036 # calling the interface that would also generate a 1-byte code
1037 # to indicate the type of the value. simplest way to get the
1038 # same effect is to call marshal and then skip the code.
1039 magic = marshal.dumps(self.MAGIC)[1:]
1040 mtime = os.stat(self.filename)[stat.ST_MTIME]
1041 mtime = struct.pack('i', mtime)
1042 return magic + mtime
1043
Jeremy Hylton8b6323d2000-02-04 00:28:21 +00001044if __name__ == "__main__":
Jeremy Hylton40245602000-02-08 21:15:48 +00001045 import getopt
1046
Jeremy Hylton5e0ce532000-02-10 00:47:08 +00001047 opts, args = getopt.getopt(sys.argv[1:], 'vq')
Jeremy Hylton40245602000-02-08 21:15:48 +00001048 for k, v in opts:
1049 if k == '-v':
1050 ASTVisitor.VERBOSE = ASTVisitor.VERBOSE + 1
1051 print k
Jeremy Hylton5e0ce532000-02-10 00:47:08 +00001052 if k == '-q':
1053 f = open('/dev/null', 'wb')
1054 sys.stdout = f
Jeremy Hylton40245602000-02-08 21:15:48 +00001055 if args:
1056 filename = args[0]
Jeremy Hylton53187f32000-02-08 19:01:29 +00001057 else:
1058 filename = 'test.py'
1059 buf = open(filename).read()
1060 mod = CompiledModule(buf, filename)
1061 mod.compile()
1062 mod.dump(filename + 'c')