Jeremy Hylton | 8b6323d | 2000-02-04 00:28:21 +0000 | [diff] [blame] | 1 | """Python bytecode generator |
| 2 | |
| 3 | Currently contains generic ASTVisitor code, a LocalNameFinder, and a |
| 4 | CodeGenerator. Eventually, this will get split into the ASTVisitor as |
| 5 | a generic tool and CodeGenerator as a specific tool. |
| 6 | """ |
| 7 | |
| 8 | from p2c import transformer, ast |
| 9 | import dis |
| 10 | import misc |
| 11 | |
| 12 | def parse(path): |
| 13 | f = open(path) |
| 14 | src = f.read() |
| 15 | f.close() |
| 16 | t = transformer.Transformer() |
| 17 | return t.parsesuite(src) |
| 18 | |
| 19 | def walk(tree, visitor): |
| 20 | w = ASTVisitor() |
| 21 | w.preorder(tree, visitor) |
| 22 | return w.visitor |
| 23 | |
| 24 | class ASTVisitor: |
| 25 | """Performs a depth-first walk of the AST |
| 26 | |
| 27 | The ASTVisitor will walk the AST, performing either a preorder or |
| 28 | postorder traversal depending on which method is called. |
| 29 | |
| 30 | methods: |
| 31 | preorder(tree, visitor) |
| 32 | postorder(tree, visitor) |
| 33 | tree: an instance of ast.Node |
| 34 | visitor: an instance with visitXXX methods |
| 35 | |
| 36 | The ASTVisitor is responsible for walking over the tree in the |
| 37 | correct order. For each node, it checks the visitor argument for |
| 38 | a method named 'visitNodeType' where NodeType is the name of the |
| 39 | node's class, e.g. Classdef. If the method exists, it is called |
| 40 | with the node as its sole argument. |
| 41 | |
| 42 | The visitor method for a particular node type can control how |
| 43 | child nodes are visited during a preorder walk. (It can't control |
| 44 | the order during a postorder walk, because it is called _after_ |
| 45 | the walk has occurred.) The ASTVisitor modifies the visitor |
| 46 | argument by adding a visit method to the visitor; this method can |
| 47 | be used to visit a particular child node. If the visitor method |
| 48 | returns a true value, the ASTVisitor will not traverse the child |
| 49 | nodes. |
| 50 | |
| 51 | XXX The interface for controlling the preorder walk needs to be |
| 52 | re-considered. The current interface is convenient for visitors |
| 53 | that mostly let the ASTVisitor do everything. For something like |
| 54 | a code generator, where you want to walk to occur in a specific |
| 55 | order, it's a pain to add "return 1" to the end of each method. |
| 56 | |
| 57 | XXX Perhaps I can use a postorder walk for the code generator? |
| 58 | """ |
| 59 | |
| 60 | VERBOSE = 0 |
| 61 | |
| 62 | def __init__(self): |
| 63 | self.node = None |
| 64 | |
| 65 | def preorder(self, tree, visitor): |
| 66 | """Do preorder walk of tree using visitor""" |
| 67 | self.visitor = visitor |
| 68 | visitor.visit = self._preorder |
| 69 | self._preorder(tree) |
| 70 | |
| 71 | def _preorder(self, node): |
| 72 | stop = self.dispatch(node) |
| 73 | if stop: |
| 74 | return |
| 75 | for child in node.getChildren(): |
| 76 | if isinstance(child, ast.Node): |
| 77 | self._preorder(child) |
| 78 | |
| 79 | def postorder(self, tree, visitor): |
| 80 | """Do preorder walk of tree using visitor""" |
| 81 | self.visitor = visitor |
| 82 | visitor.visit = self._postorder |
| 83 | self._postorder(tree) |
| 84 | |
| 85 | def _postorder(self, tree): |
| 86 | for child in node.getChildren(): |
| 87 | if isinstance(child, ast.Node): |
| 88 | self._preorder(child) |
| 89 | self.dispatch(node) |
| 90 | |
| 91 | def dispatch(self, node): |
| 92 | self.node = node |
| 93 | className = node.__class__.__name__ |
| 94 | meth = getattr(self.visitor, 'visit' + className, None) |
| 95 | if self.VERBOSE: |
| 96 | print "dispatch", className, (meth and meth.__name__ or '') |
| 97 | if meth: |
| 98 | return meth(node) |
| 99 | |
| 100 | |
| 101 | class CodeGenerator: |
| 102 | def __init__(self): |
| 103 | self.code = PythonVMCode() |
| 104 | self.locals = misc.Stack() |
| 105 | |
| 106 | def visitDiscard(self, node): |
| 107 | return 1 |
| 108 | |
| 109 | def visitModule(self, node): |
| 110 | lnf = walk(node.node, LocalNameFinder()) |
| 111 | self.locals.push(lnf.getLocals()) |
| 112 | |
| 113 | def visitFunction(self, node): |
| 114 | lnf = walk(node.code, LocalNameFinder(node.argnames)) |
| 115 | self.locals.push(lnf.getLocals()) |
| 116 | self.code.setLineNo(node.lineno) |
| 117 | print node.code |
| 118 | self.visit(node.code) |
| 119 | self.code.emit('LOAD_CONST', 'None') |
| 120 | self.code.emit('RETURN_VALUE') |
| 121 | |
| 122 | def visitCallFunc(self, node): |
| 123 | self.visit(node.node) |
| 124 | for arg in node.args: |
| 125 | self.visit(arg) |
| 126 | self.code.callFunction(len(node.args)) |
| 127 | return 1 |
| 128 | |
| 129 | def visitIf(self, node): |
| 130 | after = ForwardRef() |
| 131 | for test, suite in node.tests: |
| 132 | self.code.setLineNo(test.lineno) |
| 133 | self.visit(test) |
| 134 | dest = ForwardRef() |
| 135 | self.code.jumpIfFalse(dest) |
| 136 | self.code.popTop() |
| 137 | self.visit(suite) |
| 138 | self.code.jumpForward(after) |
| 139 | dest.bind(self.code.getCurInst()) |
| 140 | self.code.popTop() |
| 141 | if node.else_: |
| 142 | self.visit(node.else_) |
| 143 | after.bind(self.code.getCurInst()) |
| 144 | return 1 |
| 145 | |
| 146 | def visitCompare(self, node): |
| 147 | """Comment from compile.c follows: |
| 148 | |
| 149 | The following code is generated for all but the last |
| 150 | comparison in a chain: |
| 151 | |
| 152 | label: on stack: opcode: jump to: |
| 153 | |
| 154 | a <code to load b> |
| 155 | a, b DUP_TOP |
| 156 | a, b, b ROT_THREE |
| 157 | b, a, b COMPARE_OP |
| 158 | b, 0-or-1 JUMP_IF_FALSE L1 |
| 159 | b, 1 POP_TOP |
| 160 | b |
| 161 | |
| 162 | We are now ready to repeat this sequence for the next |
| 163 | comparison in the chain. |
| 164 | |
| 165 | For the last we generate: |
| 166 | |
| 167 | b <code to load c> |
| 168 | b, c COMPARE_OP |
| 169 | 0-or-1 |
| 170 | |
| 171 | If there were any jumps to L1 (i.e., there was more than one |
| 172 | comparison), we generate: |
| 173 | |
| 174 | 0-or-1 JUMP_FORWARD L2 |
| 175 | L1: b, 0 ROT_TWO |
| 176 | 0, b POP_TOP |
| 177 | 0 |
| 178 | L2: 0-or-1 |
| 179 | """ |
| 180 | self.visit(node.expr) |
| 181 | # if refs are never emitted, subsequent bind call has no effect |
| 182 | l1 = ForwardRef() |
| 183 | l2 = ForwardRef() |
| 184 | for op, code in node.ops[:-1]: |
| 185 | # emit every comparison except the last |
| 186 | self.visit(code) |
| 187 | self.code.dupTop() |
| 188 | self.code.rotThree() |
| 189 | self.code.compareOp(op) |
| 190 | self.code.jumpIfFalse(l1) |
| 191 | self.code.popTop() |
| 192 | if node.ops: |
| 193 | # emit the last comparison |
| 194 | op, code = node.ops[-1] |
| 195 | self.visit(code) |
| 196 | self.code.compareOp(op) |
| 197 | if len(node.ops) > 1: |
| 198 | self.code.jumpForward(l2) |
| 199 | l1.bind(self.code.getCurInst()) |
| 200 | self.code.rotTwo() |
| 201 | self.code.popTop() |
| 202 | l2.bind(self.code.getCurInst()) |
| 203 | return 1 |
| 204 | |
| 205 | def binaryOp(self, node, op): |
| 206 | self.visit(node.left) |
| 207 | self.visit(node.right) |
| 208 | self.code.emit(op) |
| 209 | return 1 |
| 210 | |
| 211 | def visitAdd(self, node): |
| 212 | return self.binaryOp(node, 'BINARY_ADD') |
| 213 | |
| 214 | def visitSub(self, node): |
| 215 | return self.binaryOp(node, 'BINARY_SUBTRACT') |
| 216 | |
| 217 | def visitMul(self, node): |
| 218 | return self.binaryOp(node, 'BINARY_MULTIPLY') |
| 219 | |
| 220 | def visitDiv(self, node): |
| 221 | return self.binaryOp(node, 'BINARY_DIVIDE') |
| 222 | |
| 223 | def visitName(self, node): |
| 224 | locals = self.locals.top() |
| 225 | if locals.has_elt(node.name): |
| 226 | self.code.loadFast(node.name) |
| 227 | else: |
| 228 | self.code.loadGlobal(node.name) |
| 229 | |
| 230 | def visitConst(self, node): |
| 231 | self.code.loadConst(node.value) |
| 232 | |
| 233 | def visitReturn(self, node): |
| 234 | self.code.setLineNo(node.lineno) |
| 235 | self.visit(node.value) |
| 236 | self.code.returnValue() |
| 237 | return 1 |
| 238 | |
| 239 | def visitRaise(self, node): |
| 240 | self.code.setLineNo(node.lineno) |
| 241 | n = 0 |
| 242 | if node.expr1: |
| 243 | self.visit(node.expr1) |
| 244 | n = n + 1 |
| 245 | if node.expr2: |
| 246 | self.visit(node.expr2) |
| 247 | n = n + 1 |
| 248 | if node.expr3: |
| 249 | self.visit(node.expr3) |
| 250 | n = n + 1 |
| 251 | self.code.raiseVarargs(n) |
| 252 | return 1 |
| 253 | |
| 254 | def visitPrint(self, node): |
| 255 | self.code.setLineNo(node.lineno) |
| 256 | for child in node.nodes: |
| 257 | self.visit(child) |
| 258 | self.code.emit('PRINT_ITEM') |
| 259 | return 1 |
| 260 | |
| 261 | def visitPrintnl(self, node): |
| 262 | self.visitPrint(node) |
| 263 | self.code.emit('PRINT_NEWLINE') |
| 264 | return 1 |
| 265 | |
| 266 | |
| 267 | class LocalNameFinder: |
| 268 | def __init__(self, names=()): |
| 269 | self.names = misc.Set() |
| 270 | for name in names: |
| 271 | self.names.add(name) |
| 272 | |
| 273 | def getLocals(self): |
| 274 | return self.names |
| 275 | |
| 276 | def visitFunction(self, node): |
| 277 | self.names.add(node.name) |
| 278 | return 1 |
| 279 | |
| 280 | def visitImport(self, node): |
| 281 | for name in node.names: |
| 282 | self.names.add(name) |
| 283 | |
| 284 | def visitFrom(self, node): |
| 285 | for name in node.names: |
| 286 | self.names.add(name) |
| 287 | |
| 288 | def visitClassdef(self, node): |
| 289 | self.names.add(node.name) |
| 290 | return 1 |
| 291 | |
| 292 | def visitAssName(self, node): |
| 293 | self.names.add(node.name) |
| 294 | |
| 295 | class Label: |
| 296 | def __init__(self, num): |
| 297 | self.num = num |
| 298 | def __repr__(self): |
| 299 | return "Label(%d)" % self.num |
| 300 | |
| 301 | class ForwardRef: |
| 302 | count = 0 |
| 303 | |
| 304 | def __init__(self, id=None, val=None): |
| 305 | if id is None: |
| 306 | id = ForwardRef.count |
| 307 | ForwardRef.count = ForwardRef.count + 1 |
| 308 | self.id = id |
| 309 | self.val = val |
| 310 | |
| 311 | def __repr__(self): |
| 312 | if self.val: |
| 313 | return "ForwardRef(val=%d)" % self.val |
| 314 | else: |
| 315 | return "ForwardRef(id=%d)" % self.id |
| 316 | |
| 317 | def bind(self, inst): |
| 318 | self.val = inst |
| 319 | |
| 320 | def resolve(self): |
| 321 | return self.val |
| 322 | |
| 323 | class PythonVMCode: |
| 324 | def __init__(self): |
| 325 | self.insts = [] |
| 326 | |
| 327 | def emit(self, *args): |
| 328 | print "emit", args |
| 329 | self.insts.append(args) |
| 330 | |
| 331 | def getCurInst(self): |
| 332 | return len(self.insts) |
| 333 | |
| 334 | def getNextInst(self): |
| 335 | return len(self.insts) + 1 |
| 336 | |
| 337 | def convert(self): |
| 338 | """Convert human-readable names to real bytecode""" |
| 339 | pass |
| 340 | |
| 341 | opnum = {} |
| 342 | for num in range(len(dis.opname)): |
| 343 | opnum[dis.opname[num]] = num |
| 344 | |
| 345 | # the interface below here seemed good at first. upon real use, |
| 346 | # it seems redundant to add a function for each opcode, |
| 347 | # particularly because the method and opcode basically have the |
| 348 | # same name. |
| 349 | |
| 350 | def setLineNo(self, num): |
| 351 | self.emit('SET_LINENO', num) |
| 352 | |
| 353 | def popTop(self): |
| 354 | self.emit('POP_TOP') |
| 355 | |
| 356 | def dupTop(self): |
| 357 | self.emit('DUP_TOP') |
| 358 | |
| 359 | def rotTwo(self): |
| 360 | self.emit('ROT_TWO') |
| 361 | |
| 362 | def rotThree(self): |
| 363 | self.emit('ROT_THREE') |
| 364 | |
| 365 | def jumpIfFalse(self, dest): |
| 366 | self.emit('JUMP_IF_FALSE', dest) |
| 367 | |
| 368 | def loadFast(self, name): |
| 369 | self.emit('LOAD_FAST', name) |
| 370 | |
| 371 | def loadGlobal(self, name): |
| 372 | self.emit('LOAD_GLOBAL', name) |
| 373 | |
| 374 | def binaryAdd(self): |
| 375 | self.emit('BINARY_ADD') |
| 376 | |
| 377 | def compareOp(self, op): |
| 378 | self.emit('COMPARE_OP', op) |
| 379 | |
| 380 | def loadConst(self, val): |
| 381 | self.emit('LOAD_CONST', val) |
| 382 | |
| 383 | def returnValue(self): |
| 384 | self.emit('RETURN_VALUE') |
| 385 | |
| 386 | def jumpForward(self, dest): |
| 387 | self.emit('JUMP_FORWARD', dest) |
| 388 | |
| 389 | def raiseVarargs(self, num): |
| 390 | self.emit('RAISE_VARARGS', num) |
| 391 | |
| 392 | def callFunction(self, num): |
| 393 | self.emit('CALL_FUNCTION', num) |
| 394 | |
| 395 | if __name__ == "__main__": |
| 396 | tree = parse('test.py') |
| 397 | cg = CodeGenerator() |
| 398 | ASTVisitor.VERBOSE = 1 |
| 399 | w = walk(tree, cg) |
| 400 | w.VERBOSE = 1 |
| 401 | for i in range(len(cg.code.insts)): |
| 402 | inst = cg.code.insts[i] |
| 403 | if inst[0] == 'SET_LINENO': |
| 404 | print |
| 405 | print "%4d" % i, inst |
| 406 | |