Jeremy Hylton | 8b6323d | 2000-02-04 00:28:21 +0000 | [diff] [blame] | 1 | """Python bytecode generator |
| 2 | |
| 3 | Currently contains generic ASTVisitor code, a LocalNameFinder, and a |
| 4 | CodeGenerator. Eventually, this will get split into the ASTVisitor as |
| 5 | a generic tool and CodeGenerator as a specific tool. |
| 6 | """ |
| 7 | |
| 8 | from p2c import transformer, ast |
| 9 | import dis |
| 10 | import misc |
Jeremy Hylton | 0fdffcf | 2000-02-04 19:37:35 +0000 | [diff] [blame^] | 11 | import marshal |
| 12 | import new |
| 13 | import string |
Jeremy Hylton | 8b6323d | 2000-02-04 00:28:21 +0000 | [diff] [blame] | 14 | |
| 15 | def parse(path): |
| 16 | f = open(path) |
| 17 | src = f.read() |
| 18 | f.close() |
| 19 | t = transformer.Transformer() |
| 20 | return t.parsesuite(src) |
| 21 | |
| 22 | def walk(tree, visitor): |
| 23 | w = ASTVisitor() |
| 24 | w.preorder(tree, visitor) |
| 25 | return w.visitor |
| 26 | |
| 27 | class ASTVisitor: |
| 28 | """Performs a depth-first walk of the AST |
| 29 | |
| 30 | The ASTVisitor will walk the AST, performing either a preorder or |
| 31 | postorder traversal depending on which method is called. |
| 32 | |
| 33 | methods: |
| 34 | preorder(tree, visitor) |
| 35 | postorder(tree, visitor) |
| 36 | tree: an instance of ast.Node |
| 37 | visitor: an instance with visitXXX methods |
| 38 | |
| 39 | The ASTVisitor is responsible for walking over the tree in the |
| 40 | correct order. For each node, it checks the visitor argument for |
| 41 | a method named 'visitNodeType' where NodeType is the name of the |
| 42 | node's class, e.g. Classdef. If the method exists, it is called |
| 43 | with the node as its sole argument. |
| 44 | |
| 45 | The visitor method for a particular node type can control how |
| 46 | child nodes are visited during a preorder walk. (It can't control |
| 47 | the order during a postorder walk, because it is called _after_ |
| 48 | the walk has occurred.) The ASTVisitor modifies the visitor |
| 49 | argument by adding a visit method to the visitor; this method can |
| 50 | be used to visit a particular child node. If the visitor method |
| 51 | returns a true value, the ASTVisitor will not traverse the child |
| 52 | nodes. |
| 53 | |
| 54 | XXX The interface for controlling the preorder walk needs to be |
| 55 | re-considered. The current interface is convenient for visitors |
| 56 | that mostly let the ASTVisitor do everything. For something like |
| 57 | a code generator, where you want to walk to occur in a specific |
| 58 | order, it's a pain to add "return 1" to the end of each method. |
| 59 | |
| 60 | XXX Perhaps I can use a postorder walk for the code generator? |
| 61 | """ |
| 62 | |
| 63 | VERBOSE = 0 |
| 64 | |
| 65 | def __init__(self): |
| 66 | self.node = None |
| 67 | |
| 68 | def preorder(self, tree, visitor): |
| 69 | """Do preorder walk of tree using visitor""" |
| 70 | self.visitor = visitor |
| 71 | visitor.visit = self._preorder |
| 72 | self._preorder(tree) |
| 73 | |
| 74 | def _preorder(self, node): |
| 75 | stop = self.dispatch(node) |
| 76 | if stop: |
| 77 | return |
| 78 | for child in node.getChildren(): |
| 79 | if isinstance(child, ast.Node): |
| 80 | self._preorder(child) |
| 81 | |
| 82 | def postorder(self, tree, visitor): |
| 83 | """Do preorder walk of tree using visitor""" |
| 84 | self.visitor = visitor |
| 85 | visitor.visit = self._postorder |
| 86 | self._postorder(tree) |
| 87 | |
| 88 | def _postorder(self, tree): |
| 89 | for child in node.getChildren(): |
| 90 | if isinstance(child, ast.Node): |
| 91 | self._preorder(child) |
| 92 | self.dispatch(node) |
| 93 | |
| 94 | def dispatch(self, node): |
| 95 | self.node = node |
| 96 | className = node.__class__.__name__ |
| 97 | meth = getattr(self.visitor, 'visit' + className, None) |
| 98 | if self.VERBOSE: |
| 99 | print "dispatch", className, (meth and meth.__name__ or '') |
| 100 | if meth: |
| 101 | return meth(node) |
| 102 | |
Jeremy Hylton | 8b6323d | 2000-02-04 00:28:21 +0000 | [diff] [blame] | 103 | class CodeGenerator: |
| 104 | def __init__(self): |
| 105 | self.code = PythonVMCode() |
| 106 | self.locals = misc.Stack() |
| 107 | |
| 108 | def visitDiscard(self, node): |
| 109 | return 1 |
| 110 | |
| 111 | def visitModule(self, node): |
| 112 | lnf = walk(node.node, LocalNameFinder()) |
| 113 | self.locals.push(lnf.getLocals()) |
Jeremy Hylton | 0fdffcf | 2000-02-04 19:37:35 +0000 | [diff] [blame^] | 114 | self.visit(node.node) |
Jeremy Hylton | 8b6323d | 2000-02-04 00:28:21 +0000 | [diff] [blame] | 115 | self.code.emit('LOAD_CONST', 'None') |
| 116 | self.code.emit('RETURN_VALUE') |
Jeremy Hylton | 0fdffcf | 2000-02-04 19:37:35 +0000 | [diff] [blame^] | 117 | return 1 |
| 118 | |
| 119 | def visitFunction(self, node): |
| 120 | codeBody = NestedCodeGenerator(node.code, node.argnames) |
| 121 | walk(node.code, codeBody) |
| 122 | self.code.setLineNo(node.lineno) |
| 123 | self.code.emit('LOAD_CONST', codeBody.code) |
| 124 | self.code.emit('MAKE_FUNCTION') |
| 125 | self.code.emit('STORE_NAME', node.name) |
| 126 | return 1 |
Jeremy Hylton | 8b6323d | 2000-02-04 00:28:21 +0000 | [diff] [blame] | 127 | |
| 128 | def visitCallFunc(self, node): |
| 129 | self.visit(node.node) |
| 130 | for arg in node.args: |
| 131 | self.visit(arg) |
| 132 | self.code.callFunction(len(node.args)) |
| 133 | return 1 |
| 134 | |
| 135 | def visitIf(self, node): |
| 136 | after = ForwardRef() |
| 137 | for test, suite in node.tests: |
| 138 | self.code.setLineNo(test.lineno) |
| 139 | self.visit(test) |
| 140 | dest = ForwardRef() |
| 141 | self.code.jumpIfFalse(dest) |
| 142 | self.code.popTop() |
| 143 | self.visit(suite) |
| 144 | self.code.jumpForward(after) |
| 145 | dest.bind(self.code.getCurInst()) |
| 146 | self.code.popTop() |
| 147 | if node.else_: |
| 148 | self.visit(node.else_) |
| 149 | after.bind(self.code.getCurInst()) |
| 150 | return 1 |
| 151 | |
| 152 | def visitCompare(self, node): |
| 153 | """Comment from compile.c follows: |
| 154 | |
| 155 | The following code is generated for all but the last |
| 156 | comparison in a chain: |
| 157 | |
| 158 | label: on stack: opcode: jump to: |
| 159 | |
| 160 | a <code to load b> |
| 161 | a, b DUP_TOP |
| 162 | a, b, b ROT_THREE |
| 163 | b, a, b COMPARE_OP |
| 164 | b, 0-or-1 JUMP_IF_FALSE L1 |
| 165 | b, 1 POP_TOP |
| 166 | b |
| 167 | |
| 168 | We are now ready to repeat this sequence for the next |
| 169 | comparison in the chain. |
| 170 | |
| 171 | For the last we generate: |
| 172 | |
| 173 | b <code to load c> |
| 174 | b, c COMPARE_OP |
| 175 | 0-or-1 |
| 176 | |
| 177 | If there were any jumps to L1 (i.e., there was more than one |
| 178 | comparison), we generate: |
| 179 | |
| 180 | 0-or-1 JUMP_FORWARD L2 |
| 181 | L1: b, 0 ROT_TWO |
| 182 | 0, b POP_TOP |
| 183 | 0 |
| 184 | L2: 0-or-1 |
| 185 | """ |
| 186 | self.visit(node.expr) |
| 187 | # if refs are never emitted, subsequent bind call has no effect |
| 188 | l1 = ForwardRef() |
| 189 | l2 = ForwardRef() |
| 190 | for op, code in node.ops[:-1]: |
| 191 | # emit every comparison except the last |
| 192 | self.visit(code) |
| 193 | self.code.dupTop() |
| 194 | self.code.rotThree() |
| 195 | self.code.compareOp(op) |
| 196 | self.code.jumpIfFalse(l1) |
| 197 | self.code.popTop() |
| 198 | if node.ops: |
| 199 | # emit the last comparison |
| 200 | op, code = node.ops[-1] |
| 201 | self.visit(code) |
| 202 | self.code.compareOp(op) |
| 203 | if len(node.ops) > 1: |
| 204 | self.code.jumpForward(l2) |
| 205 | l1.bind(self.code.getCurInst()) |
| 206 | self.code.rotTwo() |
| 207 | self.code.popTop() |
| 208 | l2.bind(self.code.getCurInst()) |
| 209 | return 1 |
| 210 | |
| 211 | def binaryOp(self, node, op): |
| 212 | self.visit(node.left) |
| 213 | self.visit(node.right) |
| 214 | self.code.emit(op) |
| 215 | return 1 |
| 216 | |
| 217 | def visitAdd(self, node): |
| 218 | return self.binaryOp(node, 'BINARY_ADD') |
| 219 | |
| 220 | def visitSub(self, node): |
| 221 | return self.binaryOp(node, 'BINARY_SUBTRACT') |
| 222 | |
| 223 | def visitMul(self, node): |
| 224 | return self.binaryOp(node, 'BINARY_MULTIPLY') |
| 225 | |
| 226 | def visitDiv(self, node): |
| 227 | return self.binaryOp(node, 'BINARY_DIVIDE') |
| 228 | |
| 229 | def visitName(self, node): |
| 230 | locals = self.locals.top() |
| 231 | if locals.has_elt(node.name): |
| 232 | self.code.loadFast(node.name) |
| 233 | else: |
| 234 | self.code.loadGlobal(node.name) |
| 235 | |
| 236 | def visitConst(self, node): |
| 237 | self.code.loadConst(node.value) |
| 238 | |
| 239 | def visitReturn(self, node): |
| 240 | self.code.setLineNo(node.lineno) |
| 241 | self.visit(node.value) |
| 242 | self.code.returnValue() |
| 243 | return 1 |
| 244 | |
| 245 | def visitRaise(self, node): |
| 246 | self.code.setLineNo(node.lineno) |
| 247 | n = 0 |
| 248 | if node.expr1: |
| 249 | self.visit(node.expr1) |
| 250 | n = n + 1 |
| 251 | if node.expr2: |
| 252 | self.visit(node.expr2) |
| 253 | n = n + 1 |
| 254 | if node.expr3: |
| 255 | self.visit(node.expr3) |
| 256 | n = n + 1 |
| 257 | self.code.raiseVarargs(n) |
| 258 | return 1 |
| 259 | |
| 260 | def visitPrint(self, node): |
| 261 | self.code.setLineNo(node.lineno) |
| 262 | for child in node.nodes: |
| 263 | self.visit(child) |
| 264 | self.code.emit('PRINT_ITEM') |
| 265 | return 1 |
| 266 | |
| 267 | def visitPrintnl(self, node): |
| 268 | self.visitPrint(node) |
| 269 | self.code.emit('PRINT_NEWLINE') |
| 270 | return 1 |
Jeremy Hylton | 0fdffcf | 2000-02-04 19:37:35 +0000 | [diff] [blame^] | 271 | |
| 272 | class NestedCodeGenerator(CodeGenerator): |
| 273 | """Generate code for a function object within another scope |
| 274 | |
| 275 | XXX not clear that this subclass is needed |
| 276 | """ |
| 277 | super_init = CodeGenerator.__init__ |
| 278 | |
| 279 | def __init__(self, code, args): |
| 280 | """code and args of function or class being walked |
| 281 | |
| 282 | XXX need to separately pass to ASTVisitor. the constructor |
| 283 | only uses the code object to find the local names |
| 284 | """ |
| 285 | self.super_init() |
| 286 | lnf = walk(code, LocalNameFinder(args)) |
| 287 | self.locals.push(lnf.getLocals()) |
| 288 | |
| 289 | def visitFunction(self, node): |
| 290 | lnf = walk(node.code, LocalNameFinder(node.argnames)) |
| 291 | self.locals.push(lnf.getLocals()) |
| 292 | # XXX need to handle def foo((a, b)): |
| 293 | self.code.setLineNo(node.lineno) |
| 294 | self.visit(node.code) |
| 295 | self.code.emit('LOAD_CONST', 'None') |
| 296 | self.code.emit('RETURN_VALUE') |
| 297 | return 1 |
Jeremy Hylton | 8b6323d | 2000-02-04 00:28:21 +0000 | [diff] [blame] | 298 | |
| 299 | |
| 300 | class LocalNameFinder: |
| 301 | def __init__(self, names=()): |
| 302 | self.names = misc.Set() |
| 303 | for name in names: |
| 304 | self.names.add(name) |
| 305 | |
| 306 | def getLocals(self): |
| 307 | return self.names |
| 308 | |
| 309 | def visitFunction(self, node): |
| 310 | self.names.add(node.name) |
| 311 | return 1 |
| 312 | |
| 313 | def visitImport(self, node): |
| 314 | for name in node.names: |
| 315 | self.names.add(name) |
| 316 | |
| 317 | def visitFrom(self, node): |
| 318 | for name in node.names: |
| 319 | self.names.add(name) |
| 320 | |
| 321 | def visitClassdef(self, node): |
| 322 | self.names.add(node.name) |
| 323 | return 1 |
| 324 | |
| 325 | def visitAssName(self, node): |
| 326 | self.names.add(node.name) |
| 327 | |
| 328 | class Label: |
| 329 | def __init__(self, num): |
| 330 | self.num = num |
| 331 | def __repr__(self): |
| 332 | return "Label(%d)" % self.num |
| 333 | |
| 334 | class ForwardRef: |
| 335 | count = 0 |
| 336 | |
| 337 | def __init__(self, id=None, val=None): |
| 338 | if id is None: |
| 339 | id = ForwardRef.count |
| 340 | ForwardRef.count = ForwardRef.count + 1 |
| 341 | self.id = id |
| 342 | self.val = val |
| 343 | |
| 344 | def __repr__(self): |
| 345 | if self.val: |
| 346 | return "ForwardRef(val=%d)" % self.val |
| 347 | else: |
| 348 | return "ForwardRef(id=%d)" % self.id |
| 349 | |
| 350 | def bind(self, inst): |
| 351 | self.val = inst |
| 352 | |
| 353 | def resolve(self): |
| 354 | return self.val |
Jeremy Hylton | 0fdffcf | 2000-02-04 19:37:35 +0000 | [diff] [blame^] | 355 | |
| 356 | class CompiledModule: |
| 357 | """Store the code object for a compiled module |
| 358 | |
| 359 | XXX Not clear how the code objects will be stored. Seems possible |
| 360 | that a single code attribute is sufficient, because it will |
| 361 | contains references to all the need code objects. That might be |
| 362 | messy, though. |
| 363 | """ |
| 364 | MAGIC = (20121 | (ord('\r')<<16) | (ord('\n')<<24)) |
| 365 | |
| 366 | def __init__(self): |
| 367 | self.code = None |
| 368 | |
| 369 | def addCode(self, code): |
| 370 | """addCode(self: SelfType, code: PythonVMCode)""" |
| 371 | |
| 372 | def dump(self, path): |
| 373 | """create a .pyc file""" |
| 374 | f = open(path, 'wb') |
| 375 | f.write(self._pyc_header()) |
| 376 | marshal.dump(self.code, f) |
| 377 | f.close() |
| 378 | |
| 379 | def _pyc_header(self, path): |
| 380 | # compile.c uses marshal to write a long directly, with |
| 381 | # calling the interface that would also generate a 1-byte code |
| 382 | # to indicate the type of the value. simplest way to get the |
| 383 | # same effect is to call marshal and then skip the code. |
| 384 | buf = marshal.dumps(self.MAGIC)[1:] |
| 385 | # skip the mtime for now, since I don't have the write |
| 386 | # structure to pass the filename being compiled into this |
| 387 | # instance |
| 388 | return buf + chr(0) * 4 |
Jeremy Hylton | 8b6323d | 2000-02-04 00:28:21 +0000 | [diff] [blame] | 389 | |
| 390 | class PythonVMCode: |
Jeremy Hylton | 0fdffcf | 2000-02-04 19:37:35 +0000 | [diff] [blame^] | 391 | |
Jeremy Hylton | 8b6323d | 2000-02-04 00:28:21 +0000 | [diff] [blame] | 392 | def __init__(self): |
Jeremy Hylton | 0fdffcf | 2000-02-04 19:37:35 +0000 | [diff] [blame^] | 393 | self.insts = [] |
| 394 | # used by makeCodeObject |
| 395 | self.argcount = 0 |
| 396 | self.code = '' |
| 397 | self.consts = [] |
| 398 | self.filename = '' |
| 399 | self.firstlineno = 0 |
| 400 | self.flags = 0 |
| 401 | self.lnotab = None |
| 402 | self.name = '' |
| 403 | self.names = [] |
| 404 | self.nlocals = 0 |
| 405 | self.stacksize = 2 |
| 406 | self.varnames = [] |
| 407 | |
| 408 | def __repr__(self): |
| 409 | return "<bytecode: %d instrs>" % len(self.insts) |
Jeremy Hylton | 8b6323d | 2000-02-04 00:28:21 +0000 | [diff] [blame] | 410 | |
| 411 | def emit(self, *args): |
| 412 | print "emit", args |
| 413 | self.insts.append(args) |
| 414 | |
| 415 | def getCurInst(self): |
| 416 | return len(self.insts) |
| 417 | |
| 418 | def getNextInst(self): |
| 419 | return len(self.insts) + 1 |
| 420 | |
| 421 | def convert(self): |
| 422 | """Convert human-readable names to real bytecode""" |
| 423 | pass |
Jeremy Hylton | 0fdffcf | 2000-02-04 19:37:35 +0000 | [diff] [blame^] | 424 | |
| 425 | def makeCodeObject(self): |
| 426 | """Make a Python code object""" |
| 427 | code = [] |
| 428 | self._findOffsets() |
| 429 | for t in self.insts: |
| 430 | opname = t[0] |
| 431 | if len(t) == 1: |
| 432 | code.append(chr(self.opnum[opname])) |
| 433 | elif len(t) == 2: |
| 434 | oparg = self._convertArg(opname, t[1]) |
| 435 | hi, lo = divmod(oparg, 256) |
| 436 | code.append(chr(self.opnum[opname]) + chr(lo) + chr(hi)) |
| 437 | return string.join(code, '') |
| 438 | |
| 439 | def _findOffsets(self): |
| 440 | """Find offsets for use in resolving ForwardRefs""" |
| 441 | self.offsets = [] |
| 442 | cur = 0 |
| 443 | for t in self.insts: |
| 444 | self.offsets.append(cur) |
| 445 | l = len(t) |
| 446 | if l == 1: |
| 447 | cur = cur + 1 |
| 448 | elif l == 2: |
| 449 | arg = t[1] |
| 450 | if isinstance(arg, ForwardRef): |
| 451 | arg.__offset = cur |
| 452 | cur = cur + 3 |
| 453 | |
| 454 | def _convertArg(self, op, arg): |
| 455 | """Convert the string representation of an arg to a number |
| 456 | |
| 457 | The specific handling depends on the opcode. |
| 458 | |
| 459 | XXX This first implementation isn't going to be very |
| 460 | efficient. |
| 461 | """ |
| 462 | if op == 'SET_LINENO': |
| 463 | return arg |
| 464 | if op == 'LOAD_CONST': |
| 465 | return self._lookupName(arg, self.consts) |
| 466 | if op == 'LOAD_FAST': |
| 467 | return self._lookupName(arg, self.varnames, self.names) |
| 468 | if op == 'LOAD_GLOBAL': |
| 469 | return self._lookupName(arg, self.names) |
| 470 | if op == 'STORE_NAME': |
| 471 | return self._lookupName(arg, self.names) |
| 472 | if op == 'COMPARE_OP': |
| 473 | return self.cmp_op.index(arg) |
| 474 | if self.hasjrel.has_elt(op): |
| 475 | return self.offsets[arg.resolve()] |
| 476 | if self.hasjabs.has_elt(op): |
| 477 | return self.offsets[arg.resolve()] - arg.__offset |
| 478 | print op, arg |
| 479 | return arg |
| 480 | |
| 481 | def _lookupName(self, name, list, list2=None): |
| 482 | """Return index of name in list, appending if necessary |
| 483 | |
| 484 | Yicky hack: Second list can be used for lookup of local names |
| 485 | where the name needs to be added to varnames and names. |
| 486 | """ |
| 487 | if name in list: |
| 488 | return list.index(name) |
| 489 | else: |
| 490 | end = len(list) |
| 491 | list.append(name) |
| 492 | if list2 is not None: |
| 493 | list2.append(name) |
| 494 | return end |
| 495 | |
| 496 | # Convert some stuff from the dis module for local use |
| 497 | |
| 498 | cmp_op = list(dis.cmp_op) |
| 499 | hasjrel = misc.Set() |
| 500 | for i in dis.hasjrel: |
| 501 | hasjrel.add(dis.opname[i]) |
| 502 | hasjabs = misc.Set() |
| 503 | for i in dis.hasjabs: |
| 504 | hasjabs.add(dis.opname[i]) |
| 505 | |
Jeremy Hylton | 8b6323d | 2000-02-04 00:28:21 +0000 | [diff] [blame] | 506 | opnum = {} |
| 507 | for num in range(len(dis.opname)): |
| 508 | opnum[dis.opname[num]] = num |
| 509 | |
| 510 | # the interface below here seemed good at first. upon real use, |
| 511 | # it seems redundant to add a function for each opcode, |
| 512 | # particularly because the method and opcode basically have the |
| 513 | # same name. |
| 514 | |
| 515 | def setLineNo(self, num): |
| 516 | self.emit('SET_LINENO', num) |
| 517 | |
| 518 | def popTop(self): |
| 519 | self.emit('POP_TOP') |
| 520 | |
| 521 | def dupTop(self): |
| 522 | self.emit('DUP_TOP') |
| 523 | |
| 524 | def rotTwo(self): |
| 525 | self.emit('ROT_TWO') |
| 526 | |
| 527 | def rotThree(self): |
| 528 | self.emit('ROT_THREE') |
| 529 | |
| 530 | def jumpIfFalse(self, dest): |
| 531 | self.emit('JUMP_IF_FALSE', dest) |
| 532 | |
| 533 | def loadFast(self, name): |
| 534 | self.emit('LOAD_FAST', name) |
| 535 | |
| 536 | def loadGlobal(self, name): |
| 537 | self.emit('LOAD_GLOBAL', name) |
| 538 | |
| 539 | def binaryAdd(self): |
| 540 | self.emit('BINARY_ADD') |
| 541 | |
| 542 | def compareOp(self, op): |
| 543 | self.emit('COMPARE_OP', op) |
| 544 | |
| 545 | def loadConst(self, val): |
| 546 | self.emit('LOAD_CONST', val) |
| 547 | |
| 548 | def returnValue(self): |
| 549 | self.emit('RETURN_VALUE') |
| 550 | |
| 551 | def jumpForward(self, dest): |
| 552 | self.emit('JUMP_FORWARD', dest) |
| 553 | |
| 554 | def raiseVarargs(self, num): |
| 555 | self.emit('RAISE_VARARGS', num) |
| 556 | |
| 557 | def callFunction(self, num): |
| 558 | self.emit('CALL_FUNCTION', num) |
| 559 | |
| 560 | if __name__ == "__main__": |
| 561 | tree = parse('test.py') |
| 562 | cg = CodeGenerator() |
| 563 | ASTVisitor.VERBOSE = 1 |
| 564 | w = walk(tree, cg) |
| 565 | w.VERBOSE = 1 |
| 566 | for i in range(len(cg.code.insts)): |
| 567 | inst = cg.code.insts[i] |
| 568 | if inst[0] == 'SET_LINENO': |
| 569 | print |
| 570 | print "%4d" % i, inst |
Jeremy Hylton | 0fdffcf | 2000-02-04 19:37:35 +0000 | [diff] [blame^] | 571 | code = cg.code.makeCodeObject() |