Jeremy Hylton | 8b6323d | 2000-02-04 00:28:21 +0000 | [diff] [blame] | 1 | """Python bytecode generator |
| 2 | |
| 3 | Currently contains generic ASTVisitor code, a LocalNameFinder, and a |
| 4 | CodeGenerator. Eventually, this will get split into the ASTVisitor as |
| 5 | a generic tool and CodeGenerator as a specific tool. |
| 6 | """ |
| 7 | |
| 8 | from p2c import transformer, ast |
| 9 | import dis |
| 10 | import misc |
Jeremy Hylton | 0fdffcf | 2000-02-04 19:37:35 +0000 | [diff] [blame] | 11 | import marshal |
| 12 | import new |
| 13 | import string |
Jeremy Hylton | 53187f3 | 2000-02-08 19:01:29 +0000 | [diff] [blame] | 14 | import sys |
| 15 | import os |
| 16 | import stat |
| 17 | import struct |
Jeremy Hylton | 8b6323d | 2000-02-04 00:28:21 +0000 | [diff] [blame] | 18 | |
| 19 | def parse(path): |
| 20 | f = open(path) |
| 21 | src = f.read() |
| 22 | f.close() |
| 23 | t = transformer.Transformer() |
| 24 | return t.parsesuite(src) |
| 25 | |
Jeremy Hylton | 5e0ce53 | 2000-02-10 00:47:08 +0000 | [diff] [blame] | 26 | def walk(tree, visitor, verbose=None, walker=None): |
Jeremy Hylton | 4024560 | 2000-02-08 21:15:48 +0000 | [diff] [blame] | 27 | print visitor, "start" |
Jeremy Hylton | 5e0ce53 | 2000-02-10 00:47:08 +0000 | [diff] [blame] | 28 | if walker: |
| 29 | w = walker() |
| 30 | else: |
| 31 | w = ASTVisitor() |
Jeremy Hylton | 4024560 | 2000-02-08 21:15:48 +0000 | [diff] [blame] | 32 | if verbose is not None: |
| 33 | w.VERBOSE = verbose |
Jeremy Hylton | 8b6323d | 2000-02-04 00:28:21 +0000 | [diff] [blame] | 34 | w.preorder(tree, visitor) |
Jeremy Hylton | 4024560 | 2000-02-08 21:15:48 +0000 | [diff] [blame] | 35 | print visitor, "finish" |
Jeremy Hylton | 8b6323d | 2000-02-04 00:28:21 +0000 | [diff] [blame] | 36 | return w.visitor |
| 37 | |
Jeremy Hylton | 5e0ce53 | 2000-02-10 00:47:08 +0000 | [diff] [blame] | 38 | def dumpNode(node): |
| 39 | print node.__class__ |
| 40 | for attr in dir(node): |
| 41 | if attr[0] != '_': |
| 42 | print "\t", "%-10.10s" % attr, getattr(node, attr) |
| 43 | |
Jeremy Hylton | 8b6323d | 2000-02-04 00:28:21 +0000 | [diff] [blame] | 44 | class ASTVisitor: |
| 45 | """Performs a depth-first walk of the AST |
| 46 | |
| 47 | The ASTVisitor will walk the AST, performing either a preorder or |
| 48 | postorder traversal depending on which method is called. |
| 49 | |
| 50 | methods: |
| 51 | preorder(tree, visitor) |
| 52 | postorder(tree, visitor) |
| 53 | tree: an instance of ast.Node |
| 54 | visitor: an instance with visitXXX methods |
| 55 | |
| 56 | The ASTVisitor is responsible for walking over the tree in the |
| 57 | correct order. For each node, it checks the visitor argument for |
| 58 | a method named 'visitNodeType' where NodeType is the name of the |
| 59 | node's class, e.g. Classdef. If the method exists, it is called |
| 60 | with the node as its sole argument. |
| 61 | |
| 62 | The visitor method for a particular node type can control how |
| 63 | child nodes are visited during a preorder walk. (It can't control |
| 64 | the order during a postorder walk, because it is called _after_ |
| 65 | the walk has occurred.) The ASTVisitor modifies the visitor |
| 66 | argument by adding a visit method to the visitor; this method can |
| 67 | be used to visit a particular child node. If the visitor method |
| 68 | returns a true value, the ASTVisitor will not traverse the child |
| 69 | nodes. |
| 70 | |
| 71 | XXX The interface for controlling the preorder walk needs to be |
| 72 | re-considered. The current interface is convenient for visitors |
| 73 | that mostly let the ASTVisitor do everything. For something like |
| 74 | a code generator, where you want to walk to occur in a specific |
| 75 | order, it's a pain to add "return 1" to the end of each method. |
| 76 | |
| 77 | XXX Perhaps I can use a postorder walk for the code generator? |
| 78 | """ |
| 79 | |
Jeremy Hylton | 4024560 | 2000-02-08 21:15:48 +0000 | [diff] [blame] | 80 | VERBOSE = 0 |
Jeremy Hylton | 8b6323d | 2000-02-04 00:28:21 +0000 | [diff] [blame] | 81 | |
| 82 | def __init__(self): |
| 83 | self.node = None |
| 84 | |
| 85 | def preorder(self, tree, visitor): |
| 86 | """Do preorder walk of tree using visitor""" |
| 87 | self.visitor = visitor |
| 88 | visitor.visit = self._preorder |
| 89 | self._preorder(tree) |
| 90 | |
| 91 | def _preorder(self, node): |
| 92 | stop = self.dispatch(node) |
| 93 | if stop: |
| 94 | return |
| 95 | for child in node.getChildren(): |
| 96 | if isinstance(child, ast.Node): |
| 97 | self._preorder(child) |
| 98 | |
| 99 | def postorder(self, tree, visitor): |
| 100 | """Do preorder walk of tree using visitor""" |
| 101 | self.visitor = visitor |
| 102 | visitor.visit = self._postorder |
| 103 | self._postorder(tree) |
| 104 | |
| 105 | def _postorder(self, tree): |
| 106 | for child in node.getChildren(): |
| 107 | if isinstance(child, ast.Node): |
| 108 | self._preorder(child) |
| 109 | self.dispatch(node) |
| 110 | |
| 111 | def dispatch(self, node): |
| 112 | self.node = node |
| 113 | className = node.__class__.__name__ |
| 114 | meth = getattr(self.visitor, 'visit' + className, None) |
Jeremy Hylton | 4024560 | 2000-02-08 21:15:48 +0000 | [diff] [blame] | 115 | if self.VERBOSE > 0: |
| 116 | if self.VERBOSE == 1: |
| 117 | if meth is None: |
| 118 | print "dispatch", className |
| 119 | else: |
| 120 | print "dispatch", className, (meth and meth.__name__ or '') |
Jeremy Hylton | 8b6323d | 2000-02-04 00:28:21 +0000 | [diff] [blame] | 121 | if meth: |
| 122 | return meth(node) |
| 123 | |
Jeremy Hylton | 5e0ce53 | 2000-02-10 00:47:08 +0000 | [diff] [blame] | 124 | class ExampleASTVisitor(ASTVisitor): |
| 125 | """Prints examples of the nodes that aren't visited""" |
| 126 | examples = {} |
| 127 | |
| 128 | def dispatch(self, node): |
| 129 | self.node = node |
| 130 | className = node.__class__.__name__ |
| 131 | meth = getattr(self.visitor, 'visit' + className, None) |
| 132 | if self.VERBOSE > 0: |
| 133 | if self.VERBOSE == 1: |
| 134 | if meth is None: |
| 135 | print "dispatch", className |
| 136 | else: |
| 137 | print "dispatch", className, (meth and meth.__name__ or '') |
| 138 | if meth: |
| 139 | return meth(node) |
| 140 | else: |
| 141 | klass = node.__class__ |
| 142 | if self.VERBOSE < 2: |
| 143 | if self.examples.has_key(klass): |
| 144 | return |
| 145 | self.examples[klass] = klass |
| 146 | print |
| 147 | print klass |
| 148 | for attr in dir(node): |
| 149 | if attr[0] != '_': |
| 150 | print "\t", "%-12.12s" % attr, getattr(node, attr) |
| 151 | print |
| 152 | |
Jeremy Hylton | 8b6323d | 2000-02-04 00:28:21 +0000 | [diff] [blame] | 153 | class CodeGenerator: |
Jeremy Hylton | 3e0910c | 2000-02-10 17:20:39 +0000 | [diff] [blame] | 154 | # XXX this should be combined with PythonVMCode. there is no |
| 155 | # clear way to split the functionality into two classes. |
| 156 | |
| 157 | MODULE_NAMESPACE = 1 |
| 158 | FUNCTION_NAMESPACE = 2 |
| 159 | |
Jeremy Hylton | 53187f3 | 2000-02-08 19:01:29 +0000 | [diff] [blame] | 160 | def __init__(self, filename=None): |
| 161 | self.filename = filename |
| 162 | self.code = PythonVMCode(filename=filename) |
| 163 | self.code.setFlags(0) |
Jeremy Hylton | 8b6323d | 2000-02-04 00:28:21 +0000 | [diff] [blame] | 164 | self.locals = misc.Stack() |
Jeremy Hylton | 3e0910c | 2000-02-10 17:20:39 +0000 | [diff] [blame] | 165 | self.namespace = self.MODULE_NAMESPACE |
Jeremy Hylton | 53187f3 | 2000-02-08 19:01:29 +0000 | [diff] [blame] | 166 | self.curStack = 0 |
| 167 | self.maxStack = 0 |
| 168 | |
Jeremy Hylton | 5e0ce53 | 2000-02-10 00:47:08 +0000 | [diff] [blame] | 169 | def generateFunctionCode(self, func, filename='<?>'): |
| 170 | """Generate code for a function body""" |
| 171 | self.name = func.name |
| 172 | self.filename = filename |
| 173 | args = func.argnames |
| 174 | self.code = PythonVMCode(len(args), name=func.name, |
Jeremy Hylton | 3e0910c | 2000-02-10 17:20:39 +0000 | [diff] [blame] | 175 | filename=filename, args=args) |
| 176 | self.namespace = self.FUNCTION_NAMESPACE |
Jeremy Hylton | 5e0ce53 | 2000-02-10 00:47:08 +0000 | [diff] [blame] | 177 | if func.varargs: |
| 178 | self.code.setVarArgs() |
| 179 | if func.kwargs: |
| 180 | self.code.setKWArgs() |
| 181 | lnf = walk(func.code, LocalNameFinder(args), 0) |
| 182 | self.locals.push(lnf.getLocals()) |
| 183 | ## print func.name, "(", func.argnames, ")" |
| 184 | ## print lnf.getLocals().items() |
| 185 | self.code.setLineNo(func.lineno) |
| 186 | walk(func.code, self) |
| 187 | self.code.emit('LOAD_CONST', None) |
| 188 | self.code.emit('RETURN_VALUE') |
| 189 | |
Jeremy Hylton | 53187f3 | 2000-02-08 19:01:29 +0000 | [diff] [blame] | 190 | def emit(self): |
| 191 | """Create a Python code object |
| 192 | |
| 193 | XXX It is confusing that this method isn't related to the |
| 194 | method named emit in the PythonVMCode. |
| 195 | """ |
| 196 | return self.code.makeCodeObject(self.maxStack) |
| 197 | |
Jeremy Hylton | 4024560 | 2000-02-08 21:15:48 +0000 | [diff] [blame] | 198 | def isLocalName(self, name): |
| 199 | return self.locals.top().has_elt(name) |
| 200 | |
Jeremy Hylton | 3e0910c | 2000-02-10 17:20:39 +0000 | [diff] [blame] | 201 | def _nameOp(self, prefix, name): |
| 202 | if self.isLocalName(name): |
| 203 | if self.namespace == self.MODULE_NAMESPACE: |
| 204 | self.code.emit(prefix + '_NAME', name) |
| 205 | else: |
| 206 | self.code.emit(prefix + '_FAST', name) |
| 207 | else: |
| 208 | self.code.emit(prefix + '_GLOBAL', name) |
| 209 | |
| 210 | def storeName(self, name): |
| 211 | self._nameOp('STORE', name) |
| 212 | |
| 213 | def loadName(self, name): |
| 214 | self._nameOp('LOAD', name) |
| 215 | |
| 216 | def delName(self, name): |
| 217 | self._nameOp('DELETE', name) |
| 218 | |
Jeremy Hylton | 53187f3 | 2000-02-08 19:01:29 +0000 | [diff] [blame] | 219 | def push(self, n): |
| 220 | self.curStack = self.curStack + n |
| 221 | if self.curStack > self.maxStack: |
| 222 | self.maxStack = self.curStack |
| 223 | |
| 224 | def pop(self, n): |
| 225 | if n >= self.curStack: |
| 226 | self.curStack = self.curStack - n |
| 227 | else: |
| 228 | self.curStack = 0 |
Jeremy Hylton | 8b6323d | 2000-02-04 00:28:21 +0000 | [diff] [blame] | 229 | |
Jeremy Hylton | 4024560 | 2000-02-08 21:15:48 +0000 | [diff] [blame] | 230 | def assertStackEmpty(self): |
| 231 | if self.curStack != 0: |
| 232 | print "warning: stack should be empty" |
| 233 | |
Jeremy Hylton | 5e0ce53 | 2000-02-10 00:47:08 +0000 | [diff] [blame] | 234 | def visitNULL(self, node): |
| 235 | """Method exists only to stop warning in -v mode""" |
| 236 | pass |
| 237 | |
| 238 | visitStmt = visitNULL |
| 239 | visitGlobal = visitNULL |
| 240 | |
Jeremy Hylton | 8b6323d | 2000-02-04 00:28:21 +0000 | [diff] [blame] | 241 | def visitDiscard(self, node): |
Jeremy Hylton | 5e0ce53 | 2000-02-10 00:47:08 +0000 | [diff] [blame] | 242 | self.visit(node.expr) |
| 243 | self.code.emit('POP_TOP') |
| 244 | self.pop(1) |
Jeremy Hylton | 8b6323d | 2000-02-04 00:28:21 +0000 | [diff] [blame] | 245 | return 1 |
| 246 | |
Jeremy Hylton | 5e0ce53 | 2000-02-10 00:47:08 +0000 | [diff] [blame] | 247 | def visitPass(self, node): |
| 248 | self.code.setLineNo(node.lineno) |
| 249 | |
Jeremy Hylton | 8b6323d | 2000-02-04 00:28:21 +0000 | [diff] [blame] | 250 | def visitModule(self, node): |
Jeremy Hylton | 4024560 | 2000-02-08 21:15:48 +0000 | [diff] [blame] | 251 | lnf = walk(node.node, LocalNameFinder(), 0) |
Jeremy Hylton | 8b6323d | 2000-02-04 00:28:21 +0000 | [diff] [blame] | 252 | self.locals.push(lnf.getLocals()) |
Jeremy Hylton | 0fdffcf | 2000-02-04 19:37:35 +0000 | [diff] [blame] | 253 | self.visit(node.node) |
Jeremy Hylton | 53187f3 | 2000-02-08 19:01:29 +0000 | [diff] [blame] | 254 | self.code.emit('LOAD_CONST', None) |
Jeremy Hylton | 8b6323d | 2000-02-04 00:28:21 +0000 | [diff] [blame] | 255 | self.code.emit('RETURN_VALUE') |
Jeremy Hylton | 0fdffcf | 2000-02-04 19:37:35 +0000 | [diff] [blame] | 256 | return 1 |
| 257 | |
Jeremy Hylton | 5e0ce53 | 2000-02-10 00:47:08 +0000 | [diff] [blame] | 258 | def visitImport(self, node): |
Jeremy Hylton | 0fdffcf | 2000-02-04 19:37:35 +0000 | [diff] [blame] | 259 | self.code.setLineNo(node.lineno) |
Jeremy Hylton | 5e0ce53 | 2000-02-10 00:47:08 +0000 | [diff] [blame] | 260 | for name in node.names: |
| 261 | self.code.emit('IMPORT_NAME', name) |
Jeremy Hylton | 3e0910c | 2000-02-10 17:20:39 +0000 | [diff] [blame] | 262 | self.storeName(name) |
Jeremy Hylton | 5e0ce53 | 2000-02-10 00:47:08 +0000 | [diff] [blame] | 263 | |
| 264 | def visitFrom(self, node): |
| 265 | self.code.setLineNo(node.lineno) |
| 266 | self.code.emit('IMPORT_NAME', node.modname) |
| 267 | for name in node.names: |
| 268 | self.code.emit('IMPORT_FROM', name) |
| 269 | self.code.emit('POP_TOP') |
| 270 | |
| 271 | def visitFunction(self, node): |
| 272 | codeBody = CodeGenerator() |
| 273 | codeBody.generateFunctionCode(node, filename=self.filename) |
| 274 | self.code.setLineNo(node.lineno) |
| 275 | for default in node.defaults: |
| 276 | self.visit(default) |
Jeremy Hylton | 53187f3 | 2000-02-08 19:01:29 +0000 | [diff] [blame] | 277 | self.code.emit('LOAD_CONST', codeBody) |
Jeremy Hylton | 5e0ce53 | 2000-02-10 00:47:08 +0000 | [diff] [blame] | 278 | self.code.emit('MAKE_FUNCTION', len(node.defaults)) |
Jeremy Hylton | 3e0910c | 2000-02-10 17:20:39 +0000 | [diff] [blame] | 279 | self.storeName(node.name) |
Jeremy Hylton | 0fdffcf | 2000-02-04 19:37:35 +0000 | [diff] [blame] | 280 | return 1 |
Jeremy Hylton | 8b6323d | 2000-02-04 00:28:21 +0000 | [diff] [blame] | 281 | |
| 282 | def visitCallFunc(self, node): |
| 283 | self.visit(node.node) |
| 284 | for arg in node.args: |
| 285 | self.visit(arg) |
| 286 | self.code.callFunction(len(node.args)) |
| 287 | return 1 |
| 288 | |
| 289 | def visitIf(self, node): |
Jeremy Hylton | 4024560 | 2000-02-08 21:15:48 +0000 | [diff] [blame] | 290 | after = StackRef() |
Jeremy Hylton | 8b6323d | 2000-02-04 00:28:21 +0000 | [diff] [blame] | 291 | for test, suite in node.tests: |
Jeremy Hylton | 4024560 | 2000-02-08 21:15:48 +0000 | [diff] [blame] | 292 | if hasattr(test, 'lineno'): |
| 293 | self.code.setLineNo(test.lineno) |
| 294 | else: |
| 295 | print "warning", "no line number" |
Jeremy Hylton | 8b6323d | 2000-02-04 00:28:21 +0000 | [diff] [blame] | 296 | self.visit(test) |
Jeremy Hylton | 4024560 | 2000-02-08 21:15:48 +0000 | [diff] [blame] | 297 | dest = StackRef() |
Jeremy Hylton | 8b6323d | 2000-02-04 00:28:21 +0000 | [diff] [blame] | 298 | self.code.jumpIfFalse(dest) |
| 299 | self.code.popTop() |
| 300 | self.visit(suite) |
| 301 | self.code.jumpForward(after) |
| 302 | dest.bind(self.code.getCurInst()) |
| 303 | self.code.popTop() |
| 304 | if node.else_: |
| 305 | self.visit(node.else_) |
| 306 | after.bind(self.code.getCurInst()) |
| 307 | return 1 |
| 308 | |
Jeremy Hylton | 4024560 | 2000-02-08 21:15:48 +0000 | [diff] [blame] | 309 | def visitFor(self, node): |
| 310 | # three refs needed |
| 311 | start = StackRef() |
| 312 | anchor = StackRef() |
| 313 | breakAnchor = StackRef() |
| 314 | |
| 315 | self.code.emit('SET_LINENO', node.lineno) |
| 316 | self.code.emit('SETUP_LOOP', breakAnchor) |
| 317 | self.visit(node.list) |
| 318 | self.visit(ast.Const(0)) |
| 319 | start.bind(self.code.getCurInst()) |
| 320 | self.code.setLineNo(node.lineno) |
| 321 | self.code.emit('FOR_LOOP', anchor) |
| 322 | self.push(1) |
| 323 | self.visit(node.assign) |
| 324 | self.visit(node.body) |
| 325 | self.code.emit('JUMP_ABSOLUTE', start) |
| 326 | anchor.bind(self.code.getCurInst()) |
| 327 | self.code.emit('POP_BLOCK') |
| 328 | if node.else_: |
| 329 | self.visit(node.else_) |
| 330 | breakAnchor.bind(self.code.getCurInst()) |
| 331 | return 1 |
| 332 | |
Jeremy Hylton | 8b6323d | 2000-02-04 00:28:21 +0000 | [diff] [blame] | 333 | def visitCompare(self, node): |
| 334 | """Comment from compile.c follows: |
| 335 | |
| 336 | The following code is generated for all but the last |
| 337 | comparison in a chain: |
| 338 | |
| 339 | label: on stack: opcode: jump to: |
| 340 | |
| 341 | a <code to load b> |
| 342 | a, b DUP_TOP |
| 343 | a, b, b ROT_THREE |
| 344 | b, a, b COMPARE_OP |
| 345 | b, 0-or-1 JUMP_IF_FALSE L1 |
| 346 | b, 1 POP_TOP |
| 347 | b |
| 348 | |
| 349 | We are now ready to repeat this sequence for the next |
| 350 | comparison in the chain. |
| 351 | |
| 352 | For the last we generate: |
| 353 | |
| 354 | b <code to load c> |
| 355 | b, c COMPARE_OP |
| 356 | 0-or-1 |
| 357 | |
| 358 | If there were any jumps to L1 (i.e., there was more than one |
| 359 | comparison), we generate: |
| 360 | |
| 361 | 0-or-1 JUMP_FORWARD L2 |
| 362 | L1: b, 0 ROT_TWO |
| 363 | 0, b POP_TOP |
| 364 | 0 |
| 365 | L2: 0-or-1 |
| 366 | """ |
| 367 | self.visit(node.expr) |
| 368 | # if refs are never emitted, subsequent bind call has no effect |
Jeremy Hylton | 4024560 | 2000-02-08 21:15:48 +0000 | [diff] [blame] | 369 | l1 = StackRef() |
| 370 | l2 = StackRef() |
Jeremy Hylton | 8b6323d | 2000-02-04 00:28:21 +0000 | [diff] [blame] | 371 | for op, code in node.ops[:-1]: |
| 372 | # emit every comparison except the last |
| 373 | self.visit(code) |
| 374 | self.code.dupTop() |
| 375 | self.code.rotThree() |
| 376 | self.code.compareOp(op) |
Jeremy Hylton | 4024560 | 2000-02-08 21:15:48 +0000 | [diff] [blame] | 377 | # dupTop and compareOp cancel stack effect |
Jeremy Hylton | 8b6323d | 2000-02-04 00:28:21 +0000 | [diff] [blame] | 378 | self.code.jumpIfFalse(l1) |
| 379 | self.code.popTop() |
Jeremy Hylton | 4024560 | 2000-02-08 21:15:48 +0000 | [diff] [blame] | 380 | self.pop(1) |
Jeremy Hylton | 8b6323d | 2000-02-04 00:28:21 +0000 | [diff] [blame] | 381 | if node.ops: |
| 382 | # emit the last comparison |
| 383 | op, code = node.ops[-1] |
| 384 | self.visit(code) |
| 385 | self.code.compareOp(op) |
Jeremy Hylton | 4024560 | 2000-02-08 21:15:48 +0000 | [diff] [blame] | 386 | self.pop(1) |
Jeremy Hylton | 8b6323d | 2000-02-04 00:28:21 +0000 | [diff] [blame] | 387 | if len(node.ops) > 1: |
| 388 | self.code.jumpForward(l2) |
| 389 | l1.bind(self.code.getCurInst()) |
| 390 | self.code.rotTwo() |
| 391 | self.code.popTop() |
Jeremy Hylton | 4024560 | 2000-02-08 21:15:48 +0000 | [diff] [blame] | 392 | self.pop(1) |
Jeremy Hylton | 8b6323d | 2000-02-04 00:28:21 +0000 | [diff] [blame] | 393 | l2.bind(self.code.getCurInst()) |
| 394 | return 1 |
| 395 | |
Jeremy Hylton | 5e0ce53 | 2000-02-10 00:47:08 +0000 | [diff] [blame] | 396 | def visitGetattr(self, node): |
| 397 | self.visit(node.expr) |
| 398 | self.code.emit('LOAD_ATTR', node.attrname) |
| 399 | return 1 |
| 400 | |
| 401 | def visitSubscript(self, node): |
| 402 | self.visit(node.expr) |
| 403 | for sub in node.subs[:-1]: |
| 404 | self.visit(sub) |
| 405 | self.code.emit('BINARY_SUBSCR') |
| 406 | self.visit(node.subs[-1]) |
| 407 | if node.flags == 'OP_APPLY': |
| 408 | self.code.emit('BINARY_SUBSCR') |
| 409 | else: |
| 410 | self.code.emit('STORE_SUBSCR') |
| 411 | |
| 412 | return 1 |
| 413 | |
| 414 | def visitSlice(self, node): |
| 415 | self.visit(node.expr) |
| 416 | slice = 0 |
| 417 | if node.lower: |
| 418 | self.visit(node.lower) |
| 419 | slice = slice | 1 |
| 420 | self.pop(1) |
| 421 | if node.upper: |
| 422 | self.visit(node.upper) |
| 423 | slice = slice | 2 |
| 424 | self.pop(1) |
| 425 | if node.flags == 'OP_APPLY': |
| 426 | self.code.emit('SLICE+%d' % slice) |
| 427 | elif node.flags == 'OP_ASSIGN': |
| 428 | self.code.emit('STORE_SLICE+%d' % slice) |
| 429 | elif node.flags == 'OP_DELETE': |
| 430 | self.code.emit('DELETE_SLICE+%d' % slice) |
| 431 | else: |
| 432 | print node.flags |
| 433 | raise |
| 434 | return 1 |
| 435 | |
Jeremy Hylton | 4024560 | 2000-02-08 21:15:48 +0000 | [diff] [blame] | 436 | def visitAssign(self, node): |
| 437 | self.code.setLineNo(node.lineno) |
Jeremy Hylton | 4024560 | 2000-02-08 21:15:48 +0000 | [diff] [blame] | 438 | self.visit(node.expr) |
| 439 | for elt in node.nodes: |
| 440 | if isinstance(elt, ast.Node): |
| 441 | self.visit(elt) |
| 442 | return 1 |
| 443 | |
| 444 | def visitAssName(self, node): |
| 445 | if node.flags != 'OP_ASSIGN': |
| 446 | print "oops", node.flags |
Jeremy Hylton | 3e0910c | 2000-02-10 17:20:39 +0000 | [diff] [blame] | 447 | self.storeName(node.name) |
Jeremy Hylton | 4024560 | 2000-02-08 21:15:48 +0000 | [diff] [blame] | 448 | self.pop(1) |
| 449 | |
Jeremy Hylton | 5e0ce53 | 2000-02-10 00:47:08 +0000 | [diff] [blame] | 450 | def visitAssAttr(self, node): |
| 451 | if node.flags != 'OP_ASSIGN': |
| 452 | print "warning: unexpected flags:", node.flags |
| 453 | print node |
| 454 | self.visit(node.expr) |
| 455 | self.code.emit('STORE_ATTR', node.attrname) |
| 456 | return 1 |
| 457 | |
| 458 | def visitAssTuple(self, node): |
| 459 | self.code.emit('UNPACK_TUPLE', len(node.nodes)) |
| 460 | for child in node.nodes: |
| 461 | self.visit(child) |
| 462 | return 1 |
| 463 | |
| 464 | visitAssList = visitAssTuple |
| 465 | |
Jeremy Hylton | 8b6323d | 2000-02-04 00:28:21 +0000 | [diff] [blame] | 466 | def binaryOp(self, node, op): |
| 467 | self.visit(node.left) |
| 468 | self.visit(node.right) |
| 469 | self.code.emit(op) |
Jeremy Hylton | 53187f3 | 2000-02-08 19:01:29 +0000 | [diff] [blame] | 470 | self.pop(1) |
Jeremy Hylton | 8b6323d | 2000-02-04 00:28:21 +0000 | [diff] [blame] | 471 | return 1 |
| 472 | |
Jeremy Hylton | 4024560 | 2000-02-08 21:15:48 +0000 | [diff] [blame] | 473 | def unaryOp(self, node, op): |
| 474 | self.visit(node.expr) |
| 475 | self.code.emit(op) |
| 476 | return 1 |
| 477 | |
Jeremy Hylton | 8b6323d | 2000-02-04 00:28:21 +0000 | [diff] [blame] | 478 | def visitAdd(self, node): |
| 479 | return self.binaryOp(node, 'BINARY_ADD') |
| 480 | |
| 481 | def visitSub(self, node): |
| 482 | return self.binaryOp(node, 'BINARY_SUBTRACT') |
| 483 | |
| 484 | def visitMul(self, node): |
| 485 | return self.binaryOp(node, 'BINARY_MULTIPLY') |
| 486 | |
| 487 | def visitDiv(self, node): |
| 488 | return self.binaryOp(node, 'BINARY_DIVIDE') |
| 489 | |
Jeremy Hylton | 5e0ce53 | 2000-02-10 00:47:08 +0000 | [diff] [blame] | 490 | def visitMod(self, node): |
| 491 | return self.binaryOp(node, 'BINARY_MODULO') |
| 492 | |
Jeremy Hylton | 4024560 | 2000-02-08 21:15:48 +0000 | [diff] [blame] | 493 | def visitUnarySub(self, node): |
| 494 | return self.unaryOp(node, 'UNARY_NEGATIVE') |
| 495 | |
| 496 | def visitUnaryAdd(self, node): |
| 497 | return self.unaryOp(node, 'UNARY_POSITIVE') |
| 498 | |
| 499 | def visitUnaryInvert(self, node): |
| 500 | return self.unaryOp(node, 'UNARY_INVERT') |
| 501 | |
Jeremy Hylton | 5e0ce53 | 2000-02-10 00:47:08 +0000 | [diff] [blame] | 502 | def visitNot(self, node): |
| 503 | return self.unaryOp(node, 'UNARY_NOT') |
| 504 | |
Jeremy Hylton | 4024560 | 2000-02-08 21:15:48 +0000 | [diff] [blame] | 505 | def visitBackquote(self, node): |
| 506 | return self.unaryOp(node, 'UNARY_CONVERT') |
| 507 | |
Jeremy Hylton | 5e0ce53 | 2000-02-10 00:47:08 +0000 | [diff] [blame] | 508 | def visitTest(self, node, jump): |
| 509 | end = StackRef() |
| 510 | for child in node.nodes[:-1]: |
| 511 | self.visit(child) |
| 512 | self.code.emit(jump, end) |
| 513 | self.code.emit('POP_TOP') |
| 514 | self.visit(node.nodes[-1]) |
| 515 | end.bind(self.code.getCurInst()) |
| 516 | return 1 |
| 517 | |
| 518 | def visitAnd(self, node): |
| 519 | return self.visitTest(node, 'JUMP_IF_FALSE') |
| 520 | |
| 521 | def visitOr(self, node): |
| 522 | return self.visitTest(node, 'JUMP_IF_TRUE') |
| 523 | |
Jeremy Hylton | 8b6323d | 2000-02-04 00:28:21 +0000 | [diff] [blame] | 524 | def visitName(self, node): |
Jeremy Hylton | 3e0910c | 2000-02-10 17:20:39 +0000 | [diff] [blame] | 525 | self.loadName(node.name) |
Jeremy Hylton | 53187f3 | 2000-02-08 19:01:29 +0000 | [diff] [blame] | 526 | self.push(1) |
Jeremy Hylton | 8b6323d | 2000-02-04 00:28:21 +0000 | [diff] [blame] | 527 | |
| 528 | def visitConst(self, node): |
| 529 | self.code.loadConst(node.value) |
Jeremy Hylton | 53187f3 | 2000-02-08 19:01:29 +0000 | [diff] [blame] | 530 | self.push(1) |
Jeremy Hylton | 4024560 | 2000-02-08 21:15:48 +0000 | [diff] [blame] | 531 | return 1 |
| 532 | |
| 533 | def visitTuple(self, node): |
| 534 | for elt in node.nodes: |
| 535 | self.visit(elt) |
| 536 | self.code.emit('BUILD_TUPLE', len(node.nodes)) |
| 537 | self.pop(len(node.nodes)) |
| 538 | return 1 |
Jeremy Hylton | 8b6323d | 2000-02-04 00:28:21 +0000 | [diff] [blame] | 539 | |
Jeremy Hylton | 5e0ce53 | 2000-02-10 00:47:08 +0000 | [diff] [blame] | 540 | def visitList(self, node): |
| 541 | for elt in node.nodes: |
| 542 | self.visit(elt) |
| 543 | self.code.emit('BUILD_LIST', len(node.nodes)) |
| 544 | self.pop(len(node.nodes)) |
| 545 | return 1 |
| 546 | |
Jeremy Hylton | 8b6323d | 2000-02-04 00:28:21 +0000 | [diff] [blame] | 547 | def visitReturn(self, node): |
| 548 | self.code.setLineNo(node.lineno) |
| 549 | self.visit(node.value) |
| 550 | self.code.returnValue() |
Jeremy Hylton | 4024560 | 2000-02-08 21:15:48 +0000 | [diff] [blame] | 551 | self.pop(1) |
| 552 | self.assertStackEmpty() |
Jeremy Hylton | 8b6323d | 2000-02-04 00:28:21 +0000 | [diff] [blame] | 553 | return 1 |
| 554 | |
| 555 | def visitRaise(self, node): |
| 556 | self.code.setLineNo(node.lineno) |
| 557 | n = 0 |
| 558 | if node.expr1: |
| 559 | self.visit(node.expr1) |
| 560 | n = n + 1 |
| 561 | if node.expr2: |
| 562 | self.visit(node.expr2) |
| 563 | n = n + 1 |
| 564 | if node.expr3: |
| 565 | self.visit(node.expr3) |
| 566 | n = n + 1 |
| 567 | self.code.raiseVarargs(n) |
| 568 | return 1 |
| 569 | |
| 570 | def visitPrint(self, node): |
| 571 | self.code.setLineNo(node.lineno) |
| 572 | for child in node.nodes: |
| 573 | self.visit(child) |
| 574 | self.code.emit('PRINT_ITEM') |
Jeremy Hylton | 53187f3 | 2000-02-08 19:01:29 +0000 | [diff] [blame] | 575 | self.pop(len(node.nodes)) |
Jeremy Hylton | 8b6323d | 2000-02-04 00:28:21 +0000 | [diff] [blame] | 576 | return 1 |
| 577 | |
| 578 | def visitPrintnl(self, node): |
| 579 | self.visitPrint(node) |
| 580 | self.code.emit('PRINT_NEWLINE') |
| 581 | return 1 |
Jeremy Hylton | 0fdffcf | 2000-02-04 19:37:35 +0000 | [diff] [blame] | 582 | |
Jeremy Hylton | 8b6323d | 2000-02-04 00:28:21 +0000 | [diff] [blame] | 583 | class LocalNameFinder: |
| 584 | def __init__(self, names=()): |
| 585 | self.names = misc.Set() |
Jeremy Hylton | 5e0ce53 | 2000-02-10 00:47:08 +0000 | [diff] [blame] | 586 | self.globals = misc.Set() |
Jeremy Hylton | 8b6323d | 2000-02-04 00:28:21 +0000 | [diff] [blame] | 587 | for name in names: |
| 588 | self.names.add(name) |
| 589 | |
| 590 | def getLocals(self): |
Jeremy Hylton | 5e0ce53 | 2000-02-10 00:47:08 +0000 | [diff] [blame] | 591 | for elt in self.globals.items(): |
| 592 | if self.names.has_elt(elt): |
| 593 | self.names.remove(elt) |
Jeremy Hylton | 8b6323d | 2000-02-04 00:28:21 +0000 | [diff] [blame] | 594 | return self.names |
| 595 | |
Jeremy Hylton | 5e0ce53 | 2000-02-10 00:47:08 +0000 | [diff] [blame] | 596 | def visitGlobal(self, node): |
| 597 | for name in node.names: |
| 598 | self.globals.add(name) |
| 599 | return 1 |
| 600 | |
Jeremy Hylton | 8b6323d | 2000-02-04 00:28:21 +0000 | [diff] [blame] | 601 | def visitFunction(self, node): |
| 602 | self.names.add(node.name) |
| 603 | return 1 |
| 604 | |
| 605 | def visitImport(self, node): |
| 606 | for name in node.names: |
| 607 | self.names.add(name) |
| 608 | |
| 609 | def visitFrom(self, node): |
| 610 | for name in node.names: |
| 611 | self.names.add(name) |
| 612 | |
| 613 | def visitClassdef(self, node): |
| 614 | self.names.add(node.name) |
| 615 | return 1 |
| 616 | |
| 617 | def visitAssName(self, node): |
| 618 | self.names.add(node.name) |
| 619 | |
| 620 | class Label: |
| 621 | def __init__(self, num): |
| 622 | self.num = num |
| 623 | def __repr__(self): |
| 624 | return "Label(%d)" % self.num |
| 625 | |
Jeremy Hylton | 4024560 | 2000-02-08 21:15:48 +0000 | [diff] [blame] | 626 | class StackRef: |
| 627 | """Manage stack locations for jumps, loops, etc.""" |
Jeremy Hylton | 8b6323d | 2000-02-04 00:28:21 +0000 | [diff] [blame] | 628 | count = 0 |
| 629 | |
| 630 | def __init__(self, id=None, val=None): |
| 631 | if id is None: |
Jeremy Hylton | 4024560 | 2000-02-08 21:15:48 +0000 | [diff] [blame] | 632 | id = StackRef.count |
| 633 | StackRef.count = StackRef.count + 1 |
Jeremy Hylton | 8b6323d | 2000-02-04 00:28:21 +0000 | [diff] [blame] | 634 | self.id = id |
| 635 | self.val = val |
| 636 | |
| 637 | def __repr__(self): |
| 638 | if self.val: |
Jeremy Hylton | 4024560 | 2000-02-08 21:15:48 +0000 | [diff] [blame] | 639 | return "StackRef(val=%d)" % self.val |
Jeremy Hylton | 8b6323d | 2000-02-04 00:28:21 +0000 | [diff] [blame] | 640 | else: |
Jeremy Hylton | 4024560 | 2000-02-08 21:15:48 +0000 | [diff] [blame] | 641 | return "StackRef(id=%d)" % self.id |
Jeremy Hylton | 8b6323d | 2000-02-04 00:28:21 +0000 | [diff] [blame] | 642 | |
| 643 | def bind(self, inst): |
| 644 | self.val = inst |
| 645 | |
| 646 | def resolve(self): |
| 647 | return self.val |
Jeremy Hylton | 0fdffcf | 2000-02-04 19:37:35 +0000 | [diff] [blame] | 648 | |
Jeremy Hylton | 53187f3 | 2000-02-08 19:01:29 +0000 | [diff] [blame] | 649 | def add_hook(hooks, type, meth): |
| 650 | """Helper function for PythonVMCode _emit_hooks""" |
| 651 | l = hooks.get(type, []) |
| 652 | l.append(meth) |
| 653 | hooks[type] = l |
Jeremy Hylton | 0fdffcf | 2000-02-04 19:37:35 +0000 | [diff] [blame] | 654 | |
Jeremy Hylton | 8b6323d | 2000-02-04 00:28:21 +0000 | [diff] [blame] | 655 | class PythonVMCode: |
Jeremy Hylton | 53187f3 | 2000-02-08 19:01:29 +0000 | [diff] [blame] | 656 | """Creates Python code objects |
| 657 | |
| 658 | The new module is used to create the code object. The following |
| 659 | attribute definitions are included from the reference manual: |
| 660 | |
| 661 | co_name gives the function name |
| 662 | co_argcount is the number of positional arguments (including |
| 663 | arguments with default values) |
| 664 | co_nlocals is the number of local variables used by the function |
| 665 | (including arguments) |
| 666 | co_varnames is a tuple containing the names of the local variables |
| 667 | (starting with the argument names) |
| 668 | co_code is a string representing the sequence of bytecode instructions |
| 669 | co_consts is a tuple containing the literals used by the bytecode |
| 670 | co_names is a tuple containing the names used by the bytecode |
| 671 | co_filename is the filename from which the code was compiled |
| 672 | co_firstlineno is the first line number of the function |
| 673 | co_lnotab is a string encoding the mapping from byte code offsets |
| 674 | to line numbers (for detais see the source code of the |
| 675 | interpreter) |
| 676 | see code com_set_lineno and com_add_lnotab |
| 677 | it's a string with 2bytes per set_lineno |
| 678 | |
| 679 | co_stacksize is the required stack size (including local variables) |
| 680 | co_flags is an integer encoding a number of flags for the |
| 681 | interpreter. |
Jeremy Hylton | 0fdffcf | 2000-02-04 19:37:35 +0000 | [diff] [blame] | 682 | |
Jeremy Hylton | 53187f3 | 2000-02-08 19:01:29 +0000 | [diff] [blame] | 683 | The following flag bits are defined for co_flags: bit 2 is set if |
| 684 | the function uses the "*arguments" syntax to accept an arbitrary |
| 685 | number of positional arguments; bit 3 is set if the function uses |
| 686 | the "**keywords" syntax to accept arbitrary keyword arguments; |
| 687 | other bits are used internally or reserved for future use. |
| 688 | |
| 689 | If a code object represents a function, the first item in |
| 690 | co_consts is the documentation string of the function, or None if |
| 691 | undefined. |
| 692 | """ |
| 693 | |
| 694 | # XXX flag bits |
| 695 | VARARGS = 0x04 |
| 696 | KWARGS = 0x08 |
| 697 | |
| 698 | def __init__(self, argcount=0, name='?', filename='<?>', |
Jeremy Hylton | 5e0ce53 | 2000-02-10 00:47:08 +0000 | [diff] [blame] | 699 | docstring=None, args=()): |
Jeremy Hylton | 53187f3 | 2000-02-08 19:01:29 +0000 | [diff] [blame] | 700 | # XXX why is the default value for flags 3? |
Jeremy Hylton | 0fdffcf | 2000-02-04 19:37:35 +0000 | [diff] [blame] | 701 | self.insts = [] |
| 702 | # used by makeCodeObject |
Jeremy Hylton | 53187f3 | 2000-02-08 19:01:29 +0000 | [diff] [blame] | 703 | self.argcount = argcount |
Jeremy Hylton | 0fdffcf | 2000-02-04 19:37:35 +0000 | [diff] [blame] | 704 | self.code = '' |
Jeremy Hylton | 53187f3 | 2000-02-08 19:01:29 +0000 | [diff] [blame] | 705 | self.consts = [docstring] |
| 706 | self.filename = filename |
| 707 | self.flags = 3 |
| 708 | self.name = name |
Jeremy Hylton | 0fdffcf | 2000-02-04 19:37:35 +0000 | [diff] [blame] | 709 | self.names = [] |
Jeremy Hylton | 5e0ce53 | 2000-02-10 00:47:08 +0000 | [diff] [blame] | 710 | self.varnames = list(args) or [] |
Jeremy Hylton | 53187f3 | 2000-02-08 19:01:29 +0000 | [diff] [blame] | 711 | # lnotab support |
| 712 | self.firstlineno = 0 |
| 713 | self.lastlineno = 0 |
| 714 | self.last_addr = 0 |
| 715 | self.lnotab = '' |
Jeremy Hylton | 0fdffcf | 2000-02-04 19:37:35 +0000 | [diff] [blame] | 716 | |
| 717 | def __repr__(self): |
| 718 | return "<bytecode: %d instrs>" % len(self.insts) |
Jeremy Hylton | 8b6323d | 2000-02-04 00:28:21 +0000 | [diff] [blame] | 719 | |
Jeremy Hylton | 53187f3 | 2000-02-08 19:01:29 +0000 | [diff] [blame] | 720 | def setFlags(self, val): |
| 721 | """XXX for module's function""" |
| 722 | self.flags = 0 |
| 723 | |
| 724 | def setVarArgs(self): |
| 725 | self.flags = self.flags | self.VARARGS |
| 726 | |
| 727 | def setKWArgs(self): |
| 728 | self.flags = self.flags | self.KWARGS |
Jeremy Hylton | 8b6323d | 2000-02-04 00:28:21 +0000 | [diff] [blame] | 729 | |
| 730 | def getCurInst(self): |
| 731 | return len(self.insts) |
| 732 | |
| 733 | def getNextInst(self): |
| 734 | return len(self.insts) + 1 |
| 735 | |
Jeremy Hylton | 53187f3 | 2000-02-08 19:01:29 +0000 | [diff] [blame] | 736 | def dump(self, io=sys.stdout): |
| 737 | i = 0 |
| 738 | for inst in self.insts: |
| 739 | if inst[0] == 'SET_LINENO': |
| 740 | io.write("\n") |
| 741 | io.write(" %3d " % i) |
| 742 | if len(inst) == 1: |
| 743 | io.write("%s\n" % inst) |
| 744 | else: |
| 745 | io.write("%-15.15s\t%s\n" % inst) |
| 746 | i = i + 1 |
Jeremy Hylton | 0fdffcf | 2000-02-04 19:37:35 +0000 | [diff] [blame] | 747 | |
Jeremy Hylton | 53187f3 | 2000-02-08 19:01:29 +0000 | [diff] [blame] | 748 | def makeCodeObject(self, stacksize): |
| 749 | """Make a Python code object |
| 750 | |
| 751 | This creates a Python code object using the new module. This |
| 752 | seems simpler than reverse-engineering the way marshal dumps |
| 753 | code objects into .pyc files. One of the key difficulties is |
| 754 | figuring out how to layout references to code objects that |
| 755 | appear on the VM stack; e.g. |
| 756 | 3 SET_LINENO 1 |
| 757 | 6 LOAD_CONST 0 (<code object fact at 8115878 [...] |
| 758 | 9 MAKE_FUNCTION 0 |
| 759 | 12 STORE_NAME 0 (fact) |
Jeremy Hylton | 53187f3 | 2000-02-08 19:01:29 +0000 | [diff] [blame] | 760 | """ |
| 761 | |
Jeremy Hylton | 0fdffcf | 2000-02-04 19:37:35 +0000 | [diff] [blame] | 762 | self._findOffsets() |
Jeremy Hylton | 53187f3 | 2000-02-08 19:01:29 +0000 | [diff] [blame] | 763 | lnotab = LineAddrTable() |
Jeremy Hylton | 0fdffcf | 2000-02-04 19:37:35 +0000 | [diff] [blame] | 764 | for t in self.insts: |
| 765 | opname = t[0] |
| 766 | if len(t) == 1: |
Jeremy Hylton | 53187f3 | 2000-02-08 19:01:29 +0000 | [diff] [blame] | 767 | lnotab.addCode(chr(self.opnum[opname])) |
Jeremy Hylton | 0fdffcf | 2000-02-04 19:37:35 +0000 | [diff] [blame] | 768 | elif len(t) == 2: |
| 769 | oparg = self._convertArg(opname, t[1]) |
Jeremy Hylton | 53187f3 | 2000-02-08 19:01:29 +0000 | [diff] [blame] | 770 | if opname == 'SET_LINENO': |
| 771 | lnotab.nextLine(oparg) |
Jeremy Hylton | 4024560 | 2000-02-08 21:15:48 +0000 | [diff] [blame] | 772 | try: |
| 773 | hi, lo = divmod(oparg, 256) |
| 774 | except TypeError: |
Jeremy Hylton | 3e0910c | 2000-02-10 17:20:39 +0000 | [diff] [blame] | 775 | raise TypeError, "untranslated arg: %s, %s" % (opname, oparg) |
Jeremy Hylton | 53187f3 | 2000-02-08 19:01:29 +0000 | [diff] [blame] | 776 | lnotab.addCode(chr(self.opnum[opname]) + chr(lo) + |
| 777 | chr(hi)) |
| 778 | # why is a module a special case? |
| 779 | if self.flags == 0: |
| 780 | nlocals = 0 |
| 781 | else: |
| 782 | nlocals = len(self.varnames) |
| 783 | co = new.code(self.argcount, nlocals, stacksize, |
| 784 | self.flags, lnotab.getCode(), self._getConsts(), |
| 785 | tuple(self.names), tuple(self.varnames), |
| 786 | self.filename, self.name, self.firstlineno, |
| 787 | lnotab.getTable()) |
| 788 | return co |
| 789 | |
| 790 | def _getConsts(self): |
| 791 | """Return a tuple for the const slot of a code object |
| 792 | |
| 793 | Converts PythonVMCode objects to code objects |
| 794 | """ |
| 795 | l = [] |
| 796 | for elt in self.consts: |
| 797 | if isinstance(elt, CodeGenerator): |
| 798 | l.append(elt.emit()) |
| 799 | else: |
| 800 | l.append(elt) |
| 801 | return tuple(l) |
Jeremy Hylton | 0fdffcf | 2000-02-04 19:37:35 +0000 | [diff] [blame] | 802 | |
| 803 | def _findOffsets(self): |
Jeremy Hylton | 4024560 | 2000-02-08 21:15:48 +0000 | [diff] [blame] | 804 | """Find offsets for use in resolving StackRefs""" |
Jeremy Hylton | 0fdffcf | 2000-02-04 19:37:35 +0000 | [diff] [blame] | 805 | self.offsets = [] |
| 806 | cur = 0 |
| 807 | for t in self.insts: |
| 808 | self.offsets.append(cur) |
| 809 | l = len(t) |
| 810 | if l == 1: |
| 811 | cur = cur + 1 |
| 812 | elif l == 2: |
Jeremy Hylton | 0fdffcf | 2000-02-04 19:37:35 +0000 | [diff] [blame] | 813 | cur = cur + 3 |
Jeremy Hylton | 4024560 | 2000-02-08 21:15:48 +0000 | [diff] [blame] | 814 | arg = t[1] |
| 815 | if isinstance(arg, StackRef): |
| 816 | arg.__offset = cur |
Jeremy Hylton | 0fdffcf | 2000-02-04 19:37:35 +0000 | [diff] [blame] | 817 | |
| 818 | def _convertArg(self, op, arg): |
| 819 | """Convert the string representation of an arg to a number |
| 820 | |
| 821 | The specific handling depends on the opcode. |
| 822 | |
| 823 | XXX This first implementation isn't going to be very |
| 824 | efficient. |
| 825 | """ |
| 826 | if op == 'SET_LINENO': |
| 827 | return arg |
| 828 | if op == 'LOAD_CONST': |
| 829 | return self._lookupName(arg, self.consts) |
Jeremy Hylton | 4024560 | 2000-02-08 21:15:48 +0000 | [diff] [blame] | 830 | if op in self.localOps: |
Jeremy Hylton | 53187f3 | 2000-02-08 19:01:29 +0000 | [diff] [blame] | 831 | if arg in self.names: |
| 832 | return self._lookupName(arg, self.varnames) |
| 833 | else: |
| 834 | return self._lookupName(arg, self.varnames, self.names) |
Jeremy Hylton | 4024560 | 2000-02-08 21:15:48 +0000 | [diff] [blame] | 835 | if op in self.globalOps: |
Jeremy Hylton | 0fdffcf | 2000-02-04 19:37:35 +0000 | [diff] [blame] | 836 | return self._lookupName(arg, self.names) |
Jeremy Hylton | 5e0ce53 | 2000-02-10 00:47:08 +0000 | [diff] [blame] | 837 | if op in self.nameOps: |
Jeremy Hylton | 0fdffcf | 2000-02-04 19:37:35 +0000 | [diff] [blame] | 838 | return self._lookupName(arg, self.names) |
| 839 | if op == 'COMPARE_OP': |
| 840 | return self.cmp_op.index(arg) |
| 841 | if self.hasjrel.has_elt(op): |
Jeremy Hylton | 0fdffcf | 2000-02-04 19:37:35 +0000 | [diff] [blame] | 842 | return self.offsets[arg.resolve()] - arg.__offset |
Jeremy Hylton | 4024560 | 2000-02-08 21:15:48 +0000 | [diff] [blame] | 843 | if self.hasjabs.has_elt(op): |
| 844 | return self.offsets[arg.resolve()] |
Jeremy Hylton | 0fdffcf | 2000-02-04 19:37:35 +0000 | [diff] [blame] | 845 | return arg |
| 846 | |
Jeremy Hylton | 5e0ce53 | 2000-02-10 00:47:08 +0000 | [diff] [blame] | 847 | nameOps = ('STORE_NAME', 'IMPORT_NAME', 'IMPORT_FROM', |
Jeremy Hylton | 3e0910c | 2000-02-10 17:20:39 +0000 | [diff] [blame] | 848 | 'STORE_ATTR', 'LOAD_ATTR', 'LOAD_NAME', 'DELETE_NAME') |
| 849 | localOps = ('LOAD_FAST', 'STORE_FAST', 'DELETE_FAST') |
| 850 | globalOps = ('LOAD_GLOBAL', 'STORE_GLOBAL', 'DELETE_GLOBAL') |
Jeremy Hylton | 4024560 | 2000-02-08 21:15:48 +0000 | [diff] [blame] | 851 | |
Jeremy Hylton | 0fdffcf | 2000-02-04 19:37:35 +0000 | [diff] [blame] | 852 | def _lookupName(self, name, list, list2=None): |
| 853 | """Return index of name in list, appending if necessary |
| 854 | |
| 855 | Yicky hack: Second list can be used for lookup of local names |
| 856 | where the name needs to be added to varnames and names. |
| 857 | """ |
| 858 | if name in list: |
| 859 | return list.index(name) |
| 860 | else: |
| 861 | end = len(list) |
| 862 | list.append(name) |
| 863 | if list2 is not None: |
| 864 | list2.append(name) |
| 865 | return end |
| 866 | |
| 867 | # Convert some stuff from the dis module for local use |
| 868 | |
| 869 | cmp_op = list(dis.cmp_op) |
| 870 | hasjrel = misc.Set() |
| 871 | for i in dis.hasjrel: |
| 872 | hasjrel.add(dis.opname[i]) |
| 873 | hasjabs = misc.Set() |
| 874 | for i in dis.hasjabs: |
| 875 | hasjabs.add(dis.opname[i]) |
| 876 | |
Jeremy Hylton | 8b6323d | 2000-02-04 00:28:21 +0000 | [diff] [blame] | 877 | opnum = {} |
| 878 | for num in range(len(dis.opname)): |
| 879 | opnum[dis.opname[num]] = num |
| 880 | |
| 881 | # the interface below here seemed good at first. upon real use, |
| 882 | # it seems redundant to add a function for each opcode, |
| 883 | # particularly because the method and opcode basically have the |
| 884 | # same name. |
Jeremy Hylton | 53187f3 | 2000-02-08 19:01:29 +0000 | [diff] [blame] | 885 | # on the other hand, we need to track things like stack depth in |
| 886 | # order to generator code objects. if we wrap instructions in a |
| 887 | # method, we get an easy way to track these. a simpler |
| 888 | # approach, however, would be to define hooks that can be called |
| 889 | # by emit. |
Jeremy Hylton | 8b6323d | 2000-02-04 00:28:21 +0000 | [diff] [blame] | 890 | |
| 891 | def setLineNo(self, num): |
| 892 | self.emit('SET_LINENO', num) |
| 893 | |
| 894 | def popTop(self): |
| 895 | self.emit('POP_TOP') |
| 896 | |
| 897 | def dupTop(self): |
| 898 | self.emit('DUP_TOP') |
| 899 | |
| 900 | def rotTwo(self): |
| 901 | self.emit('ROT_TWO') |
| 902 | |
| 903 | def rotThree(self): |
| 904 | self.emit('ROT_THREE') |
| 905 | |
| 906 | def jumpIfFalse(self, dest): |
| 907 | self.emit('JUMP_IF_FALSE', dest) |
| 908 | |
| 909 | def loadFast(self, name): |
| 910 | self.emit('LOAD_FAST', name) |
| 911 | |
| 912 | def loadGlobal(self, name): |
| 913 | self.emit('LOAD_GLOBAL', name) |
| 914 | |
| 915 | def binaryAdd(self): |
| 916 | self.emit('BINARY_ADD') |
| 917 | |
| 918 | def compareOp(self, op): |
| 919 | self.emit('COMPARE_OP', op) |
| 920 | |
| 921 | def loadConst(self, val): |
| 922 | self.emit('LOAD_CONST', val) |
| 923 | |
| 924 | def returnValue(self): |
| 925 | self.emit('RETURN_VALUE') |
| 926 | |
| 927 | def jumpForward(self, dest): |
| 928 | self.emit('JUMP_FORWARD', dest) |
| 929 | |
| 930 | def raiseVarargs(self, num): |
| 931 | self.emit('RAISE_VARARGS', num) |
| 932 | |
| 933 | def callFunction(self, num): |
| 934 | self.emit('CALL_FUNCTION', num) |
| 935 | |
Jeremy Hylton | 53187f3 | 2000-02-08 19:01:29 +0000 | [diff] [blame] | 936 | # this version of emit + arbitrary hooks might work, but it's damn |
| 937 | # messy. |
| 938 | |
| 939 | def emit(self, *args): |
| 940 | self._emitDispatch(args[0], args[1:]) |
| 941 | self.insts.append(args) |
| 942 | |
| 943 | def _emitDispatch(self, type, args): |
| 944 | for func in self._emit_hooks.get(type, []): |
| 945 | func(self, args) |
| 946 | |
| 947 | _emit_hooks = {} |
| 948 | |
| 949 | class LineAddrTable: |
| 950 | """lnotab |
| 951 | |
| 952 | This class builds the lnotab, which is undocumented but described |
| 953 | by com_set_lineno in compile.c. Here's an attempt at explanation: |
| 954 | |
| 955 | For each SET_LINENO instruction after the first one, two bytes are |
| 956 | added to lnotab. (In some cases, multiple two-byte entries are |
| 957 | added.) The first byte is the distance in bytes between the |
| 958 | instruction for the last SET_LINENO and the current SET_LINENO. |
| 959 | The second byte is offset in line numbers. If either offset is |
| 960 | greater than 255, multiple two-byte entries are added -- one entry |
| 961 | for each factor of 255. |
| 962 | """ |
| 963 | |
| 964 | def __init__(self): |
| 965 | self.code = [] |
| 966 | self.codeOffset = 0 |
| 967 | self.firstline = 0 |
| 968 | self.lastline = 0 |
| 969 | self.lastoff = 0 |
| 970 | self.lnotab = [] |
| 971 | |
| 972 | def addCode(self, code): |
| 973 | self.code.append(code) |
| 974 | self.codeOffset = self.codeOffset + len(code) |
| 975 | |
| 976 | def nextLine(self, lineno): |
| 977 | if self.firstline == 0: |
| 978 | self.firstline = lineno |
| 979 | self.lastline = lineno |
| 980 | else: |
| 981 | # compute deltas |
| 982 | addr = self.codeOffset - self.lastoff |
| 983 | line = lineno - self.lastline |
| 984 | while addr > 0 or line > 0: |
| 985 | # write the values in 1-byte chunks that sum |
| 986 | # to desired value |
| 987 | trunc_addr = addr |
| 988 | trunc_line = line |
| 989 | if trunc_addr > 255: |
| 990 | trunc_addr = 255 |
| 991 | if trunc_line > 255: |
| 992 | trunc_line = 255 |
| 993 | self.lnotab.append(trunc_addr) |
| 994 | self.lnotab.append(trunc_line) |
| 995 | addr = addr - trunc_addr |
| 996 | line = line - trunc_line |
| 997 | self.lastline = lineno |
| 998 | self.lastoff = self.codeOffset |
| 999 | |
| 1000 | def getCode(self): |
| 1001 | return string.join(self.code, '') |
| 1002 | |
| 1003 | def getTable(self): |
| 1004 | return string.join(map(chr, self.lnotab), '') |
| 1005 | |
| 1006 | class CompiledModule: |
| 1007 | """Store the code object for a compiled module |
| 1008 | |
| 1009 | XXX Not clear how the code objects will be stored. Seems possible |
| 1010 | that a single code attribute is sufficient, because it will |
| 1011 | contains references to all the need code objects. That might be |
| 1012 | messy, though. |
| 1013 | """ |
| 1014 | MAGIC = (20121 | (ord('\r')<<16) | (ord('\n')<<24)) |
| 1015 | |
| 1016 | def __init__(self, source, filename): |
| 1017 | self.source = source |
| 1018 | self.filename = filename |
| 1019 | |
| 1020 | def compile(self): |
| 1021 | t = transformer.Transformer() |
| 1022 | self.ast = t.parsesuite(self.source) |
| 1023 | cg = CodeGenerator(self.filename) |
Jeremy Hylton | 5e0ce53 | 2000-02-10 00:47:08 +0000 | [diff] [blame] | 1024 | walk(self.ast, cg, walker=ExampleASTVisitor) |
Jeremy Hylton | 53187f3 | 2000-02-08 19:01:29 +0000 | [diff] [blame] | 1025 | self.code = cg.emit() |
| 1026 | |
| 1027 | def dump(self, path): |
| 1028 | """create a .pyc file""" |
| 1029 | f = open(path, 'wb') |
| 1030 | f.write(self._pyc_header()) |
| 1031 | marshal.dump(self.code, f) |
| 1032 | f.close() |
| 1033 | |
| 1034 | def _pyc_header(self): |
| 1035 | # compile.c uses marshal to write a long directly, with |
| 1036 | # calling the interface that would also generate a 1-byte code |
| 1037 | # to indicate the type of the value. simplest way to get the |
| 1038 | # same effect is to call marshal and then skip the code. |
| 1039 | magic = marshal.dumps(self.MAGIC)[1:] |
| 1040 | mtime = os.stat(self.filename)[stat.ST_MTIME] |
| 1041 | mtime = struct.pack('i', mtime) |
| 1042 | return magic + mtime |
| 1043 | |
Jeremy Hylton | 8b6323d | 2000-02-04 00:28:21 +0000 | [diff] [blame] | 1044 | if __name__ == "__main__": |
Jeremy Hylton | 4024560 | 2000-02-08 21:15:48 +0000 | [diff] [blame] | 1045 | import getopt |
| 1046 | |
Jeremy Hylton | 5e0ce53 | 2000-02-10 00:47:08 +0000 | [diff] [blame] | 1047 | opts, args = getopt.getopt(sys.argv[1:], 'vq') |
Jeremy Hylton | 4024560 | 2000-02-08 21:15:48 +0000 | [diff] [blame] | 1048 | for k, v in opts: |
| 1049 | if k == '-v': |
| 1050 | ASTVisitor.VERBOSE = ASTVisitor.VERBOSE + 1 |
| 1051 | print k |
Jeremy Hylton | 5e0ce53 | 2000-02-10 00:47:08 +0000 | [diff] [blame] | 1052 | if k == '-q': |
| 1053 | f = open('/dev/null', 'wb') |
| 1054 | sys.stdout = f |
Jeremy Hylton | 4024560 | 2000-02-08 21:15:48 +0000 | [diff] [blame] | 1055 | if args: |
| 1056 | filename = args[0] |
Jeremy Hylton | 53187f3 | 2000-02-08 19:01:29 +0000 | [diff] [blame] | 1057 | else: |
| 1058 | filename = 'test.py' |
| 1059 | buf = open(filename).read() |
| 1060 | mod = CompiledModule(buf, filename) |
| 1061 | mod.compile() |
| 1062 | mod.dump(filename + 'c') |