Jeremy Hylton | 8b6323d | 2000-02-04 00:28:21 +0000 | [diff] [blame] | 1 | """Python bytecode generator |
| 2 | |
| 3 | Currently contains generic ASTVisitor code, a LocalNameFinder, and a |
| 4 | CodeGenerator. Eventually, this will get split into the ASTVisitor as |
| 5 | a generic tool and CodeGenerator as a specific tool. |
| 6 | """ |
| 7 | |
| 8 | from p2c import transformer, ast |
| 9 | import dis |
| 10 | import misc |
Jeremy Hylton | 0fdffcf | 2000-02-04 19:37:35 +0000 | [diff] [blame] | 11 | import marshal |
| 12 | import new |
| 13 | import string |
Jeremy Hylton | 53187f3 | 2000-02-08 19:01:29 +0000 | [diff] [blame] | 14 | import sys |
| 15 | import os |
| 16 | import stat |
| 17 | import struct |
Jeremy Hylton | 8b6323d | 2000-02-04 00:28:21 +0000 | [diff] [blame] | 18 | |
def parse(path):
    """Read the Python source file at path and return its parsed AST.

    The file contents are handed to transformer.Transformer().parsesuite();
    whatever that call returns is returned unchanged.
    """
    f = open(path)
    try:
        # close the file even if the read fails
        src = f.read()
    finally:
        f.close()
    t = transformer.Transformer()
    return t.parsesuite(src)
| 25 | |
def walk(tree, visitor):
    """Run a preorder ASTVisitor walk of tree, dispatching to visitor.

    Returns the visitor recorded by the walker (the object passed in),
    so a caller can construct the visitor inline and read its results.
    """
    walker = ASTVisitor()
    walker.preorder(tree, visitor)
    return walker.visitor
| 30 | |
class ASTVisitor:
    """Performs a depth-first walk of the AST

    The ASTVisitor will walk the AST, performing either a preorder or
    postorder traversal depending on which method is called.

    methods:
    preorder(tree, visitor)
    postorder(tree, visitor)
        tree: an instance of ast.Node
        visitor: an instance with visitXXX methods

    The ASTVisitor is responsible for walking over the tree in the
    correct order.  For each node, it checks the visitor argument for
    a method named 'visitNodeType' where NodeType is the name of the
    node's class, e.g. Classdef.  If the method exists, it is called
    with the node as its sole argument.

    The visitor method for a particular node type can control how
    child nodes are visited during a preorder walk.  (It can't control
    the order during a postorder walk, because it is called _after_
    the walk has occurred.)  The ASTVisitor modifies the visitor
    argument by adding a visit method to the visitor; this method can
    be used to visit a particular child node.  If the visitor method
    returns a true value, the ASTVisitor will not traverse the child
    nodes.

    XXX The interface for controlling the preorder walk needs to be
    re-considered.  The current interface is convenient for visitors
    that mostly let the ASTVisitor do everything.  For something like
    a code generator, where you want the walk to occur in a specific
    order, it's a pain to add "return 1" to the end of each method.

    XXX Perhaps I can use a postorder walk for the code generator?
    """

    # when true, dispatch() writes a trace line for every node visited
    VERBOSE = 1

    def __init__(self):
        # the node most recently passed to dispatch()
        self.node = None

    def preorder(self, tree, visitor):
        """Do preorder walk of tree using visitor"""
        self.visitor = visitor
        # give the visitor a hook for walking child nodes itself
        visitor.visit = self._preorder
        self._preorder(tree)

    def _preorder(self, node):
        """Dispatch on node, then on its children unless told to stop."""
        stop = self.dispatch(node)
        if stop:
            # the visitor method handled (or chose to skip) the children
            return
        for child in node.getChildren():
            # getChildren() may also yield non-node values; skip those
            if isinstance(child, ast.Node):
                self._preorder(child)

    def postorder(self, tree, visitor):
        """Do postorder walk of tree using visitor"""
        self.visitor = visitor
        visitor.visit = self._postorder
        self._postorder(tree)

    def _postorder(self, tree):
        """Dispatch on every child of tree first, then on tree itself."""
        for child in tree.getChildren():
            if isinstance(child, ast.Node):
                self._postorder(child)
        self.dispatch(tree)

    def dispatch(self, node):
        """Call the visitor's visit<ClassName> method for node, if any.

        Returns the visitor method's result, or None when the visitor
        defines no method for this node's class.
        """
        self.node = node
        className = node.__class__.__name__
        meth = getattr(self.visitor, 'visit' + className, None)
        if self.VERBOSE:
            if meth:
                methName = meth.__name__
            else:
                methName = ''
            # same text as: print "dispatch", className, methName
            sys.stdout.write("dispatch %s %s\n" % (className, methName))
        if meth:
            return meth(node)
| 106 | |
class CodeGenerator:
    """Visitor that emits Python VM instructions for an AST.

    Instances are meant to be driven by a preorder ASTVisitor walk (see
    walk()); the walker installs the self.visit method that the visitXXX
    methods use to descend into child nodes.  Most visitXXX methods
    return 1 so that the walker does not visit the children a second
    time.
    """

    def __init__(self, filename=None):
        self.filename = filename
        self.code = PythonVMCode(filename=filename)
        self.code.setFlags(0)
        # stack of name sets, one per scope being compiled
        self.locals = misc.Stack()
        # track the current and max stack size
        # XXX does this belong here or in the PythonVMCode?
        self.curStack = 0
        self.maxStack = 0

    def emit(self):
        """Create a Python code object

        XXX It is confusing that this method isn't related to the
        method named emit in the PythonVMCode.
        """
        return self.code.makeCodeObject(self.maxStack)

    def push(self, n):
        """Record n more values on the VM stack, updating the maximum."""
        self.curStack = self.curStack + n
        if self.curStack > self.maxStack:
            self.maxStack = self.curStack

    def pop(self, n):
        """Record n values removed from the VM stack.

        The tracked depth never goes below zero.
        """
        if n <= self.curStack:
            self.curStack = self.curStack - n
        else:
            self.curStack = 0

    def visitDiscard(self, node):
        # no code is generated for Discard nodes; returning 1 also keeps
        # the walker from visiting their children
        return 1

    def visitModule(self, node):
        """Generate the module body followed by an implicit 'return None'."""
        lnf = walk(node.node, LocalNameFinder())
        self.locals.push(lnf.getLocals())
        self.visit(node.node)
        self.code.emit('LOAD_CONST', None)
        self.code.emit('RETURN_VALUE')
        return 1

    def visitFunction(self, node):
        """Compile a function body separately and bind it to its name."""
        codeBody = NestedCodeGenerator(node, filename=self.filename)
        walk(node, codeBody)
        self.code.setLineNo(node.lineno)
        # the nested generator is stored as the constant; it is turned
        # into a real code object when this code object is built
        self.code.emit('LOAD_CONST', codeBody)
        self.code.emit('MAKE_FUNCTION', 0)
        self.code.emit('STORE_NAME', node.name)
        return 1

    def visitCallFunc(self, node):
        """Load the callee, then each argument, then emit the call."""
        self.visit(node.node)
        for arg in node.args:
            self.visit(arg)
        self.code.callFunction(len(node.args))
        return 1

    def visitIf(self, node):
        """Generate a chain of tests, each jumping on to the next on failure."""
        # bound to the instruction following the whole if statement
        after = ForwardRef()
        for test, suite in node.tests:
            self.code.setLineNo(test.lineno)
            self.visit(test)
            # bound to the code that runs when this test is false
            dest = ForwardRef()
            self.code.jumpIfFalse(dest)
            self.code.popTop()
            self.visit(suite)
            self.code.jumpForward(after)
            dest.bind(self.code.getCurInst())
            # the false path still has the test result on the stack
            self.code.popTop()
        if node.else_:
            self.visit(node.else_)
        after.bind(self.code.getCurInst())
        return 1

    def visitCompare(self, node):
        """Comment from compile.c follows:

        The following code is generated for all but the last
        comparison in a chain:

        label:  on stack:       opcode:         jump to:

                a               <code to load b>
                a, b            DUP_TOP
                a, b, b         ROT_THREE
                b, a, b         COMPARE_OP
                b, 0-or-1       JUMP_IF_FALSE   L1
                b, 1            POP_TOP
                b

        We are now ready to repeat this sequence for the next
        comparison in the chain.

        For the last we generate:

                b               <code to load c>
                b, c            COMPARE_OP
                0-or-1

        If there were any jumps to L1 (i.e., there was more than one
        comparison), we generate:

                0-or-1          JUMP_FORWARD    L2
        L1:     b, 0            ROT_TWO
                0, b            POP_TOP
                0
        L2:     0-or-1
        """
        self.visit(node.expr)
        # if refs are never emitted, subsequent bind call has no effect
        l1 = ForwardRef()
        l2 = ForwardRef()
        for op, code in node.ops[:-1]:
            # emit every comparison except the last
            self.visit(code)
            self.code.dupTop()
            self.code.rotThree()
            self.code.compareOp(op)
            self.code.jumpIfFalse(l1)
            self.code.popTop()
        if node.ops:
            # emit the last comparison
            op, code = node.ops[-1]
            self.visit(code)
            self.code.compareOp(op)
        if len(node.ops) > 1:
            self.code.jumpForward(l2)
            l1.bind(self.code.getCurInst())
            self.code.rotTwo()
            self.code.popTop()
            l2.bind(self.code.getCurInst())
        return 1

    def binaryOp(self, node, op):
        """Load both operands, then emit the binary opcode named op."""
        self.visit(node.left)
        self.visit(node.right)
        self.code.emit(op)
        # two operands are replaced by one result
        self.pop(1)
        return 1

    def visitAdd(self, node):
        return self.binaryOp(node, 'BINARY_ADD')

    def visitSub(self, node):
        return self.binaryOp(node, 'BINARY_SUBTRACT')

    def visitMul(self, node):
        return self.binaryOp(node, 'BINARY_MULTIPLY')

    def visitDiv(self, node):
        return self.binaryOp(node, 'BINARY_DIVIDE')

    def visitName(self, node):
        """Load a name: LOAD_FAST if local to the current scope, else global."""
        locals = self.locals.top()
        if locals.has_elt(node.name):
            self.code.loadFast(node.name)
        else:
            self.code.loadGlobal(node.name)
        self.push(1)

    def visitConst(self, node):
        self.code.loadConst(node.value)
        self.push(1)

    def visitReturn(self, node):
        self.code.setLineNo(node.lineno)
        self.visit(node.value)
        self.code.returnValue()
        return 1

    def visitRaise(self, node):
        """Load whichever of the three raise operands are present."""
        self.code.setLineNo(node.lineno)
        # number of operands actually loaded for RAISE_VARARGS
        n = 0
        if node.expr1:
            self.visit(node.expr1)
            n = n + 1
        if node.expr2:
            self.visit(node.expr2)
            n = n + 1
        if node.expr3:
            self.visit(node.expr3)
            n = n + 1
        self.code.raiseVarargs(n)
        return 1

    def visitPrint(self, node):
        """Emit PRINT_ITEM after loading each value to be printed."""
        self.code.setLineNo(node.lineno)
        for child in node.nodes:
            self.visit(child)
            self.code.emit('PRINT_ITEM')
        self.pop(len(node.nodes))
        return 1

    def visitPrintnl(self, node):
        """Same as visitPrint, followed by PRINT_NEWLINE."""
        self.visitPrint(node)
        self.code.emit('PRINT_NEWLINE')
        return 1
Jeremy Hylton | 0fdffcf | 2000-02-04 19:37:35 +0000 | [diff] [blame] | 304 | |
class NestedCodeGenerator(CodeGenerator):
    """Generate code for a function object within another scope

    XXX not clear that this subclass is needed
    """
    super_init = CodeGenerator.__init__

    def __init__(self, func, filename='<?>'):
        """Set up code generation for the function node func.

        func supplies the name, argument names, varargs/kwargs markers
        and body of the function being walked.

        XXX need to separately pass to ASTVisitor.  the constructor
        only uses the code object to find the local names

        The parent __init__ is run first; the PythonVMCode it creates is
        then replaced by one that knows the argument count and name.
        """
        self.name = func.name
        self.super_init(filename)
        argnames = func.argnames
        self.code = PythonVMCode(len(argnames), name=func.name,
                                 filename=filename)
        if func.varargs:
            self.code.setVarArgs()
        if func.kwargs:
            self.code.setKWArgs()
        finder = walk(func.code, LocalNameFinder(argnames))
        self.locals.push(finder.getLocals())

    def __repr__(self):
        return "<NestedCodeGenerator: %s>" % self.name

    def visitFunction(self, node):
        """Generate the function body followed by an implicit 'return None'."""
        finder = walk(node.code, LocalNameFinder(node.argnames))
        self.locals.push(finder.getLocals())
        # XXX need to handle def foo((a, b)):
        self.code.setLineNo(node.lineno)
        self.visit(node.code)
        self.code.emit('LOAD_CONST', None)
        self.code.emit('RETURN_VALUE')
        return 1
Jeremy Hylton | 8b6323d | 2000-02-04 00:28:21 +0000 | [diff] [blame] | 344 | |
class LocalNameFinder:
    """Visitor that collects the names bound within one scope.

    Names come from function and class definitions, import statements
    and AssName nodes, plus any names given to the constructor.  The
    Function and Classdef visitors return 1 so the walker does not
    descend into those nodes.
    """

    def __init__(self, names=()):
        self.names = misc.Set()
        self._record(names)

    def _record(self, names):
        # add every name in the sequence to the set
        for ident in names:
            self.names.add(ident)

    def getLocals(self):
        """Return the set of names collected so far."""
        return self.names

    def visitFunction(self, node):
        self.names.add(node.name)
        return 1

    def visitImport(self, node):
        self._record(node.names)

    def visitFrom(self, node):
        self._record(node.names)

    def visitClassdef(self, node):
        self.names.add(node.name)
        return 1

    def visitAssName(self, node):
        self.names.add(node.name)
| 372 | |
class Label:
    """A numbered label; num is stored as given and shown by repr()."""

    def __init__(self, num):
        self.num = num

    def __repr__(self):
        template = "Label(%d)"
        return template % self.num
| 378 | |
class ForwardRef:
    """Placeholder for an instruction position that is not yet known.

    A ForwardRef is used as the argument of a jump instruction and is
    later bound, with bind(), to the position being jumped to.
    """

    # class-wide counter that supplies default ids
    count = 0

    def __init__(self, id=None, val=None):
        if id is None:
            id = ForwardRef.count
            ForwardRef.count = ForwardRef.count + 1
        self.id = id
        self.val = val

    def __repr__(self):
        # compare against None: instruction 0 is a valid bound value
        if self.val is not None:
            return "ForwardRef(val=%d)" % self.val
        else:
            return "ForwardRef(id=%d)" % self.id

    def bind(self, inst):
        """Record inst as the position this reference stands for."""
        self.val = inst

    def resolve(self):
        """Return the bound position, or None if bind() was never called."""
        return self.val
Jeremy Hylton | 0fdffcf | 2000-02-04 19:37:35 +0000 | [diff] [blame] | 400 | |
def add_hook(hooks, type, meth):
    """Helper function for PythonVMCode _emit_hooks

    Appends meth to the list stored in hooks under the key type,
    starting a new list when the key is not present yet.
    """
    registered = hooks.get(type, [])
    registered.append(meth)
    hooks[type] = registered
Jeremy Hylton | 0fdffcf | 2000-02-04 19:37:35 +0000 | [diff] [blame] | 406 | |
class PythonVMCode:
    """Creates Python code objects

    Instructions are collected as tuples by emit() and assembled into a
    code object by makeCodeObject().

    The new module is used to create the code object.  The following
    attribute definitions are included from the reference manual:

    co_name gives the function name
    co_argcount is the number of positional arguments (including
        arguments with default values)
    co_nlocals is the number of local variables used by the function
        (including arguments)
    co_varnames is a tuple containing the names of the local variables
        (starting with the argument names)
    co_code is a string representing the sequence of bytecode instructions
    co_consts is a tuple containing the literals used by the bytecode
    co_names is a tuple containing the names used by the bytecode
    co_filename is the filename from which the code was compiled
    co_firstlineno is the first line number of the function
    co_lnotab is a string encoding the mapping from byte code offsets
        to line numbers (for details see the source code of the
        interpreter)
        see code com_set_lineno and com_add_lnotab
        it's a string with 2bytes per set_lineno

    co_stacksize is the required stack size (including local variables)
    co_flags is an integer encoding a number of flags for the
        interpreter.

    The following flag bits are defined for co_flags: bit 2 is set if
    the function uses the "*arguments" syntax to accept an arbitrary
    number of positional arguments; bit 3 is set if the function uses
    the "**keywords" syntax to accept arbitrary keyword arguments;
    other bits are used internally or reserved for future use.

    If a code object represents a function, the first item in
    co_consts is the documentation string of the function, or None if
    undefined.
    """

    # XXX flag bits
    VARARGS = 0x04
    KWARGS = 0x08

    def __init__(self, argcount=0, name='?', filename='<?>',
                 docstring=None):
        # XXX why is the default value for flags 3?
        # NOTE(review): presumably CO_OPTIMIZED | CO_NEWLOCALS (0x01 | 0x02)
        # as defined by the interpreter -- confirm
        # instructions emitted so far: (opname,) or (opname, arg) tuples
        self.insts = []
        # used by makeCodeObject
        self.argcount = argcount
        self.code = ''
        self.consts = [docstring]
        self.filename = filename
        self.flags = 3
        self.name = name
        self.names = []
        self.varnames = []
        # lnotab support
        self.firstlineno = 0
        self.lastlineno = 0
        self.last_addr = 0
        self.lnotab = ''

    def __repr__(self):
        return "<bytecode: %d instrs>" % len(self.insts)

    def setFlags(self, val):
        """Set co_flags to val (0 is used for a module's code)"""
        self.flags = val

    def setVarArgs(self):
        self.flags = self.flags | self.VARARGS

    def setKWArgs(self):
        self.flags = self.flags | self.KWARGS

    def getCurInst(self):
        """Return the index the next emitted instruction will get."""
        return len(self.insts)

    def getNextInst(self):
        return len(self.insts) + 1

    def dump(self, io=sys.stdout):
        """Write a numbered listing of the emitted instructions to io."""
        i = 0
        for inst in self.insts:
            if inst[0] == 'SET_LINENO':
                # blank line before the start of each source line
                io.write("\n")
            io.write("    %3d " % i)
            if len(inst) == 1:
                io.write("%s\n" % inst)
            else:
                io.write("%-15.15s\t%s\n" % inst)
            i = i + 1

    def makeCodeObject(self, stacksize):
        """Make a Python code object

        This creates a Python code object using the new module.  This
        seems simpler than reverse-engineering the way marshal dumps
        code objects into .pyc files.  One of the key difficulties is
        figuring out how to layout references to code objects that
        appear on the VM stack; e.g.
          3 SET_LINENO          1
          6 LOAD_CONST          0 (<code object fact at 8115878 [...]
          9 MAKE_FUNCTION       0
         12 STORE_NAME          0 (fact)

        stacksize is passed through as the co_stacksize of the result.
        """

        self._findOffsets()
        lnotab = LineAddrTable()
        for pos in range(len(self.insts)):
            t = self.insts[pos]
            opname = t[0]
            if len(t) == 1:
                lnotab.addCode(chr(self.opnum[opname]))
            elif len(t) == 2:
                oparg = self._convertArg(opname, t[1], pos)
                if opname == 'SET_LINENO':
                    lnotab.nextLine(oparg)
                # two-byte argument, low byte first
                hi, lo = divmod(oparg, 256)
                lnotab.addCode(chr(self.opnum[opname]) + chr(lo) +
                               chr(hi))
        # why is a module a special case?
        if self.flags == 0:
            nlocals = 0
        else:
            nlocals = len(self.varnames)
        # the line table is relative to the first SET_LINENO, so that
        # line is used unless a first line number was set explicitly
        firstlineno = self.firstlineno or lnotab.firstline
        co = new.code(self.argcount, nlocals, stacksize,
                      self.flags, lnotab.getCode(), self._getConsts(),
                      tuple(self.names), tuple(self.varnames),
                      self.filename, self.name, firstlineno,
                      lnotab.getTable())
        return co

    def _getConsts(self):
        """Return a tuple for the const slot of a code object

        Converts CodeGenerator objects to code objects
        """
        l = []
        for elt in self.consts:
            if isinstance(elt, CodeGenerator):
                l.append(elt.emit())
            else:
                l.append(elt)
        return tuple(l)

    def _findOffsets(self):
        """Find offsets for use in resolving ForwardRefs

        self.offsets[i] is the byte offset of instruction i.  One extra
        entry holds the offset just past the last instruction, so a
        ForwardRef bound at the very end of the code can be resolved.
        """
        self.offsets = []
        cur = 0
        for t in self.insts:
            self.offsets.append(cur)
            l = len(t)
            if l == 1:
                cur = cur + 1
            elif l == 2:
                # opcode byte plus a two-byte argument
                cur = cur + 3
        self.offsets.append(cur)

    def _convertArg(self, op, arg, pos=None):
        """Convert the string representation of an arg to a number

        The specific handling depends on the opcode.  pos is the index
        in self.insts of the instruction being converted; it is needed
        for relative jumps only.

        XXX This first implementation isn't going to be very
        efficient.
        """
        if op == 'SET_LINENO':
            return arg
        if op == 'LOAD_CONST':
            return self._lookupName(arg, self.consts)
        if op == 'LOAD_FAST':
            if arg in self.names:
                return self._lookupName(arg, self.varnames)
            else:
                return self._lookupName(arg, self.varnames, self.names)
        if op == 'LOAD_GLOBAL':
            return self._lookupName(arg, self.names)
        if op == 'STORE_NAME':
            return self._lookupName(arg, self.names)
        if op == 'COMPARE_OP':
            return self.cmp_op.index(arg)
        if self.hasjrel.has_elt(op):
            if pos is None:
                raise ValueError("relative jump %s needs its position" % op)
            # relative to the instruction following the 3-byte jump;
            # computed per instruction because one ForwardRef may be
            # the target of several jumps
            return self.offsets[arg.resolve()] - (self.offsets[pos] + 3)
        if self.hasjabs.has_elt(op):
            return self.offsets[arg.resolve()]
        return arg

    def _lookupName(self, name, list, list2=None):
        """Return index of name in list, appending if necessary

        Yicky hack: Second list can be used for lookup of local names
        where the name needs to be added to varnames and names.
        """
        if name in list:
            return list.index(name)
        else:
            end = len(list)
            list.append(name)
            if list2 is not None:
                list2.append(name)
            return end

    # Convert some stuff from the dis module for local use

    cmp_op = list(dis.cmp_op)
    hasjrel = misc.Set()
    for i in dis.hasjrel:
        hasjrel.add(dis.opname[i])
    hasjabs = misc.Set()
    for i in dis.hasjabs:
        hasjabs.add(dis.opname[i])

    # maps opcode name to opcode number
    opnum = {}
    for num in range(len(dis.opname)):
        opnum[dis.opname[num]] = num

    # the interface below here seemed good at first.  upon real use,
    # it seems redundant to add a function for each opcode,
    # particularly because the method and opcode basically have the
    # same name.
    # on the other hand, we need to track things like stack depth in
    # order to generate code objects.  if we wrap instructions in a
    # method, we get an easy way to track these.  a simpler
    # approach, however, would be to define hooks that can be called
    # by emit.

    def setLineNo(self, num):
        self.emit('SET_LINENO', num)

    def popTop(self):
        self.emit('POP_TOP')

    def dupTop(self):
        self.emit('DUP_TOP')

    def rotTwo(self):
        self.emit('ROT_TWO')

    def rotThree(self):
        self.emit('ROT_THREE')

    def jumpIfFalse(self, dest):
        self.emit('JUMP_IF_FALSE', dest)

    def loadFast(self, name):
        self.emit('LOAD_FAST', name)

    def loadGlobal(self, name):
        self.emit('LOAD_GLOBAL', name)

    def binaryAdd(self):
        self.emit('BINARY_ADD')

    def compareOp(self, op):
        self.emit('COMPARE_OP', op)

    def loadConst(self, val):
        self.emit('LOAD_CONST', val)

    def returnValue(self):
        self.emit('RETURN_VALUE')

    def jumpForward(self, dest):
        self.emit('JUMP_FORWARD', dest)

    def raiseVarargs(self, num):
        self.emit('RAISE_VARARGS', num)

    def callFunction(self, num):
        self.emit('CALL_FUNCTION', num)

    # this version of emit + arbitrary hooks might work, but it's damn
    # messy.

    def emit(self, *args):
        """Append an instruction: the opcode name, optionally one argument."""
        self._emitDispatch(args[0], args[1:])
        self.insts.append(args)

    def _emitDispatch(self, type, args):
        """Call every hook registered in _emit_hooks for this opcode name."""
        for func in self._emit_hooks.get(type, []):
            func(self, args)

    _emit_hooks = {}
| 693 | |
class LineAddrTable:
    """lnotab

    This class builds the lnotab, which is undocumented but described
    by com_set_lineno in compile.c.  Here's an attempt at explanation:

    For each SET_LINENO instruction after the first one, two bytes are
    added to lnotab.  (In some cases, multiple two-byte entries are
    added.)  The first byte is the distance in bytes between the
    instruction for the last SET_LINENO and the current SET_LINENO.
    The second byte is offset in line numbers.  If either offset is
    greater than 255, multiple two-byte entries are added -- one entry
    for each factor of 255.

    The class also accumulates the bytecode itself, because the
    address deltas are measured against the amount of code added.
    """

    def __init__(self):
        # pieces of bytecode, joined by getCode()
        self.code = []
        # total length of the pieces added so far
        self.codeOffset = 0
        self.firstline = 0
        self.lastline = 0
        self.lastoff = 0
        # alternating address/line deltas, as integers
        self.lnotab = []

    def addCode(self, code):
        """Append a piece of bytecode and advance the code offset."""
        self.code.append(code)
        self.codeOffset = self.codeOffset + len(code)

    def nextLine(self, lineno):
        """Note that the code added from here on belongs to line lineno."""
        if self.firstline == 0:
            # the first line only sets the base; it adds no entry
            self.firstline = lineno
            self.lastline = lineno
            return
        # deltas since the previous recorded line
        addr_delta = self.codeOffset - self.lastoff
        line_delta = lineno - self.lastline
        while addr_delta > 0 or line_delta > 0:
            # each entry holds at most 255 per field; keep emitting
            # entries until both deltas are used up
            addr_step = min(addr_delta, 255)
            line_step = min(line_delta, 255)
            self.lnotab.append(addr_step)
            self.lnotab.append(line_step)
            addr_delta = addr_delta - addr_step
            line_delta = line_delta - line_step
        self.lastline = lineno
        self.lastoff = self.codeOffset

    def getCode(self):
        """Return all the bytecode added so far as one string."""
        return ''.join(self.code)

    def getTable(self):
        """Return the line table as a string, one character per delta."""
        return ''.join(map(chr, self.lnotab))
| 750 | |
class CompiledModule:
    """Store the code object for a compiled module

    XXX Not clear how the code objects will be stored.  Seems possible
    that a single code attribute is sufficient, because it will
    contain references to all the needed code objects.  That might be
    messy, though.
    """
    # first word of a .pyc file: a magic number with '\r\n' in the
    # two high bytes
    MAGIC = (20121 | (ord('\r')<<16) | (ord('\n')<<24))

    def __init__(self, source, filename):
        self.source = source
        self.filename = filename

    def compile(self):
        """Parse self.source and generate its code object into self.code."""
        t = transformer.Transformer()
        self.ast = t.parsesuite(self.source)
        cg = CodeGenerator(self.filename)
        walk(self.ast, cg)
        self.code = cg.emit()

    def dump(self, path):
        """create a .pyc file"""
        # build the header first so that a failing stat() of the source
        # file does not leave an empty output file behind
        header = self._pyc_header()
        f = open(path, 'wb')
        try:
            f.write(header)
            marshal.dump(self.code, f)
        finally:
            f.close()

    def _pyc_header(self):
        """Return the 8-byte header: magic word, then source mtime."""
        # compile.c uses marshal to write a long directly, without
        # calling the interface that would also generate a 1-byte code
        # to indicate the type of the value.  simplest way to get the
        # same effect is to call marshal and then skip the code.
        magic = marshal.dumps(self.MAGIC)[1:]
        mtime = os.stat(self.filename)[stat.ST_MTIME]
        # little-endian like the magic word above, whatever the
        # byte order of the machine doing the compiling
        mtime = struct.pack('<i', mtime)
        return magic + mtime
| 788 | |
if __name__ == "__main__":
    # Compile the file named by the first command-line argument
    # ('test.py' when none is given) and write the result next to it,
    # with a 'c' appended to the name.
    filename = 'test.py'
    if len(sys.argv) > 1:
        filename = sys.argv[1]
    buf = open(filename).read()
    mod = CompiledModule(buf, filename)
    mod.compile()
    mod.dump(filename + 'c')