blob: ee0c75b52231aa08686734a53c56da1428bf5c8a [file] [log] [blame]
"""Python bytecode generator

Currently contains generic ASTVisitor code, a LocalNameFinder, and a
CodeGenerator.  Eventually, this will get split into the ASTVisitor as
a generic tool and CodeGenerator as a specific tool.
"""
7
8from p2c import transformer, ast
9import dis
10import misc
Jeremy Hylton0fdffcf2000-02-04 19:37:35 +000011import marshal
12import new
13import string
Jeremy Hylton53187f32000-02-08 19:01:29 +000014import sys
15import os
16import stat
17import struct
Jeremy Hylton8b6323d2000-02-04 00:28:21 +000018
def parse(path):
    """Parse the Python source file at path and return the result.

    The whole file is read and handed to the parsesuite() method of a
    transformer.Transformer; whatever that returns is returned
    unchanged.
    """
    f = open(path)
    try:
        # make sure the file is closed even if the read fails
        src = f.read()
    finally:
        f.close()
    t = transformer.Transformer()
    return t.parsesuite(src)
25
def walk(tree, visitor):
    """Do a preorder walk of tree with visitor and return the visitor.

    A fresh ASTVisitor drives the traversal.  The object returned is
    the visitor the ASTVisitor recorded, i.e. the one passed in, which
    allows calls of the form walk(tree, SomeVisitor()).
    """
    walker = ASTVisitor()
    walker.preorder(tree, visitor)
    return walker.visitor
30
class ASTVisitor:
    """Performs a depth-first walk of the AST

    The ASTVisitor will walk the AST, performing either a preorder or
    postorder traversal depending on which method is called.

    methods:
    preorder(tree, visitor)
    postorder(tree, visitor)
        tree: an instance of ast.Node
        visitor: an instance with visitXXX methods

    The ASTVisitor is responsible for walking over the tree in the
    correct order.  For each node, it checks the visitor argument for
    a method named 'visitNodeType' where NodeType is the name of the
    node's class, e.g. Classdef.  If the method exists, it is called
    with the node as its sole argument.

    The visitor method for a particular node type can control how
    child nodes are visited during a preorder walk.  (It can't control
    the order during a postorder walk, because it is called _after_
    the walk has occurred.)  The ASTVisitor modifies the visitor
    argument by adding a visit method to the visitor; this method can
    be used to visit a particular child node.  If the visitor method
    returns a true value, the ASTVisitor will not traverse the child
    nodes.

    XXX The interface for controlling the preorder walk needs to be
    re-considered.  The current interface is convenient for visitors
    that mostly let the ASTVisitor do everything.  For something like
    a code generator, where you want the walk to occur in a specific
    order, it's a pain to add "return 1" to the end of each method.

    XXX Perhaps I can use a postorder walk for the code generator?
    """

    # when true, dispatch() prints a trace line for every node
    VERBOSE = 1

    def __init__(self):
        # the node most recently passed to dispatch()
        self.node = None

    def preorder(self, tree, visitor):
        """Do preorder walk of tree using visitor"""
        self.visitor = visitor
        visitor.visit = self._preorder
        self._preorder(tree)

    def _preorder(self, node):
        """Dispatch on node, then on its children unless told to stop."""
        stop = self.dispatch(node)
        if stop:
            return
        for child in node.getChildren():
            # getChildren() may also return non-node values; skip them
            if isinstance(child, ast.Node):
                self._preorder(child)

    def postorder(self, tree, visitor):
        """Do postorder walk of tree using visitor"""
        self.visitor = visitor
        visitor.visit = self._postorder
        self._postorder(tree)

    def _postorder(self, node):
        """Dispatch on the children of node first, then on node itself."""
        for child in node.getChildren():
            if isinstance(child, ast.Node):
                # recurse postorder; switching to _preorder here would
                # visit the subtrees in the wrong order
                self._postorder(child)
        self.dispatch(node)

    def dispatch(self, node):
        """Call the visitor's visit<ClassName> method for node, if any.

        Returns whatever the visitor method returns, or None when the
        visitor has no method for this node class.
        """
        self.node = node
        className = node.__class__.__name__
        meth = getattr(self.visitor, 'visit' + className, None)
        if self.VERBOSE:
            # a single parenthesized argument prints the same text
            # whether print is a statement or a function
            print("dispatch %s %s" % (className,
                                      (meth and meth.__name__ or '')))
        if meth:
            return meth(node)
106
class CodeGenerator:
    """Visitor that emits Python VM instructions for a module

    Meant to be driven by walk()/ASTVisitor.  Each visitXXX method
    emits instructions into self.code (a PythonVMCode).  A method
    returns 1 when it has already visited the node's children itself,
    which tells the ASTVisitor not to descend again.  The visit
    attribute used for recursion is installed on the instance by the
    ASTVisitor before the walk starts.
    """

    def __init__(self, filename=None):
        self.filename = filename
        self.code = PythonVMCode(filename=filename)
        self.code.setFlags(0)
        # stack of local-name sets, innermost scope on top
        self.locals = misc.Stack()
        # track the current and max stack size
        # XXX does this belong here or in the PythonVMCode?
        self.curStack = 0
        self.maxStack = 0

    def emit(self):
        """Create a Python code object

        XXX It is confusing that this method isn't related to the
        method named emit in the PythonVMCode.
        """
        return self.code.makeCodeObject(self.maxStack)

    def push(self, n):
        """Record that n values were pushed; update the high-water mark."""
        self.curStack = self.curStack + n
        if self.curStack > self.maxStack:
            self.maxStack = self.curStack

    def pop(self, n):
        """Record that n values were popped.

        The tracked depth never goes below zero: popping more than is
        currently tracked clamps it to 0.
        """
        if n <= self.curStack:
            self.curStack = self.curStack - n
        else:
            self.curStack = 0

    def visitDiscard(self, node):
        # no code is generated for the node or its children
        return 1

    def visitModule(self, node):
        """Generate the module body followed by 'return None'."""
        lnf = walk(node.node, LocalNameFinder())
        self.locals.push(lnf.getLocals())
        self.visit(node.node)
        self.code.emit('LOAD_CONST', None)
        self.code.emit('RETURN_VALUE')
        return 1

    def visitFunction(self, node):
        """Compile the function with a nested generator and bind its name.

        The NestedCodeGenerator itself is stored as the LOAD_CONST
        argument; PythonVMCode._getConsts turns it into a code object
        when the enclosing code object is built.
        """
        codeBody = NestedCodeGenerator(node, filename=self.filename)
        walk(node, codeBody)
        self.code.setLineNo(node.lineno)
        self.code.emit('LOAD_CONST', codeBody)
        self.code.emit('MAKE_FUNCTION', 0)
        self.code.emit('STORE_NAME', node.name)
        return 1

    def visitCallFunc(self, node):
        """Load the callee, then each argument, then CALL_FUNCTION."""
        self.visit(node.node)
        for arg in node.args:
            self.visit(arg)
        self.code.callFunction(len(node.args))
        return 1

    def visitIf(self, node):
        """Generate a test/suite chain with an optional else clause."""
        # every suite jumps here when it finishes
        after = ForwardRef()
        for test, suite in node.tests:
            self.code.setLineNo(test.lineno)
            self.visit(test)
            # taken when this test is false: skip to the next clause
            dest = ForwardRef()
            self.code.jumpIfFalse(dest)
            # true branch: discard the test value, run the suite
            self.code.popTop()
            self.visit(suite)
            self.code.jumpForward(after)
            dest.bind(self.code.getCurInst())
            # false branch: the test value is still on the stack
            self.code.popTop()
        if node.else_:
            self.visit(node.else_)
        after.bind(self.code.getCurInst())
        return 1

    def visitCompare(self, node):
        """Comment from compile.c follows:

        The following code is generated for all but the last
        comparison in a chain:

        label:  on stack:       opcode:         jump to:

                a               <code to load b>
                a, b            DUP_TOP
                a, b, b         ROT_THREE
                b, a, b         COMPARE_OP
                b, 0-or-1       JUMP_IF_FALSE   L1
                b, 1            POP_TOP
                b

        We are now ready to repeat this sequence for the next
        comparison in the chain.

        For the last we generate:

                b               <code to load c>
                b, c            COMPARE_OP
                0-or-1

        If there were any jumps to L1 (i.e., there was more than one
        comparison), we generate:

                0-or-1          JUMP_FORWARD    L2
        L1:     b, 0            ROT_TWO
                0, b            POP_TOP
                0
        L2:     0-or-1
        """
        self.visit(node.expr)
        # if refs are never emitted, subsequent bind call has no effect
        l1 = ForwardRef()
        l2 = ForwardRef()
        for op, code in node.ops[:-1]:
            # emit every comparison except the last
            self.visit(code)
            self.code.dupTop()
            self.code.rotThree()
            self.code.compareOp(op)
            self.code.jumpIfFalse(l1)
            self.code.popTop()
        if node.ops:
            # emit the last comparison
            op, code = node.ops[-1]
            self.visit(code)
            self.code.compareOp(op)
        if len(node.ops) > 1:
            self.code.jumpForward(l2)
            l1.bind(self.code.getCurInst())
            self.code.rotTwo()
            self.code.popTop()
            l2.bind(self.code.getCurInst())
        return 1

    def binaryOp(self, node, op):
        """Evaluate both operands, then emit the binary opcode op."""
        self.visit(node.left)
        self.visit(node.right)
        self.code.emit(op)
        # two operands consumed, one result produced
        self.pop(1)
        return 1

    def visitAdd(self, node):
        return self.binaryOp(node, 'BINARY_ADD')

    def visitSub(self, node):
        return self.binaryOp(node, 'BINARY_SUBTRACT')

    def visitMul(self, node):
        return self.binaryOp(node, 'BINARY_MULTIPLY')

    def visitDiv(self, node):
        return self.binaryOp(node, 'BINARY_DIVIDE')

    def visitName(self, node):
        """Load a name: LOAD_FAST if local to the current scope."""
        locals = self.locals.top()
        if locals.has_elt(node.name):
            self.code.loadFast(node.name)
        else:
            self.code.loadGlobal(node.name)
        self.push(1)

    def visitConst(self, node):
        self.code.loadConst(node.value)
        self.push(1)

    def visitReturn(self, node):
        self.code.setLineNo(node.lineno)
        self.visit(node.value)
        self.code.returnValue()
        return 1

    def visitRaise(self, node):
        """Evaluate whichever of the three expressions exist, then raise."""
        self.code.setLineNo(node.lineno)
        # number of expressions actually pushed for RAISE_VARARGS
        n = 0
        if node.expr1:
            self.visit(node.expr1)
            n = n + 1
        if node.expr2:
            self.visit(node.expr2)
            n = n + 1
        if node.expr3:
            self.visit(node.expr3)
            n = n + 1
        self.code.raiseVarargs(n)
        return 1

    def visitPrint(self, node):
        """Emit one PRINT_ITEM per expression."""
        self.code.setLineNo(node.lineno)
        for child in node.nodes:
            self.visit(child)
            self.code.emit('PRINT_ITEM')
        self.pop(len(node.nodes))
        return 1

    def visitPrintnl(self, node):
        self.visitPrint(node)
        self.code.emit('PRINT_NEWLINE')
        return 1
Jeremy Hylton0fdffcf2000-02-04 19:37:35 +0000304
class NestedCodeGenerator(CodeGenerator):
    """Generate code for a function object within another scope

    XXX not clear that this subclass is needed
    """
    super_init = CodeGenerator.__init__

    def __init__(self, func, filename='<?>'):
        """code and args of function or class being walked

        XXX need to separately pass to ASTVisitor.  the constructor
        only uses the code object to find the local names

        The parent __init__ is run first; the PythonVMCode it creates
        is then replaced by one that knows the function's argument
        count and name, so the setFlags(0) call made by the parent
        does not apply to the replacement.
        """
        self.super_init(filename)
        self.name = func.name
        argnames = func.argnames
        code = PythonVMCode(len(argnames), name=func.name,
                            filename=filename)
        self.code = code
        if func.varargs:
            code.setVarArgs()
        if func.kwargs:
            code.setKWArgs()
        # the arguments count as locals of the new scope
        finder = walk(func.code, LocalNameFinder(argnames))
        self.locals.push(finder.getLocals())

    def __repr__(self):
        return "<NestedCodeGenerator: %s>" % self.name

    def visitFunction(self, node):
        """Generate the function body followed by 'return None'."""
        finder = walk(node.code, LocalNameFinder(node.argnames))
        self.locals.push(finder.getLocals())
        # XXX need to handle def foo((a, b)):
        code = self.code
        code.setLineNo(node.lineno)
        self.visit(node.code)
        code.emit('LOAD_CONST', None)
        code.emit('RETURN_VALUE')
        return 1
Jeremy Hylton8b6323d2000-02-04 00:28:21 +0000344
class LocalNameFinder:
    """Visitor that collects the names bound in one scope

    Names are gathered from function and class definitions, import
    statements and assignment targets.  Function and class bodies are
    not descended into (their visit methods return 1).
    """

    def __init__(self, names=()):
        found = misc.Set()
        for name in names:
            found.add(name)
        self.names = found

    def getLocals(self):
        """Return the set of names collected so far."""
        return self.names

    def _addNames(self, names):
        # shared by the two import forms
        for name in names:
            self.names.add(name)

    def visitFunction(self, node):
        self.names.add(node.name)
        return 1

    def visitImport(self, node):
        self._addNames(node.names)

    def visitFrom(self, node):
        self._addNames(node.names)

    def visitClassdef(self, node):
        self.names.add(node.name)
        return 1

    def visitAssName(self, node):
        self.names.add(node.name)
372
class Label:
    """A label identified by a number"""

    def __init__(self, num):
        self.num = num

    def __repr__(self):
        text = "Label(%d)" % (self.num,)
        return text
378
class ForwardRef:
    """Placeholder for a jump target that is not known yet

    A ForwardRef is used as the argument of a jump instruction and is
    later bound, with bind(), to the index of the instruction it
    should refer to.  Each instance gets an id; unless one is passed
    in, ids are handed out from the class-wide counter.
    """
    # next id to hand out
    count = 0

    def __init__(self, id=None, val=None):
        if id is None:
            id = ForwardRef.count
            ForwardRef.count = ForwardRef.count + 1
        self.id = id
        # index of the target instruction, None while unbound
        self.val = val

    def __repr__(self):
        # compare against None: instruction index 0 is a valid binding
        if self.val is not None:
            return "ForwardRef(val=%d)" % self.val
        else:
            return "ForwardRef(id=%d)" % self.id

    def bind(self, inst):
        """Bind the reference to instruction index inst."""
        self.val = inst

    def resolve(self):
        """Return the bound instruction index, or None if unbound."""
        return self.val
Jeremy Hylton0fdffcf2000-02-04 19:37:35 +0000400
def add_hook(hooks, type, meth):
    """Helper function for PythonVMCode _emit_hooks

    Appends meth to the list stored in hooks under type, creating the
    list when there is none yet.
    """
    try:
        registered = hooks[type]
    except KeyError:
        registered = []
    registered.append(meth)
    hooks[type] = registered
Jeremy Hylton0fdffcf2000-02-04 19:37:35 +0000406
class PythonVMCode:
    """Creates Python code objects

    The new module is used to create the code object.  The following
    attribute definitions are included from the reference manual:

    co_name gives the function name
    co_argcount is the number of positional arguments (including
        arguments with default values)
    co_nlocals is the number of local variables used by the function
        (including arguments)
    co_varnames is a tuple containing the names of the local variables
        (starting with the argument names)
    co_code is a string representing the sequence of bytecode instructions
    co_consts is a tuple containing the literals used by the bytecode
    co_names is a tuple containing the names used by the bytecode
    co_filename is the filename from which the code was compiled
    co_firstlineno is the first line number of the function
    co_lnotab is a string encoding the mapping from byte code offsets
        to line numbers (for details see the source code of the
        interpreter)
        see code com_set_lineno and com_add_lnotab
        it's a string with 2bytes per set_lineno

    co_stacksize is the required stack size (including local variables)
    co_flags is an integer encoding a number of flags for the
        interpreter.

    The following flag bits are defined for co_flags: bit 2 is set if
    the function uses the "*arguments" syntax to accept an arbitrary
    number of positional arguments; bit 3 is set if the function uses
    the "**keywords" syntax to accept arbitrary keyword arguments;
    other bits are used internally or reserved for future use.

    If a code object represents a function, the first item in
    co_consts is the documentation string of the function, or None if
    undefined.

    Instructions are collected as tuples in self.insts: (opname,) for
    opcodes without an argument and (opname, arg) for opcodes with
    one.  Arguments are kept symbolic (names, constants, ForwardRefs)
    until makeCodeObject converts them to numbers.
    """

    # XXX flag bits
    VARARGS = 0x04
    KWARGS = 0x08

    def __init__(self, argcount=0, name='?', filename='<?>',
                 docstring=None):
        # XXX why is the default value for flags 3?
        self.insts = []
        # used by makeCodeObject
        self.argcount = argcount
        self.code = ''
        self.consts = [docstring]
        self.filename = filename
        self.flags = 3
        self.name = name
        self.names = []
        self.varnames = []
        # lnotab support
        self.firstlineno = 0
        self.lastlineno = 0
        self.last_addr = 0
        self.lnotab = ''

    def __repr__(self):
        return "<bytecode: %d instrs>" % len(self.insts)

    def setFlags(self, val):
        """Set co_flags to val.

        XXX for module's function
        """
        self.flags = val

    def setVarArgs(self):
        self.flags = self.flags | self.VARARGS

    def setKWArgs(self):
        self.flags = self.flags | self.KWARGS

    def getCurInst(self):
        """Return the index the next emitted instruction will get."""
        return len(self.insts)

    def getNextInst(self):
        return len(self.insts) + 1

    def dump(self, io=sys.stdout):
        """Write a listing of the collected instructions to io."""
        i = 0
        for inst in self.insts:
            if inst[0] == 'SET_LINENO':
                # blank line before each new source line
                io.write("\n")
            io.write(" %3d " % i)
            if len(inst) == 1:
                io.write("%s\n" % inst)
            else:
                io.write("%-15.15s\t%s\n" % inst)
            i = i + 1

    def makeCodeObject(self, stacksize):
        """Make a Python code object

        This creates a Python code object using the new module.  This
        seems simpler than reverse-engineering the way marshal dumps
        code objects into .pyc files.  One of the key difficulties is
        figuring out how to layout references to code objects that
        appear on the VM stack; e.g.
          3 SET_LINENO          1
          6 LOAD_CONST          0 (<code object fact at 8115878 [...]
          9 MAKE_FUNCTION       0
         12 STORE_NAME          0 (fact)

        stacksize is passed through as the co_stacksize of the result.
        """

        self._findOffsets()
        lnotab = LineAddrTable()
        index = 0
        for t in self.insts:
            opname = t[0]
            if len(t) == 1:
                lnotab.addCode(chr(self.opnum[opname]))
            elif len(t) == 2:
                # the instruction index lets relative jumps be
                # measured from this particular instruction
                oparg = self._convertArg(opname, t[1], index)
                if opname == 'SET_LINENO':
                    lnotab.nextLine(oparg)
                # 16-bit argument, low byte first
                hi, lo = divmod(oparg, 256)
                lnotab.addCode(chr(self.opnum[opname]) + chr(lo) +
                               chr(hi))
            index = index + 1
        # why is a module a special case?
        if self.flags == 0:
            nlocals = 0
        else:
            nlocals = len(self.varnames)
        # the table built by LineAddrTable starts counting from the
        # line of the first SET_LINENO, so that line must be the
        # co_firstlineno unless one was set explicitly
        firstlineno = self.firstlineno or lnotab.firstline
        co = new.code(self.argcount, nlocals, stacksize,
                      self.flags, lnotab.getCode(), self._getConsts(),
                      tuple(self.names), tuple(self.varnames),
                      self.filename, self.name, firstlineno,
                      lnotab.getTable())
        return co

    def _getConsts(self):
        """Return a tuple for the const slot of a code object

        Converts CodeGenerator instances stored as constants to code
        objects by calling their emit() method.
        """
        l = []
        for elt in self.consts:
            if isinstance(elt, CodeGenerator):
                l.append(elt.emit())
            else:
                l.append(elt)
        return tuple(l)

    def _findOffsets(self):
        """Find offsets for use in resolving ForwardRefs

        self.offsets[i] is the byte offset of instruction i.  One
        extra entry holds the offset just past the last instruction,
        so that a ForwardRef bound to the end of the code resolves.
        """
        self.offsets = []
        cur = 0
        for t in self.insts:
            self.offsets.append(cur)
            l = len(t)
            if l == 1:
                cur = cur + 1
            elif l == 2:
                cur = cur + 3
                arg = t[1]
                if isinstance(arg, ForwardRef):
                    # offset of the following instruction; used as
                    # the origin of a relative jump when _convertArg
                    # is not told the instruction index
                    arg.__offset = cur
        self.offsets.append(cur)

    def _convertArg(self, op, arg, index=None):
        """Convert the string representation of an arg to a number

        The specific handling depends on the opcode.  index, when
        given, is the position in self.insts of the instruction that
        carries the argument; it is only needed for relative jumps.

        XXX This first implementation isn't going to be very
        efficient.
        """
        if op == 'SET_LINENO':
            return arg
        if op == 'LOAD_CONST':
            return self._lookupName(arg, self.consts)
        if op == 'LOAD_FAST':
            if arg in self.names:
                return self._lookupName(arg, self.varnames)
            else:
                return self._lookupName(arg, self.varnames, self.names)
        if op == 'LOAD_GLOBAL':
            return self._lookupName(arg, self.names)
        if op == 'STORE_NAME':
            return self._lookupName(arg, self.names)
        if op == 'COMPARE_OP':
            return self.cmp_op.index(arg)
        if self.hasjrel.has_elt(op):
            # relative jump: distance from the instruction following
            # the jump (opcode plus two argument bytes) to the target.
            # Computed per instruction because one ForwardRef can be
            # the target of several jumps.
            target = self.offsets[arg.resolve()]
            if index is None:
                origin = arg.__offset
            else:
                origin = self.offsets[index] + 3
            return target - origin
        if self.hasjabs.has_elt(op):
            # absolute jump: byte offset of the target itself
            return self.offsets[arg.resolve()]
        return arg

    def _lookupName(self, name, list, list2=None):
        """Return index of name in list, appending if necessary

        Yicky hack: Second list can be used for lookup of local names
        where the name needs to be added to varnames and names.
        """
        if name in list:
            return list.index(name)
        else:
            end = len(list)
            list.append(name)
            if list2 is not None:
                list2.append(name)
            return end

    # Convert some stuff from the dis module for local use

    cmp_op = list(dis.cmp_op)
    hasjrel = misc.Set()
    for i in dis.hasjrel:
        hasjrel.add(dis.opname[i])
    hasjabs = misc.Set()
    for i in dis.hasjabs:
        hasjabs.add(dis.opname[i])

    # opcode name -> opcode number
    opnum = {}
    for num in range(len(dis.opname)):
        opnum[dis.opname[num]] = num

    # the interface below here seemed good at first.  upon real use,
    # it seems redundant to add a function for each opcode,
    # particularly because the method and opcode basically have the
    # same name.
    # on the other hand, we need to track things like stack depth in
    # order to generate code objects.  if we wrap instructions in a
    # method, we get an easy way to track these.  a simpler
    # approach, however, would be to define hooks that can be called
    # by emit.

    def setLineNo(self, num):
        self.emit('SET_LINENO', num)

    def popTop(self):
        self.emit('POP_TOP')

    def dupTop(self):
        self.emit('DUP_TOP')

    def rotTwo(self):
        self.emit('ROT_TWO')

    def rotThree(self):
        self.emit('ROT_THREE')

    def jumpIfFalse(self, dest):
        self.emit('JUMP_IF_FALSE', dest)

    def loadFast(self, name):
        self.emit('LOAD_FAST', name)

    def loadGlobal(self, name):
        self.emit('LOAD_GLOBAL', name)

    def binaryAdd(self):
        self.emit('BINARY_ADD')

    def compareOp(self, op):
        self.emit('COMPARE_OP', op)

    def loadConst(self, val):
        self.emit('LOAD_CONST', val)

    def returnValue(self):
        self.emit('RETURN_VALUE')

    def jumpForward(self, dest):
        self.emit('JUMP_FORWARD', dest)

    def raiseVarargs(self, num):
        self.emit('RAISE_VARARGS', num)

    def callFunction(self, num):
        self.emit('CALL_FUNCTION', num)

    # this version of emit + arbitrary hooks might work, but it's damn
    # messy.

    def emit(self, *args):
        """Append the instruction args = (opname[, arg]) to self.insts.

        Any hooks registered for the opcode name are run first.
        """
        self._emitDispatch(args[0], args[1:])
        self.insts.append(args)

    def _emitDispatch(self, type, args):
        # each hook is called with this object and the argument tuple
        for func in self._emit_hooks.get(type, []):
            func(self, args)

    # opcode name -> list of hook functions, see add_hook()
    _emit_hooks = {}
693
class LineAddrTable:
    """lnotab

    This class builds the lnotab, which is undocumented but described
    by com_set_lineno in compile.c.  Here's an attempt at explanation:

    For each SET_LINENO instruction after the first one, two bytes are
    added to lnotab.  (In some cases, multiple two-byte entries are
    added.)  The first byte is the distance in bytes between the
    instruction for the last SET_LINENO and the current SET_LINENO.
    The second byte is offset in line numbers.  If either offset is
    greater than 255, multiple two-byte entries are added -- one entry
    for each factor of 255.
    """

    def __init__(self):
        # pieces of bytecode and their total length so far
        self.code = []
        self.codeOffset = 0
        # line of the first SET_LINENO; 0 means none seen yet
        self.firstline = 0
        # line and code offset of the most recent table entry
        self.lastline = 0
        self.lastoff = 0
        # the table itself, as a flat list of small integers
        self.lnotab = []

    def addCode(self, code):
        """Append a piece of bytecode and advance the code offset."""
        self.code.append(code)
        self.codeOffset = self.codeOffset + len(code)

    def nextLine(self, lineno):
        """Note that the code added from now on belongs to line lineno."""
        if self.firstline == 0:
            # first line seen: nothing is written to the table
            self.firstline = lineno
            self.lastline = lineno
            return
        addr_left = self.codeOffset - self.lastoff
        line_left = lineno - self.lastline
        while addr_left > 0 or line_left > 0:
            # each entry holds at most 255 of either delta; keep
            # writing entries until both are used up
            addr_step = min(addr_left, 255)
            line_step = min(line_left, 255)
            self.lnotab.append(addr_step)
            self.lnotab.append(line_step)
            addr_left = addr_left - addr_step
            line_left = line_left - line_step
        self.lastline = lineno
        self.lastoff = self.codeOffset

    def getCode(self):
        """Return all the bytecode added so far as one string."""
        pieces = self.code
        return string.join(pieces, '')

    def getTable(self):
        """Return the table as a string, one character per entry value."""
        chars = map(chr, self.lnotab)
        return string.join(chars, '')
750
class CompiledModule:
    """Store the code object for a compiled module

    XXX Not clear how the code objects will be stored.  Seems possible
    that a single code attribute is sufficient, because it will
    contains references to all the need code objects.  That might be
    messy, though.
    """
    # first four bytes of the .pyc header: a number followed by \r\n
    MAGIC = (20121 | (ord('\r')<<16) | (ord('\n')<<24))

    def __init__(self, source, filename):
        """source is the module's text, filename the file it came from."""
        self.source = source
        self.filename = filename

    def compile(self):
        """Parse self.source and generate self.code from the tree."""
        t = transformer.Transformer()
        self.ast = t.parsesuite(self.source)
        cg = CodeGenerator(self.filename)
        walk(self.ast, cg)
        self.code = cg.emit()

    def dump(self, path):
        """create a .pyc file"""
        f = open(path, 'wb')
        try:
            # close the file even if writing or marshalling fails
            f.write(self._pyc_header())
            marshal.dump(self.code, f)
        finally:
            f.close()

    def _pyc_header(self):
        """Return the 8-byte header: magic number, then source mtime."""
        # compile.c uses marshal to write a long directly, without
        # calling the interface that would also generate a 1-byte code
        # to indicate the type of the value.  simplest way to get the
        # same effect is to call marshal and then skip the code.
        magic = marshal.dumps(self.MAGIC)[1:]
        mtime = os.stat(self.filename)[stat.ST_MTIME]
        # marshal writes the magic little-endian; pack the mtime the
        # same way instead of in the platform's native byte order
        mtime = struct.pack('<i', mtime)
        return magic + mtime
788
if __name__ == "__main__":
    # Compile the file named on the command line (default: test.py)
    # and write the result next to it under the same name plus 'c'.
    if len(sys.argv) > 1:
        filename = sys.argv[1]
    else:
        filename = 'test.py'
    f = open(filename)
    try:
        # close the source file explicitly rather than leaving it to
        # the garbage collector
        buf = f.read()
    finally:
        f.close()
    mod = CompiledModule(buf, filename)
    mod.compile()
    mod.dump(filename + 'c')