| // Package compile defines the Starlark bytecode compiler. |
| // It is an internal package of the Starlark interpreter and is not directly accessible to clients. |
| // |
| // The compiler generates byte code with optional uint32 operands for a |
| // virtual machine with the following components: |
| // - a program counter, which is an index into the byte code array. |
| // - an operand stack, whose maximum size is computed for each function by the compiler. |
| // - an stack of active iterators. |
| // - an array of local variables. |
| // The number of local variables and their indices are computed by the resolver. |
| // Locals (possibly including parameters) that are shared with nested functions |
| // are 'cells': their locals array slot will contain a value of type 'cell', |
| // an indirect value in a box that is explicitly read/updated by instructions. |
| // - an array of free variables, for nested functions. |
| // Free variables are a subset of the ancestors' cell variables. |
| // As with locals and cells, these are computed by the resolver. |
| // - an array of global variables, shared among all functions in the same module. |
| // All elements are initially nil. |
| // - two maps of predeclared and universal identifiers. |
| // |
| // Each function has a line number table that maps each program counter |
| // offset to a source position, including the column number. |
| // |
| // Operands, logically uint32s, are encoded using little-endian 7-bit |
| // varints, the top bit indicating that more bytes follow. |
| // |
| package compile // import "go.starlark.net/internal/compile" |
| |
| import ( |
| "bytes" |
| "fmt" |
| "log" |
| "os" |
| "path/filepath" |
| "strconv" |
| "strings" |
| "sync" |
| |
| "go.starlark.net/resolve" |
| "go.starlark.net/syntax" |
| ) |
| |
| // Disassemble causes the assembly code for each function |
| // to be printed to stderr as it is generated. |
| var Disassemble = false |
| |
| const debug = false // make code generation verbose, for debugging the compiler |
| |
| // Increment this to force recompilation of saved bytecode files. |
| const Version = 12 |
| |
| type Opcode uint8 |
| |
| // "x DUP x x" is a "stack picture" that describes the state of the |
| // stack before and after execution of the instruction. |
| // |
| // OP<index> indicates an immediate operand that is an index into the |
| // specified table: locals, names, freevars, constants. |
| const ( |
| NOP Opcode = iota // - NOP - |
| |
| // stack operations |
| DUP // x DUP x x |
| DUP2 // x y DUP2 x y x y |
| POP // x POP - |
| EXCH // x y EXCH y x |
| |
| // binary comparisons |
| // (order must match Token) |
| LT |
| GT |
| GE |
| LE |
| EQL |
| NEQ |
| |
| // binary arithmetic |
| // (order must match Token) |
| PLUS |
| MINUS |
| STAR |
| SLASH |
| SLASHSLASH |
| PERCENT |
| AMP |
| PIPE |
| CIRCUMFLEX |
| LTLT |
| GTGT |
| |
| IN |
| |
| // unary operators |
| UPLUS // x UPLUS x |
| UMINUS // x UMINUS -x |
| TILDE // x TILDE ~x |
| |
| NONE // - NONE None |
| TRUE // - TRUE True |
| FALSE // - FALSE False |
| MANDATORY // - MANDATORY Mandatory [sentinel value for required kwonly args] |
| |
| ITERPUSH // iterable ITERPUSH - [pushes the iterator stack] |
| ITERPOP // - ITERPOP - [pops the iterator stack] |
| NOT // value NOT bool |
| RETURN // value RETURN - |
| SETINDEX // a i new SETINDEX - |
| INDEX // a i INDEX elem |
| SETDICT // dict key value SETDICT - |
| SETDICTUNIQ // dict key value SETDICTUNIQ - |
| APPEND // list elem APPEND - |
| SLICE // x lo hi step SLICE slice |
| INPLACE_ADD // x y INPLACE_ADD z where z is x+y or x.extend(y) |
| MAKEDICT // - MAKEDICT dict |
| |
| // --- opcodes with an argument must go below this line --- |
| |
| // control flow |
| JMP // - JMP<addr> - |
| CJMP // cond CJMP<addr> - |
| ITERJMP // - ITERJMP<addr> elem (and fall through) [acts on topmost iterator] |
| // or: - ITERJMP<addr> - (and jump) |
| |
| CONSTANT // - CONSTANT<constant> value |
| MAKETUPLE // x1 ... xn MAKETUPLE<n> tuple |
| MAKELIST // x1 ... xn MAKELIST<n> list |
| MAKEFUNC // defaults+freevars MAKEFUNC<func> fn |
| LOAD // from1 ... fromN module LOAD<n> v1 ... vN |
| SETLOCAL // value SETLOCAL<local> - |
| SETGLOBAL // value SETGLOBAL<global> - |
| LOCAL // - LOCAL<local> value |
| FREE // - FREE<freevar> cell |
| FREECELL // - FREECELL<freevar> value (content of FREE cell) |
| LOCALCELL // - LOCALCELL<local> value (content of LOCAL cell) |
| SETLOCALCELL // value SETLOCALCELL<local> - (set content of LOCAL cell) |
| GLOBAL // - GLOBAL<global> value |
| PREDECLARED // - PREDECLARED<name> value |
| UNIVERSAL // - UNIVERSAL<name> value |
| ATTR // x ATTR<name> y y = x.name |
| SETFIELD // x y SETFIELD<name> - x.name = y |
| UNPACK // iterable UNPACK<n> vn ... v1 |
| |
| // n>>8 is #positional args and n&0xff is #named args (pairs). |
| CALL // fn positional named CALL<n> result |
| CALL_VAR // fn positional named *args CALL_VAR<n> result |
| CALL_KW // fn positional named **kwargs CALL_KW<n> result |
| CALL_VAR_KW // fn positional named *args **kwargs CALL_VAR_KW<n> result |
| |
| OpcodeArgMin = JMP |
| OpcodeMax = CALL_VAR_KW |
| ) |
| |
| // TODO(adonovan): add dynamic checks for missing opcodes in the tables below. |
| |
| var opcodeNames = [...]string{ |
| AMP: "amp", |
| APPEND: "append", |
| ATTR: "attr", |
| CALL: "call", |
| CALL_KW: "call_kw ", |
| CALL_VAR: "call_var", |
| CALL_VAR_KW: "call_var_kw", |
| CIRCUMFLEX: "circumflex", |
| CJMP: "cjmp", |
| CONSTANT: "constant", |
| DUP2: "dup2", |
| DUP: "dup", |
| EQL: "eql", |
| EXCH: "exch", |
| FALSE: "false", |
| FREE: "free", |
| FREECELL: "freecell", |
| GE: "ge", |
| GLOBAL: "global", |
| GT: "gt", |
| GTGT: "gtgt", |
| IN: "in", |
| INDEX: "index", |
| INPLACE_ADD: "inplace_add", |
| ITERJMP: "iterjmp", |
| ITERPOP: "iterpop", |
| ITERPUSH: "iterpush", |
| JMP: "jmp", |
| LE: "le", |
| LOAD: "load", |
| LOCAL: "local", |
| LOCALCELL: "localcell", |
| LT: "lt", |
| LTLT: "ltlt", |
| MAKEDICT: "makedict", |
| MAKEFUNC: "makefunc", |
| MAKELIST: "makelist", |
| MAKETUPLE: "maketuple", |
| MANDATORY: "mandatory", |
| MINUS: "minus", |
| NEQ: "neq", |
| NONE: "none", |
| NOP: "nop", |
| NOT: "not", |
| PERCENT: "percent", |
| PIPE: "pipe", |
| PLUS: "plus", |
| POP: "pop", |
| PREDECLARED: "predeclared", |
| RETURN: "return", |
| SETDICT: "setdict", |
| SETDICTUNIQ: "setdictuniq", |
| SETFIELD: "setfield", |
| SETGLOBAL: "setglobal", |
| SETINDEX: "setindex", |
| SETLOCAL: "setlocal", |
| SETLOCALCELL: "setlocalcell", |
| SLASH: "slash", |
| SLASHSLASH: "slashslash", |
| SLICE: "slice", |
| STAR: "star", |
| TILDE: "tilde", |
| TRUE: "true", |
| UMINUS: "uminus", |
| UNIVERSAL: "universal", |
| UNPACK: "unpack", |
| UPLUS: "uplus", |
| } |
| |
| const variableStackEffect = 0x7f |
| |
| // stackEffect records the effect on the size of the operand stack of |
| // each kind of instruction. For some instructions this requires computation. |
| var stackEffect = [...]int8{ |
| AMP: -1, |
| APPEND: -2, |
| ATTR: 0, |
| CALL: variableStackEffect, |
| CALL_KW: variableStackEffect, |
| CALL_VAR: variableStackEffect, |
| CALL_VAR_KW: variableStackEffect, |
| CIRCUMFLEX: -1, |
| CJMP: -1, |
| CONSTANT: +1, |
| DUP2: +2, |
| DUP: +1, |
| EQL: -1, |
| FALSE: +1, |
| FREE: +1, |
| FREECELL: +1, |
| GE: -1, |
| GLOBAL: +1, |
| GT: -1, |
| GTGT: -1, |
| IN: -1, |
| INDEX: -1, |
| INPLACE_ADD: -1, |
| ITERJMP: variableStackEffect, |
| ITERPOP: 0, |
| ITERPUSH: -1, |
| JMP: 0, |
| LE: -1, |
| LOAD: -1, |
| LOCAL: +1, |
| LOCALCELL: +1, |
| LT: -1, |
| LTLT: -1, |
| MAKEDICT: +1, |
| MAKEFUNC: 0, |
| MAKELIST: variableStackEffect, |
| MAKETUPLE: variableStackEffect, |
| MANDATORY: +1, |
| MINUS: -1, |
| NEQ: -1, |
| NONE: +1, |
| NOP: 0, |
| NOT: 0, |
| PERCENT: -1, |
| PIPE: -1, |
| PLUS: -1, |
| POP: -1, |
| PREDECLARED: +1, |
| RETURN: -1, |
| SETLOCALCELL: -1, |
| SETDICT: -3, |
| SETDICTUNIQ: -3, |
| SETFIELD: -2, |
| SETGLOBAL: -1, |
| SETINDEX: -3, |
| SETLOCAL: -1, |
| SLASH: -1, |
| SLASHSLASH: -1, |
| SLICE: -3, |
| STAR: -1, |
| TRUE: +1, |
| UMINUS: 0, |
| UNIVERSAL: +1, |
| UNPACK: variableStackEffect, |
| UPLUS: 0, |
| } |
| |
| func (op Opcode) String() string { |
| if op < OpcodeMax { |
| if name := opcodeNames[op]; name != "" { |
| return name |
| } |
| } |
| return fmt.Sprintf("illegal op (%d)", op) |
| } |
| |
| // A Program is a Starlark file in executable form. |
| // |
| // Programs are serialized by the Program.Encode method, |
| // which must be updated whenever this declaration is changed. |
| type Program struct { |
| Loads []Binding // name (really, string) and position of each load stmt |
| Names []string // names of attributes and predeclared variables |
| Constants []interface{} // = string | int64 | float64 | *big.Int | Bytes |
| Functions []*Funcode |
| Globals []Binding // for error messages and tracing |
| Toplevel *Funcode // module initialization function |
| } |
| |
| // The type of a bytes literal value, to distinguish from text string. |
| type Bytes string |
| |
| // A Funcode is the code of a compiled Starlark function. |
| // |
| // Funcodes are serialized by the encoder.function method, |
| // which must be updated whenever this declaration is changed. |
| type Funcode struct { |
| Prog *Program |
| Pos syntax.Position // position of def or lambda token |
| Name string // name of this function |
| Doc string // docstring of this function |
| Code []byte // the byte code |
| pclinetab []uint16 // mapping from pc to linenum |
| Locals []Binding // locals, parameters first |
| Cells []int // indices of Locals that require cells |
| Freevars []Binding // for tracing |
| MaxStack int |
| NumParams int |
| NumKwonlyParams int |
| HasVarargs, HasKwargs bool |
| |
| // -- transient state -- |
| |
| lntOnce sync.Once |
| lnt []pclinecol // decoded line number table |
| } |
| |
| type pclinecol struct { |
| pc uint32 |
| line, col int32 |
| } |
| |
| // A Binding is the name and position of a binding identifier. |
| type Binding struct { |
| Name string |
| Pos syntax.Position |
| } |
| |
| // A pcomp holds the compiler state for a Program. |
| type pcomp struct { |
| prog *Program // what we're building |
| |
| names map[string]uint32 |
| constants map[interface{}]uint32 |
| functions map[*Funcode]uint32 |
| } |
| |
| // An fcomp holds the compiler state for a Funcode. |
| type fcomp struct { |
| fn *Funcode // what we're building |
| |
| pcomp *pcomp |
| pos syntax.Position // current position of generated code |
| loops []loop |
| block *block |
| } |
| |
| type loop struct { |
| break_, continue_ *block |
| } |
| |
| type block struct { |
| insns []insn |
| |
| // If the last insn is a RETURN, jmp and cjmp are nil. |
| // If the last insn is a CJMP or ITERJMP, |
| // cjmp and jmp are the "true" and "false" successors. |
| // Otherwise, jmp is the sole successor. |
| jmp, cjmp *block |
| |
| initialstack int // for stack depth computation |
| |
| // Used during encoding |
| index int // -1 => not encoded yet |
| addr uint32 |
| } |
| |
| type insn struct { |
| op Opcode |
| arg uint32 |
| line, col int32 |
| } |
| |
| // Position returns the source position for program counter pc. |
| func (fn *Funcode) Position(pc uint32) syntax.Position { |
| fn.lntOnce.Do(fn.decodeLNT) |
| |
| // Binary search to find last LNT entry not greater than pc. |
| // To avoid dynamic dispatch, this is a specialization of |
| // sort.Search using this predicate: |
| // !(i < len(fn.lnt)-1 && fn.lnt[i+1].pc <= pc) |
| n := len(fn.lnt) |
| i, j := 0, n |
| for i < j { |
| h := int(uint(i+j) >> 1) |
| if !(h >= n-1 || fn.lnt[h+1].pc > pc) { |
| i = h + 1 |
| } else { |
| j = h |
| } |
| } |
| |
| var line, col int32 |
| if i < n { |
| line = fn.lnt[i].line |
| col = fn.lnt[i].col |
| } |
| |
| pos := fn.Pos // copy the (annoyingly inaccessible) filename |
| pos.Col = col |
| pos.Line = line |
| return pos |
| } |
| |
| // decodeLNT decodes the line number table and populates fn.lnt. |
| // It is called at most once. |
| func (fn *Funcode) decodeLNT() { |
| // Conceptually the table contains rows of the form |
| // (pc uint32, line int32, col int32), sorted by pc. |
| // We use a delta encoding, since the differences |
| // between successive pc, line, and column values |
| // are typically small and positive (though line and |
| // especially column differences may be negative). |
| // The delta encoding starts from |
| // {pc: 0, line: fn.Pos.Line, col: fn.Pos.Col}. |
| // |
| // Each entry is packed into one or more 16-bit values: |
| // Δpc uint4 |
| // Δline int5 |
| // Δcol int6 |
| // incomplete uint1 |
| // The top 4 bits are the unsigned delta pc. |
| // The next 5 bits are the signed line number delta. |
| // The next 6 bits are the signed column number delta. |
| // The bottom bit indicates that more rows follow because |
| // one of the deltas was maxed out. |
| // These field widths were chosen from a sample of real programs, |
| // and allow >97% of rows to be encoded in a single uint16. |
| |
| fn.lnt = make([]pclinecol, 0, len(fn.pclinetab)) // a minor overapproximation |
| entry := pclinecol{ |
| pc: 0, |
| line: fn.Pos.Line, |
| col: fn.Pos.Col, |
| } |
| for _, x := range fn.pclinetab { |
| entry.pc += uint32(x) >> 12 |
| entry.line += int32((int16(x) << 4) >> (16 - 5)) // sign extend Δline |
| entry.col += int32((int16(x) << 9) >> (16 - 6)) // sign extend Δcol |
| if (x & 1) == 0 { |
| fn.lnt = append(fn.lnt, entry) |
| } |
| } |
| } |
| |
| // bindings converts resolve.Bindings to compiled form. |
| func bindings(bindings []*resolve.Binding) []Binding { |
| res := make([]Binding, len(bindings)) |
| for i, bind := range bindings { |
| res[i].Name = bind.First.Name |
| res[i].Pos = bind.First.NamePos |
| } |
| return res |
| } |
| |
| // Expr compiles an expression to a program whose toplevel function evaluates it. |
| func Expr(expr syntax.Expr, name string, locals []*resolve.Binding) *Program { |
| pos := syntax.Start(expr) |
| stmts := []syntax.Stmt{&syntax.ReturnStmt{Result: expr}} |
| return File(stmts, pos, name, locals, nil) |
| } |
| |
| // File compiles the statements of a file into a program. |
| func File(stmts []syntax.Stmt, pos syntax.Position, name string, locals, globals []*resolve.Binding) *Program { |
| pcomp := &pcomp{ |
| prog: &Program{ |
| Globals: bindings(globals), |
| }, |
| names: make(map[string]uint32), |
| constants: make(map[interface{}]uint32), |
| functions: make(map[*Funcode]uint32), |
| } |
| pcomp.prog.Toplevel = pcomp.function(name, pos, stmts, locals, nil) |
| |
| return pcomp.prog |
| } |
| |
| func (pcomp *pcomp) function(name string, pos syntax.Position, stmts []syntax.Stmt, locals, freevars []*resolve.Binding) *Funcode { |
| fcomp := &fcomp{ |
| pcomp: pcomp, |
| pos: pos, |
| fn: &Funcode{ |
| Prog: pcomp.prog, |
| Pos: pos, |
| Name: name, |
| Doc: docStringFromBody(stmts), |
| Locals: bindings(locals), |
| Freevars: bindings(freevars), |
| }, |
| } |
| |
| // Record indices of locals that require cells. |
| for i, local := range locals { |
| if local.Scope == resolve.Cell { |
| fcomp.fn.Cells = append(fcomp.fn.Cells, i) |
| } |
| } |
| |
| if debug { |
| fmt.Fprintf(os.Stderr, "start function(%s @ %s)\n", name, pos) |
| } |
| |
| // Convert AST to a CFG of instructions. |
| entry := fcomp.newBlock() |
| fcomp.block = entry |
| fcomp.stmts(stmts) |
| if fcomp.block != nil { |
| fcomp.emit(NONE) |
| fcomp.emit(RETURN) |
| } |
| |
| var oops bool // something bad happened |
| |
| setinitialstack := func(b *block, depth int) { |
| if b.initialstack == -1 { |
| b.initialstack = depth |
| } else if b.initialstack != depth { |
| fmt.Fprintf(os.Stderr, "%d: setinitialstack: depth mismatch: %d vs %d\n", |
| b.index, b.initialstack, depth) |
| oops = true |
| } |
| } |
| |
| // Linearize the CFG: |
| // compute order, address, and initial |
| // stack depth of each reachable block. |
| var pc uint32 |
| var blocks []*block |
| var maxstack int |
| var visit func(b *block) |
| visit = func(b *block) { |
| if b.index >= 0 { |
| return // already visited |
| } |
| b.index = len(blocks) |
| b.addr = pc |
| blocks = append(blocks, b) |
| |
| stack := b.initialstack |
| if debug { |
| fmt.Fprintf(os.Stderr, "%s block %d: (stack = %d)\n", name, b.index, stack) |
| } |
| var cjmpAddr *uint32 |
| var isiterjmp int |
| for i, insn := range b.insns { |
| pc++ |
| |
| // Compute size of argument. |
| if insn.op >= OpcodeArgMin { |
| switch insn.op { |
| case ITERJMP: |
| isiterjmp = 1 |
| fallthrough |
| case CJMP: |
| cjmpAddr = &b.insns[i].arg |
| pc += 4 |
| default: |
| pc += uint32(argLen(insn.arg)) |
| } |
| } |
| |
| // Compute effect on stack. |
| se := insn.stackeffect() |
| if debug { |
| fmt.Fprintln(os.Stderr, "\t", insn.op, stack, stack+se) |
| } |
| stack += se |
| if stack < 0 { |
| fmt.Fprintf(os.Stderr, "After pc=%d: stack underflow\n", pc) |
| oops = true |
| } |
| if stack+isiterjmp > maxstack { |
| maxstack = stack + isiterjmp |
| } |
| } |
| |
| if debug { |
| fmt.Fprintf(os.Stderr, "successors of block %d (start=%d):\n", |
| b.addr, b.index) |
| if b.jmp != nil { |
| fmt.Fprintf(os.Stderr, "jmp to %d\n", b.jmp.index) |
| } |
| if b.cjmp != nil { |
| fmt.Fprintf(os.Stderr, "cjmp to %d\n", b.cjmp.index) |
| } |
| } |
| |
| // Place the jmp block next. |
| if b.jmp != nil { |
| // jump threading (empty cycles are impossible) |
| for b.jmp.insns == nil { |
| b.jmp = b.jmp.jmp |
| } |
| |
| setinitialstack(b.jmp, stack+isiterjmp) |
| if b.jmp.index < 0 { |
| // Successor is not yet visited: |
| // place it next and fall through. |
| visit(b.jmp) |
| } else { |
| // Successor already visited; |
| // explicit backward jump required. |
| pc += 5 |
| } |
| } |
| |
| // Then the cjmp block. |
| if b.cjmp != nil { |
| // jump threading (empty cycles are impossible) |
| for b.cjmp.insns == nil { |
| b.cjmp = b.cjmp.jmp |
| } |
| |
| setinitialstack(b.cjmp, stack) |
| visit(b.cjmp) |
| |
| // Patch the CJMP/ITERJMP, if present. |
| if cjmpAddr != nil { |
| *cjmpAddr = b.cjmp.addr |
| } |
| } |
| } |
| setinitialstack(entry, 0) |
| visit(entry) |
| |
| fn := fcomp.fn |
| fn.MaxStack = maxstack |
| |
| // Emit bytecode (and position table). |
| if Disassemble { |
| fmt.Fprintf(os.Stderr, "Function %s: (%d blocks, %d bytes)\n", name, len(blocks), pc) |
| } |
| fcomp.generate(blocks, pc) |
| |
| if debug { |
| fmt.Fprintf(os.Stderr, "code=%d maxstack=%d\n", fn.Code, fn.MaxStack) |
| } |
| |
| // Don't panic until we've completed printing of the function. |
| if oops { |
| panic("internal error") |
| } |
| |
| if debug { |
| fmt.Fprintf(os.Stderr, "end function(%s @ %s)\n", name, pos) |
| } |
| |
| return fn |
| } |
| |
| func docStringFromBody(body []syntax.Stmt) string { |
| if len(body) == 0 { |
| return "" |
| } |
| expr, ok := body[0].(*syntax.ExprStmt) |
| if !ok { |
| return "" |
| } |
| lit, ok := expr.X.(*syntax.Literal) |
| if !ok { |
| return "" |
| } |
| if lit.Token != syntax.STRING { |
| return "" |
| } |
| return lit.Value.(string) |
| } |
| |
| func (insn *insn) stackeffect() int { |
| se := int(stackEffect[insn.op]) |
| if se == variableStackEffect { |
| arg := int(insn.arg) |
| switch insn.op { |
| case CALL, CALL_KW, CALL_VAR, CALL_VAR_KW: |
| se = -int(2*(insn.arg&0xff) + insn.arg>>8) |
| if insn.op != CALL { |
| se-- |
| } |
| if insn.op == CALL_VAR_KW { |
| se-- |
| } |
| case ITERJMP: |
| // Stack effect differs by successor: |
| // +1 for jmp/false/ok |
| // 0 for cjmp/true/exhausted |
| // Handled specially in caller. |
| se = 0 |
| case MAKELIST, MAKETUPLE: |
| se = 1 - arg |
| case UNPACK: |
| se = arg - 1 |
| default: |
| panic(insn.op) |
| } |
| } |
| return se |
| } |
| |
| // generate emits the linear instruction stream from the CFG, |
| // and builds the PC-to-line number table. |
| func (fcomp *fcomp) generate(blocks []*block, codelen uint32) { |
| code := make([]byte, 0, codelen) |
| var pclinetab []uint16 |
| prev := pclinecol{ |
| pc: 0, |
| line: fcomp.fn.Pos.Line, |
| col: fcomp.fn.Pos.Col, |
| } |
| |
| for _, b := range blocks { |
| if Disassemble { |
| fmt.Fprintf(os.Stderr, "%d:\n", b.index) |
| } |
| pc := b.addr |
| for _, insn := range b.insns { |
| if insn.line != 0 { |
| // Instruction has a source position. Delta-encode it. |
| // See Funcode.Position for the encoding. |
| for { |
| var incomplete uint16 |
| |
| // Δpc, uint4 |
| deltapc := pc - prev.pc |
| if deltapc > 0x0f { |
| deltapc = 0x0f |
| incomplete = 1 |
| } |
| prev.pc += deltapc |
| |
| // Δline, int5 |
| deltaline, ok := clip(insn.line-prev.line, -0x10, 0x0f) |
| if !ok { |
| incomplete = 1 |
| } |
| prev.line += deltaline |
| |
| // Δcol, int6 |
| deltacol, ok := clip(insn.col-prev.col, -0x20, 0x1f) |
| if !ok { |
| incomplete = 1 |
| } |
| prev.col += deltacol |
| |
| entry := uint16(deltapc<<12) | uint16(deltaline&0x1f)<<7 | uint16(deltacol&0x3f)<<1 | incomplete |
| pclinetab = append(pclinetab, entry) |
| if incomplete == 0 { |
| break |
| } |
| } |
| |
| if Disassemble { |
| fmt.Fprintf(os.Stderr, "\t\t\t\t\t; %s:%d:%d\n", |
| filepath.Base(fcomp.fn.Pos.Filename()), insn.line, insn.col) |
| } |
| } |
| if Disassemble { |
| PrintOp(fcomp.fn, pc, insn.op, insn.arg) |
| } |
| code = append(code, byte(insn.op)) |
| pc++ |
| if insn.op >= OpcodeArgMin { |
| if insn.op == CJMP || insn.op == ITERJMP { |
| code = addUint32(code, insn.arg, 4) // pad arg to 4 bytes |
| } else { |
| code = addUint32(code, insn.arg, 0) |
| } |
| pc = uint32(len(code)) |
| } |
| } |
| |
| if b.jmp != nil && b.jmp.index != b.index+1 { |
| addr := b.jmp.addr |
| if Disassemble { |
| fmt.Fprintf(os.Stderr, "\t%d\tjmp\t\t%d\t; block %d\n", |
| pc, addr, b.jmp.index) |
| } |
| code = append(code, byte(JMP)) |
| code = addUint32(code, addr, 4) |
| } |
| } |
| if len(code) != int(codelen) { |
| panic("internal error: wrong code length") |
| } |
| |
| fcomp.fn.pclinetab = pclinetab |
| fcomp.fn.Code = code |
| } |
| |
| // clip returns the value nearest x in the range [min...max], |
| // and whether it equals x. |
| func clip(x, min, max int32) (int32, bool) { |
| if x > max { |
| return max, false |
| } else if x < min { |
| return min, false |
| } else { |
| return x, true |
| } |
| } |
| |
| // addUint32 encodes x as 7-bit little-endian varint. |
| // TODO(adonovan): opt: steal top two bits of opcode |
| // to encode the number of complete bytes that follow. |
| func addUint32(code []byte, x uint32, min int) []byte { |
| end := len(code) + min |
| for x >= 0x80 { |
| code = append(code, byte(x)|0x80) |
| x >>= 7 |
| } |
| code = append(code, byte(x)) |
| // Pad the operand with NOPs to exactly min bytes. |
| for len(code) < end { |
| code = append(code, byte(NOP)) |
| } |
| return code |
| } |
| |
| func argLen(x uint32) int { |
| n := 0 |
| for x >= 0x80 { |
| n++ |
| x >>= 7 |
| } |
| return n + 1 |
| } |
| |
| // PrintOp prints an instruction. |
| // It is provided for debugging. |
| func PrintOp(fn *Funcode, pc uint32, op Opcode, arg uint32) { |
| if op < OpcodeArgMin { |
| fmt.Fprintf(os.Stderr, "\t%d\t%s\n", pc, op) |
| return |
| } |
| |
| var comment string |
| switch op { |
| case CONSTANT: |
| switch x := fn.Prog.Constants[arg].(type) { |
| case string: |
| comment = strconv.Quote(x) |
| case Bytes: |
| comment = "b" + strconv.Quote(string(x)) |
| default: |
| comment = fmt.Sprint(x) |
| } |
| case MAKEFUNC: |
| comment = fn.Prog.Functions[arg].Name |
| case SETLOCAL, LOCAL: |
| comment = fn.Locals[arg].Name |
| case SETGLOBAL, GLOBAL: |
| comment = fn.Prog.Globals[arg].Name |
| case ATTR, SETFIELD, PREDECLARED, UNIVERSAL: |
| comment = fn.Prog.Names[arg] |
| case FREE: |
| comment = fn.Freevars[arg].Name |
| case CALL, CALL_VAR, CALL_KW, CALL_VAR_KW: |
| comment = fmt.Sprintf("%d pos, %d named", arg>>8, arg&0xff) |
| default: |
| // JMP, CJMP, ITERJMP, MAKETUPLE, MAKELIST, LOAD, UNPACK: |
| // arg is just a number |
| } |
| var buf bytes.Buffer |
| fmt.Fprintf(&buf, "\t%d\t%-10s\t%d", pc, op, arg) |
| if comment != "" { |
| fmt.Fprint(&buf, "\t; ", comment) |
| } |
| fmt.Fprintln(&buf) |
| os.Stderr.Write(buf.Bytes()) |
| } |
| |
| // newBlock returns a new block. |
| func (fcomp) newBlock() *block { |
| return &block{index: -1, initialstack: -1} |
| } |
| |
| // emit emits an instruction to the current block. |
| func (fcomp *fcomp) emit(op Opcode) { |
| if op >= OpcodeArgMin { |
| panic("missing arg: " + op.String()) |
| } |
| insn := insn{op: op, line: fcomp.pos.Line, col: fcomp.pos.Col} |
| fcomp.block.insns = append(fcomp.block.insns, insn) |
| fcomp.pos.Line = 0 |
| fcomp.pos.Col = 0 |
| } |
| |
| // emit1 emits an instruction with an immediate operand. |
| func (fcomp *fcomp) emit1(op Opcode, arg uint32) { |
| if op < OpcodeArgMin { |
| panic("unwanted arg: " + op.String()) |
| } |
| insn := insn{op: op, arg: arg, line: fcomp.pos.Line, col: fcomp.pos.Col} |
| fcomp.block.insns = append(fcomp.block.insns, insn) |
| fcomp.pos.Line = 0 |
| fcomp.pos.Col = 0 |
| } |
| |
| // jump emits a jump to the specified block. |
| // On return, the current block is unset. |
| func (fcomp *fcomp) jump(b *block) { |
| if b == fcomp.block { |
| panic("self-jump") // unreachable: Starlark has no arbitrary looping constructs |
| } |
| fcomp.block.jmp = b |
| fcomp.block = nil |
| } |
| |
| // condjump emits a conditional jump (CJMP or ITERJMP) |
| // to the specified true/false blocks. |
| // (For ITERJMP, the cases are jmp/f/ok and cjmp/t/exhausted.) |
| // On return, the current block is unset. |
| func (fcomp *fcomp) condjump(op Opcode, t, f *block) { |
| if !(op == CJMP || op == ITERJMP) { |
| panic("not a conditional jump: " + op.String()) |
| } |
| fcomp.emit1(op, 0) // fill in address later |
| fcomp.block.cjmp = t |
| fcomp.jump(f) |
| } |
| |
| // nameIndex returns the index of the specified name |
| // within the name pool, adding it if necessary. |
| func (pcomp *pcomp) nameIndex(name string) uint32 { |
| index, ok := pcomp.names[name] |
| if !ok { |
| index = uint32(len(pcomp.prog.Names)) |
| pcomp.names[name] = index |
| pcomp.prog.Names = append(pcomp.prog.Names, name) |
| } |
| return index |
| } |
| |
| // constantIndex returns the index of the specified constant |
| // within the constant pool, adding it if necessary. |
| func (pcomp *pcomp) constantIndex(v interface{}) uint32 { |
| index, ok := pcomp.constants[v] |
| if !ok { |
| index = uint32(len(pcomp.prog.Constants)) |
| pcomp.constants[v] = index |
| pcomp.prog.Constants = append(pcomp.prog.Constants, v) |
| } |
| return index |
| } |
| |
| // functionIndex returns the index of the specified function |
| // AST the nestedfun pool, adding it if necessary. |
| func (pcomp *pcomp) functionIndex(fn *Funcode) uint32 { |
| index, ok := pcomp.functions[fn] |
| if !ok { |
| index = uint32(len(pcomp.prog.Functions)) |
| pcomp.functions[fn] = index |
| pcomp.prog.Functions = append(pcomp.prog.Functions, fn) |
| } |
| return index |
| } |
| |
| // string emits code to push the specified string. |
| func (fcomp *fcomp) string(s string) { |
| fcomp.emit1(CONSTANT, fcomp.pcomp.constantIndex(s)) |
| } |
| |
| // setPos sets the current source position. |
| // It should be called prior to any operation that can fail dynamically. |
| // All positions are assumed to belong to the same file. |
| func (fcomp *fcomp) setPos(pos syntax.Position) { |
| fcomp.pos = pos |
| } |
| |
| // set emits code to store the top-of-stack value |
| // to the specified local, cell, or global variable. |
| func (fcomp *fcomp) set(id *syntax.Ident) { |
| bind := id.Binding.(*resolve.Binding) |
| switch bind.Scope { |
| case resolve.Local: |
| fcomp.emit1(SETLOCAL, uint32(bind.Index)) |
| case resolve.Cell: |
| fcomp.emit1(SETLOCALCELL, uint32(bind.Index)) |
| case resolve.Global: |
| fcomp.emit1(SETGLOBAL, uint32(bind.Index)) |
| default: |
| log.Panicf("%s: set(%s): not global/local/cell (%d)", id.NamePos, id.Name, bind.Scope) |
| } |
| } |
| |
| // lookup emits code to push the value of the specified variable. |
| func (fcomp *fcomp) lookup(id *syntax.Ident) { |
| bind := id.Binding.(*resolve.Binding) |
| if bind.Scope != resolve.Universal { // (universal lookup can't fail) |
| fcomp.setPos(id.NamePos) |
| } |
| switch bind.Scope { |
| case resolve.Local: |
| fcomp.emit1(LOCAL, uint32(bind.Index)) |
| case resolve.Free: |
| fcomp.emit1(FREECELL, uint32(bind.Index)) |
| case resolve.Cell: |
| fcomp.emit1(LOCALCELL, uint32(bind.Index)) |
| case resolve.Global: |
| fcomp.emit1(GLOBAL, uint32(bind.Index)) |
| case resolve.Predeclared: |
| fcomp.emit1(PREDECLARED, fcomp.pcomp.nameIndex(id.Name)) |
| case resolve.Universal: |
| fcomp.emit1(UNIVERSAL, fcomp.pcomp.nameIndex(id.Name)) |
| default: |
| log.Panicf("%s: compiler.lookup(%s): scope = %d", id.NamePos, id.Name, bind.Scope) |
| } |
| } |
| |
| func (fcomp *fcomp) stmts(stmts []syntax.Stmt) { |
| for _, stmt := range stmts { |
| fcomp.stmt(stmt) |
| } |
| } |
| |
| func (fcomp *fcomp) stmt(stmt syntax.Stmt) { |
| switch stmt := stmt.(type) { |
| case *syntax.ExprStmt: |
| if _, ok := stmt.X.(*syntax.Literal); ok { |
| // Opt: don't compile doc comments only to pop them. |
| return |
| } |
| fcomp.expr(stmt.X) |
| fcomp.emit(POP) |
| |
| case *syntax.BranchStmt: |
| // Resolver invariant: break/continue appear only within loops. |
| switch stmt.Token { |
| case syntax.PASS: |
| // no-op |
| case syntax.BREAK: |
| b := fcomp.loops[len(fcomp.loops)-1].break_ |
| fcomp.jump(b) |
| fcomp.block = fcomp.newBlock() // dead code |
| case syntax.CONTINUE: |
| b := fcomp.loops[len(fcomp.loops)-1].continue_ |
| fcomp.jump(b) |
| fcomp.block = fcomp.newBlock() // dead code |
| } |
| |
| case *syntax.IfStmt: |
| // Keep consistent with CondExpr. |
| t := fcomp.newBlock() |
| f := fcomp.newBlock() |
| done := fcomp.newBlock() |
| |
| fcomp.ifelse(stmt.Cond, t, f) |
| |
| fcomp.block = t |
| fcomp.stmts(stmt.True) |
| fcomp.jump(done) |
| |
| fcomp.block = f |
| fcomp.stmts(stmt.False) |
| fcomp.jump(done) |
| |
| fcomp.block = done |
| |
| case *syntax.AssignStmt: |
| switch stmt.Op { |
| case syntax.EQ: |
| // simple assignment: x = y |
| fcomp.expr(stmt.RHS) |
| fcomp.assign(stmt.OpPos, stmt.LHS) |
| |
| case syntax.PLUS_EQ, |
| syntax.MINUS_EQ, |
| syntax.STAR_EQ, |
| syntax.SLASH_EQ, |
| syntax.SLASHSLASH_EQ, |
| syntax.PERCENT_EQ, |
| syntax.AMP_EQ, |
| syntax.PIPE_EQ, |
| syntax.CIRCUMFLEX_EQ, |
| syntax.LTLT_EQ, |
| syntax.GTGT_EQ: |
| // augmented assignment: x += y |
| |
| var set func() |
| |
| // Evaluate "address" of x exactly once to avoid duplicate side-effects. |
| switch lhs := unparen(stmt.LHS).(type) { |
| case *syntax.Ident: |
| // x = ... |
| fcomp.lookup(lhs) |
| set = func() { |
| fcomp.set(lhs) |
| } |
| |
| case *syntax.IndexExpr: |
| // x[y] = ... |
| fcomp.expr(lhs.X) |
| fcomp.expr(lhs.Y) |
| fcomp.emit(DUP2) |
| fcomp.setPos(lhs.Lbrack) |
| fcomp.emit(INDEX) |
| set = func() { |
| fcomp.setPos(lhs.Lbrack) |
| fcomp.emit(SETINDEX) |
| } |
| |
| case *syntax.DotExpr: |
| // x.f = ... |
| fcomp.expr(lhs.X) |
| fcomp.emit(DUP) |
| name := fcomp.pcomp.nameIndex(lhs.Name.Name) |
| fcomp.setPos(lhs.Dot) |
| fcomp.emit1(ATTR, name) |
| set = func() { |
| fcomp.setPos(lhs.Dot) |
| fcomp.emit1(SETFIELD, name) |
| } |
| |
| default: |
| panic(lhs) |
| } |
| |
| fcomp.expr(stmt.RHS) |
| |
| if stmt.Op == syntax.PLUS_EQ { |
| // Allow the runtime to optimize list += iterable. |
| fcomp.setPos(stmt.OpPos) |
| fcomp.emit(INPLACE_ADD) |
| } else { |
| fcomp.binop(stmt.OpPos, stmt.Op-syntax.PLUS_EQ+syntax.PLUS) |
| } |
| set() |
| } |
| |
| case *syntax.DefStmt: |
| fcomp.function(stmt.Function.(*resolve.Function)) |
| fcomp.set(stmt.Name) |
| |
| case *syntax.ForStmt: |
| // Keep consistent with ForClause. |
| head := fcomp.newBlock() |
| body := fcomp.newBlock() |
| tail := fcomp.newBlock() |
| |
| fcomp.expr(stmt.X) |
| fcomp.setPos(stmt.For) |
| fcomp.emit(ITERPUSH) |
| fcomp.jump(head) |
| |
| fcomp.block = head |
| fcomp.condjump(ITERJMP, tail, body) |
| |
| fcomp.block = body |
| fcomp.assign(stmt.For, stmt.Vars) |
| fcomp.loops = append(fcomp.loops, loop{break_: tail, continue_: head}) |
| fcomp.stmts(stmt.Body) |
| fcomp.loops = fcomp.loops[:len(fcomp.loops)-1] |
| fcomp.jump(head) |
| |
| fcomp.block = tail |
| fcomp.emit(ITERPOP) |
| |
| case *syntax.WhileStmt: |
| head := fcomp.newBlock() |
| body := fcomp.newBlock() |
| done := fcomp.newBlock() |
| |
| fcomp.jump(head) |
| fcomp.block = head |
| fcomp.ifelse(stmt.Cond, body, done) |
| |
| fcomp.block = body |
| fcomp.loops = append(fcomp.loops, loop{break_: done, continue_: head}) |
| fcomp.stmts(stmt.Body) |
| fcomp.loops = fcomp.loops[:len(fcomp.loops)-1] |
| fcomp.jump(head) |
| |
| fcomp.block = done |
| |
| case *syntax.ReturnStmt: |
| if stmt.Result != nil { |
| fcomp.expr(stmt.Result) |
| } else { |
| fcomp.emit(NONE) |
| } |
| fcomp.emit(RETURN) |
| fcomp.block = fcomp.newBlock() // dead code |
| |
| case *syntax.LoadStmt: |
| for i := range stmt.From { |
| fcomp.string(stmt.From[i].Name) |
| } |
| module := stmt.Module.Value.(string) |
| fcomp.pcomp.prog.Loads = append(fcomp.pcomp.prog.Loads, Binding{ |
| Name: module, |
| Pos: stmt.Module.TokenPos, |
| }) |
| fcomp.string(module) |
| fcomp.setPos(stmt.Load) |
| fcomp.emit1(LOAD, uint32(len(stmt.From))) |
| for i := range stmt.To { |
| fcomp.set(stmt.To[len(stmt.To)-1-i]) |
| } |
| |
| default: |
| start, _ := stmt.Span() |
| log.Panicf("%s: exec: unexpected statement %T", start, stmt) |
| } |
| } |
| |
| // assign implements lhs = rhs for arbitrary expressions lhs. |
| // RHS is on top of stack, consumed. |
| func (fcomp *fcomp) assign(pos syntax.Position, lhs syntax.Expr) { |
| switch lhs := lhs.(type) { |
| case *syntax.ParenExpr: |
| // (lhs) = rhs |
| fcomp.assign(pos, lhs.X) |
| |
| case *syntax.Ident: |
| // x = rhs |
| fcomp.set(lhs) |
| |
| case *syntax.TupleExpr: |
| // x, y = rhs |
| fcomp.assignSequence(pos, lhs.List) |
| |
| case *syntax.ListExpr: |
| // [x, y] = rhs |
| fcomp.assignSequence(pos, lhs.List) |
| |
| case *syntax.IndexExpr: |
| // x[y] = rhs |
| fcomp.expr(lhs.X) |
| fcomp.emit(EXCH) |
| fcomp.expr(lhs.Y) |
| fcomp.emit(EXCH) |
| fcomp.setPos(lhs.Lbrack) |
| fcomp.emit(SETINDEX) |
| |
| case *syntax.DotExpr: |
| // x.f = rhs |
| fcomp.expr(lhs.X) |
| fcomp.emit(EXCH) |
| fcomp.setPos(lhs.Dot) |
| fcomp.emit1(SETFIELD, fcomp.pcomp.nameIndex(lhs.Name.Name)) |
| |
| default: |
| panic(lhs) |
| } |
| } |
| |
| func (fcomp *fcomp) assignSequence(pos syntax.Position, lhs []syntax.Expr) { |
| fcomp.setPos(pos) |
| fcomp.emit1(UNPACK, uint32(len(lhs))) |
| for i := range lhs { |
| fcomp.assign(pos, lhs[i]) |
| } |
| } |
| |
| func (fcomp *fcomp) expr(e syntax.Expr) { |
| switch e := e.(type) { |
| case *syntax.ParenExpr: |
| fcomp.expr(e.X) |
| |
| case *syntax.Ident: |
| fcomp.lookup(e) |
| |
| case *syntax.Literal: |
| // e.Value is int64, float64, *bigInt, string |
| v := e.Value |
| if e.Token == syntax.BYTES { |
| v = Bytes(v.(string)) |
| } |
| fcomp.emit1(CONSTANT, fcomp.pcomp.constantIndex(v)) |
| |
| case *syntax.ListExpr: |
| for _, x := range e.List { |
| fcomp.expr(x) |
| } |
| fcomp.emit1(MAKELIST, uint32(len(e.List))) |
| |
| case *syntax.CondExpr: |
| // Keep consistent with IfStmt. |
| t := fcomp.newBlock() |
| f := fcomp.newBlock() |
| done := fcomp.newBlock() |
| |
| fcomp.ifelse(e.Cond, t, f) |
| |
| fcomp.block = t |
| fcomp.expr(e.True) |
| fcomp.jump(done) |
| |
| fcomp.block = f |
| fcomp.expr(e.False) |
| fcomp.jump(done) |
| |
| fcomp.block = done |
| |
| case *syntax.IndexExpr: |
| fcomp.expr(e.X) |
| fcomp.expr(e.Y) |
| fcomp.setPos(e.Lbrack) |
| fcomp.emit(INDEX) |
| |
| case *syntax.SliceExpr: |
| fcomp.setPos(e.Lbrack) |
| fcomp.expr(e.X) |
| if e.Lo != nil { |
| fcomp.expr(e.Lo) |
| } else { |
| fcomp.emit(NONE) |
| } |
| if e.Hi != nil { |
| fcomp.expr(e.Hi) |
| } else { |
| fcomp.emit(NONE) |
| } |
| if e.Step != nil { |
| fcomp.expr(e.Step) |
| } else { |
| fcomp.emit(NONE) |
| } |
| fcomp.emit(SLICE) |
| |
| case *syntax.Comprehension: |
| if e.Curly { |
| fcomp.emit(MAKEDICT) |
| } else { |
| fcomp.emit1(MAKELIST, 0) |
| } |
| fcomp.comprehension(e, 0) |
| |
| case *syntax.TupleExpr: |
| fcomp.tuple(e.List) |
| |
| case *syntax.DictExpr: |
| fcomp.emit(MAKEDICT) |
| for _, entry := range e.List { |
| entry := entry.(*syntax.DictEntry) |
| fcomp.emit(DUP) |
| fcomp.expr(entry.Key) |
| fcomp.expr(entry.Value) |
| fcomp.setPos(entry.Colon) |
| fcomp.emit(SETDICTUNIQ) |
| } |
| |
| case *syntax.UnaryExpr: |
| fcomp.expr(e.X) |
| fcomp.setPos(e.OpPos) |
| switch e.Op { |
| case syntax.MINUS: |
| fcomp.emit(UMINUS) |
| case syntax.PLUS: |
| fcomp.emit(UPLUS) |
| case syntax.NOT: |
| fcomp.emit(NOT) |
| case syntax.TILDE: |
| fcomp.emit(TILDE) |
| default: |
| log.Panicf("%s: unexpected unary op: %s", e.OpPos, e.Op) |
| } |
| |
| case *syntax.BinaryExpr: |
| switch e.Op { |
| // short-circuit operators |
| // TODO(adonovan): use ifelse to simplify conditions. |
| case syntax.OR: |
| // x or y => if x then x else y |
| done := fcomp.newBlock() |
| y := fcomp.newBlock() |
| |
| fcomp.expr(e.X) |
| fcomp.emit(DUP) |
| fcomp.condjump(CJMP, done, y) |
| |
| fcomp.block = y |
| fcomp.emit(POP) // discard X |
| fcomp.expr(e.Y) |
| fcomp.jump(done) |
| |
| fcomp.block = done |
| |
| case syntax.AND: |
| // x and y => if x then y else x |
| done := fcomp.newBlock() |
| y := fcomp.newBlock() |
| |
| fcomp.expr(e.X) |
| fcomp.emit(DUP) |
| fcomp.condjump(CJMP, y, done) |
| |
| fcomp.block = y |
| fcomp.emit(POP) // discard X |
| fcomp.expr(e.Y) |
| fcomp.jump(done) |
| |
| fcomp.block = done |
| |
| case syntax.PLUS: |
| fcomp.plus(e) |
| |
| default: |
| // all other strict binary operator (includes comparisons) |
| fcomp.expr(e.X) |
| fcomp.expr(e.Y) |
| fcomp.binop(e.OpPos, e.Op) |
| } |
| |
| case *syntax.DotExpr: |
| fcomp.expr(e.X) |
| fcomp.setPos(e.Dot) |
| fcomp.emit1(ATTR, fcomp.pcomp.nameIndex(e.Name.Name)) |
| |
| case *syntax.CallExpr: |
| fcomp.call(e) |
| |
| case *syntax.LambdaExpr: |
| fcomp.function(e.Function.(*resolve.Function)) |
| |
| default: |
| start, _ := e.Span() |
| log.Panicf("%s: unexpected expr %T", start, e) |
| } |
| } |
| |
| type summand struct { |
| x syntax.Expr |
| plusPos syntax.Position |
| } |
| |
| // plus emits optimized code for ((a+b)+...)+z that avoids naive |
| // quadratic behavior for strings, tuples, and lists, |
| // and folds together adjacent literals of the same type. |
| func (fcomp *fcomp) plus(e *syntax.BinaryExpr) { |
| // Gather all the right operands of the left tree of plusses. |
| // A tree (((a+b)+c)+d) becomes args=[a +b +c +d]. |
| args := make([]summand, 0, 2) // common case: 2 operands |
| for plus := e; ; { |
| args = append(args, summand{unparen(plus.Y), plus.OpPos}) |
| left := unparen(plus.X) |
| x, ok := left.(*syntax.BinaryExpr) |
| if !ok || x.Op != syntax.PLUS { |
| args = append(args, summand{x: left}) |
| break |
| } |
| plus = x |
| } |
| // Reverse args to syntactic order. |
| for i, n := 0, len(args)/2; i < n; i++ { |
| j := len(args) - 1 - i |
| args[i], args[j] = args[j], args[i] |
| } |
| |
| // Fold sums of adjacent literals of the same type: ""+"", []+[], ()+(). |
| out := args[:0] // compact in situ |
| for i := 0; i < len(args); { |
| j := i + 1 |
| if code := addable(args[i].x); code != 0 { |
| for j < len(args) && addable(args[j].x) == code { |
| j++ |
| } |
| if j > i+1 { |
| args[i].x = add(code, args[i:j]) |
| } |
| } |
| out = append(out, args[i]) |
| i = j |
| } |
| args = out |
| |
| // Emit code for an n-ary sum (n > 0). |
| fcomp.expr(args[0].x) |
| for _, summand := range args[1:] { |
| fcomp.expr(summand.x) |
| fcomp.setPos(summand.plusPos) |
| fcomp.emit(PLUS) |
| } |
| |
| // If len(args) > 2, use of an accumulator instead of a chain of |
| // PLUS operations may be more efficient. |
| // However, no gain was measured on a workload analogous to Bazel loading; |
| // TODO(adonovan): opt: re-evaluate on a Bazel analysis-like workload. |
| // |
| // We cannot use a single n-ary SUM operation |
| // a b c SUM<3> |
| // because we need to report a distinct error for each |
| // individual '+' operation, so three additional operations are |
| // needed: |
| // |
| // ACCSTART => create buffer and append to it |
| // ACCUM => append to buffer |
| // ACCEND => get contents of buffer |
| // |
| // For string, list, and tuple values, the interpreter can |
| // optimize these operations by using a mutable buffer. |
| // For all other types, ACCSTART and ACCEND would behave like |
| // the identity function and ACCUM behaves like PLUS. |
| // ACCUM must correctly support user-defined operations |
| // such as list+foo. |
| // |
| // fcomp.emit(ACCSTART) |
| // for _, summand := range args[1:] { |
| // fcomp.expr(summand.x) |
| // fcomp.setPos(summand.plusPos) |
| // fcomp.emit(ACCUM) |
| // } |
| // fcomp.emit(ACCEND) |
| } |
| |
| // addable reports whether e is a statically addable |
| // expression: a [s]tring, [b]ytes, [l]ist, or [t]uple. |
| func addable(e syntax.Expr) rune { |
| switch e := e.(type) { |
| case *syntax.Literal: |
| // TODO(adonovan): opt: support INT/FLOAT/BIGINT constant folding. |
| switch e.Token { |
| case syntax.STRING: |
| return 's' |
| case syntax.BYTES: |
| return 'b' |
| } |
| case *syntax.ListExpr: |
| return 'l' |
| case *syntax.TupleExpr: |
| return 't' |
| } |
| return 0 |
| } |
| |
| // add returns an expression denoting the sum of args, |
| // which are all addable values of the type indicated by code. |
| // The resulting syntax is degenerate, lacking position, etc. |
| func add(code rune, args []summand) syntax.Expr { |
| switch code { |
| case 's', 'b': |
| var buf strings.Builder |
| for _, arg := range args { |
| buf.WriteString(arg.x.(*syntax.Literal).Value.(string)) |
| } |
| tok := syntax.STRING |
| if code == 'b' { |
| tok = syntax.BYTES |
| } |
| return &syntax.Literal{Token: tok, Value: buf.String()} |
| case 'l': |
| var elems []syntax.Expr |
| for _, arg := range args { |
| elems = append(elems, arg.x.(*syntax.ListExpr).List...) |
| } |
| return &syntax.ListExpr{List: elems} |
| case 't': |
| var elems []syntax.Expr |
| for _, arg := range args { |
| elems = append(elems, arg.x.(*syntax.TupleExpr).List...) |
| } |
| return &syntax.TupleExpr{List: elems} |
| } |
| panic(code) |
| } |
| |
| func unparen(e syntax.Expr) syntax.Expr { |
| if p, ok := e.(*syntax.ParenExpr); ok { |
| return unparen(p.X) |
| } |
| return e |
| } |
| |
| func (fcomp *fcomp) binop(pos syntax.Position, op syntax.Token) { |
| // TODO(adonovan): simplify by assuming syntax and compiler constants align. |
| fcomp.setPos(pos) |
| switch op { |
| // arithmetic |
| case syntax.PLUS: |
| fcomp.emit(PLUS) |
| case syntax.MINUS: |
| fcomp.emit(MINUS) |
| case syntax.STAR: |
| fcomp.emit(STAR) |
| case syntax.SLASH: |
| fcomp.emit(SLASH) |
| case syntax.SLASHSLASH: |
| fcomp.emit(SLASHSLASH) |
| case syntax.PERCENT: |
| fcomp.emit(PERCENT) |
| case syntax.AMP: |
| fcomp.emit(AMP) |
| case syntax.PIPE: |
| fcomp.emit(PIPE) |
| case syntax.CIRCUMFLEX: |
| fcomp.emit(CIRCUMFLEX) |
| case syntax.LTLT: |
| fcomp.emit(LTLT) |
| case syntax.GTGT: |
| fcomp.emit(GTGT) |
| case syntax.IN: |
| fcomp.emit(IN) |
| case syntax.NOT_IN: |
| fcomp.emit(IN) |
| fcomp.emit(NOT) |
| |
| // comparisons |
| case syntax.EQL, |
| syntax.NEQ, |
| syntax.GT, |
| syntax.LT, |
| syntax.LE, |
| syntax.GE: |
| fcomp.emit(Opcode(op-syntax.EQL) + EQL) |
| |
| default: |
| log.Panicf("%s: unexpected binary op: %s", pos, op) |
| } |
| } |
| |
| func (fcomp *fcomp) call(call *syntax.CallExpr) { |
| // TODO(adonovan): opt: Use optimized path for calling methods |
| // of built-ins: x.f(...) to avoid materializing a closure. |
| // if dot, ok := call.Fcomp.(*syntax.DotExpr); ok { |
| // fcomp.expr(dot.X) |
| // fcomp.args(call) |
| // fcomp.emit1(CALL_ATTR, fcomp.name(dot.Name.Name)) |
| // return |
| // } |
| |
| // usual case |
| fcomp.expr(call.Fn) |
| op, arg := fcomp.args(call) |
| fcomp.setPos(call.Lparen) |
| fcomp.emit1(op, arg) |
| } |
| |
| // args emits code to push a tuple of positional arguments |
| // and a tuple of named arguments containing alternating keys and values. |
| // Either or both tuples may be empty (TODO(adonovan): optimize). |
| func (fcomp *fcomp) args(call *syntax.CallExpr) (op Opcode, arg uint32) { |
| var callmode int |
| // Compute the number of each kind of parameter. |
| var p, n int // number of positional, named arguments |
| var varargs, kwargs syntax.Expr |
| for _, arg := range call.Args { |
| if binary, ok := arg.(*syntax.BinaryExpr); ok && binary.Op == syntax.EQ { |
| |
| // named argument (name, value) |
| fcomp.string(binary.X.(*syntax.Ident).Name) |
| fcomp.expr(binary.Y) |
| n++ |
| continue |
| } |
| if unary, ok := arg.(*syntax.UnaryExpr); ok { |
| if unary.Op == syntax.STAR { |
| callmode |= 1 |
| varargs = unary.X |
| continue |
| } else if unary.Op == syntax.STARSTAR { |
| callmode |= 2 |
| kwargs = unary.X |
| continue |
| } |
| } |
| |
| // positional argument |
| fcomp.expr(arg) |
| p++ |
| } |
| |
| // Python2 and Python3 both permit named arguments |
| // to appear both before and after a *args argument: |
| // f(1, 2, x=3, *[4], y=5, **dict(z=6)) |
| // |
| // They also differ in their evaluation order: |
| // Python2: 1 2 3 5 4 6 (*args and **kwargs evaluated last) |
| // Python3: 1 2 4 3 5 6 (positional args evaluated before named args) |
| // Starlark-in-Java historically used a third order: |
| // Lexical: 1 2 3 4 5 6 (all args evaluated left-to-right) |
| // |
| // After discussion in github.com/bazelbuild/starlark#13, the |
| // spec now requires Starlark to statically reject named |
| // arguments after *args (e.g. y=5), and to use Python2-style |
| // evaluation order. This is both easy to implement and |
| // consistent with lexical order: |
| // |
| // f(1, 2, x=3, *[4], **dict(z=6)) # 1 2 3 4 6 |
| |
| // *args |
| if varargs != nil { |
| fcomp.expr(varargs) |
| } |
| |
| // **kwargs |
| if kwargs != nil { |
| fcomp.expr(kwargs) |
| } |
| |
| // TODO(adonovan): avoid this with a more flexible encoding. |
| if p >= 256 || n >= 256 { |
| // resolve already checked this; should be unreachable |
| panic("too many arguments in call") |
| } |
| |
| return CALL + Opcode(callmode), uint32(p<<8 | n) |
| } |
| |
| func (fcomp *fcomp) tuple(elems []syntax.Expr) { |
| for _, elem := range elems { |
| fcomp.expr(elem) |
| } |
| fcomp.emit1(MAKETUPLE, uint32(len(elems))) |
| } |
| |
| func (fcomp *fcomp) comprehension(comp *syntax.Comprehension, clauseIndex int) { |
| if clauseIndex == len(comp.Clauses) { |
| fcomp.emit(DUP) // accumulator |
| if comp.Curly { |
| // dict: {k:v for ...} |
| // Parser ensures that body is of form k:v. |
| // Python-style set comprehensions {body for vars in x} |
| // are not supported. |
| entry := comp.Body.(*syntax.DictEntry) |
| fcomp.expr(entry.Key) |
| fcomp.expr(entry.Value) |
| fcomp.setPos(entry.Colon) |
| fcomp.emit(SETDICT) |
| } else { |
| // list: [body for vars in x] |
| fcomp.expr(comp.Body) |
| fcomp.emit(APPEND) |
| } |
| return |
| } |
| |
| clause := comp.Clauses[clauseIndex] |
| switch clause := clause.(type) { |
| case *syntax.IfClause: |
| t := fcomp.newBlock() |
| done := fcomp.newBlock() |
| fcomp.ifelse(clause.Cond, t, done) |
| |
| fcomp.block = t |
| fcomp.comprehension(comp, clauseIndex+1) |
| fcomp.jump(done) |
| |
| fcomp.block = done |
| return |
| |
| case *syntax.ForClause: |
| // Keep consistent with ForStmt. |
| head := fcomp.newBlock() |
| body := fcomp.newBlock() |
| tail := fcomp.newBlock() |
| |
| fcomp.expr(clause.X) |
| fcomp.setPos(clause.For) |
| fcomp.emit(ITERPUSH) |
| fcomp.jump(head) |
| |
| fcomp.block = head |
| fcomp.condjump(ITERJMP, tail, body) |
| |
| fcomp.block = body |
| fcomp.assign(clause.For, clause.Vars) |
| fcomp.comprehension(comp, clauseIndex+1) |
| fcomp.jump(head) |
| |
| fcomp.block = tail |
| fcomp.emit(ITERPOP) |
| return |
| } |
| |
| start, _ := clause.Span() |
| log.Panicf("%s: unexpected comprehension clause %T", start, clause) |
| } |
| |
| func (fcomp *fcomp) function(f *resolve.Function) { |
| // Evaluation of the defaults may fail, so record the position. |
| fcomp.setPos(f.Pos) |
| |
| // To reduce allocation, we emit a combined tuple |
| // for the defaults and the freevars. |
| // The function knows where to split it at run time. |
| |
| // Generate tuple of parameter defaults. For: |
| // def f(p1, p2=dp2, p3=dp3, *, k1, k2=dk2, k3, **kwargs) |
| // the tuple is: |
| // (dp2, dp3, MANDATORY, dk2, MANDATORY). |
| ndefaults := 0 |
| seenStar := false |
| for _, param := range f.Params { |
| switch param := param.(type) { |
| case *syntax.BinaryExpr: |
| fcomp.expr(param.Y) |
| ndefaults++ |
| case *syntax.UnaryExpr: |
| seenStar = true // * or *args (also **kwargs) |
| case *syntax.Ident: |
| if seenStar { |
| fcomp.emit(MANDATORY) |
| ndefaults++ |
| } |
| } |
| } |
| |
| // Capture the cells of the function's |
| // free variables from the lexical environment. |
| for _, freevar := range f.FreeVars { |
| // Don't call fcomp.lookup because we want |
| // the cell itself, not its content. |
| switch freevar.Scope { |
| case resolve.Free: |
| fcomp.emit1(FREE, uint32(freevar.Index)) |
| case resolve.Cell: |
| fcomp.emit1(LOCAL, uint32(freevar.Index)) |
| } |
| } |
| |
| fcomp.emit1(MAKETUPLE, uint32(ndefaults+len(f.FreeVars))) |
| |
| funcode := fcomp.pcomp.function(f.Name, f.Pos, f.Body, f.Locals, f.FreeVars) |
| |
| if debug { |
| // TODO(adonovan): do compilations sequentially not as a tree, |
| // to make the log easier to read. |
| // Simplify by identifying Toplevel and functionIndex 0. |
| fmt.Fprintf(os.Stderr, "resuming %s @ %s\n", fcomp.fn.Name, fcomp.pos) |
| } |
| |
| // def f(a, *, b=1) has only 2 parameters. |
| numParams := len(f.Params) |
| if f.NumKwonlyParams > 0 && !f.HasVarargs { |
| numParams-- |
| } |
| |
| funcode.NumParams = numParams |
| funcode.NumKwonlyParams = f.NumKwonlyParams |
| funcode.HasVarargs = f.HasVarargs |
| funcode.HasKwargs = f.HasKwargs |
| fcomp.emit1(MAKEFUNC, fcomp.pcomp.functionIndex(funcode)) |
| } |
| |
| // ifelse emits a Boolean control flow decision. |
| // On return, the current block is unset. |
| func (fcomp *fcomp) ifelse(cond syntax.Expr, t, f *block) { |
| switch cond := cond.(type) { |
| case *syntax.UnaryExpr: |
| if cond.Op == syntax.NOT { |
| // if not x then goto t else goto f |
| // => |
| // if x then goto f else goto t |
| fcomp.ifelse(cond.X, f, t) |
| return |
| } |
| |
| case *syntax.BinaryExpr: |
| switch cond.Op { |
| case syntax.AND: |
| // if x and y then goto t else goto f |
| // => |
| // if x then ifelse(y, t, f) else goto f |
| fcomp.expr(cond.X) |
| y := fcomp.newBlock() |
| fcomp.condjump(CJMP, y, f) |
| |
| fcomp.block = y |
| fcomp.ifelse(cond.Y, t, f) |
| return |
| |
| case syntax.OR: |
| // if x or y then goto t else goto f |
| // => |
| // if x then goto t else ifelse(y, t, f) |
| fcomp.expr(cond.X) |
| y := fcomp.newBlock() |
| fcomp.condjump(CJMP, t, y) |
| |
| fcomp.block = y |
| fcomp.ifelse(cond.Y, t, f) |
| return |
| case syntax.NOT_IN: |
| // if x not in y then goto t else goto f |
| // => |
| // if x in y then goto f else goto t |
| copy := *cond |
| copy.Op = syntax.IN |
| fcomp.expr(©) |
| fcomp.condjump(CJMP, f, t) |
| return |
| } |
| } |
| |
| // general case |
| fcomp.expr(cond) |
| fcomp.condjump(CJMP, t, f) |
| } |