blob: 65a18529f7b6ae02cb3f5eff7e646cb40d15a623 [file] [log] [blame]
Alan Donovan312d1a52017-10-02 10:10:28 -04001// Copyright 2017 The Bazel Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package syntax
6
Alan Donovane3deafe2018-10-23 11:05:09 -04007// This file defines a recursive-descent parser for Starlark.
8// The LL(1) grammar of Starlark and the names of many productions follow Python 2.7.
Alan Donovan312d1a52017-10-02 10:10:28 -04009//
10// TODO(adonovan): use syntax.Error more systematically throughout the
11// package. Verify that error positions are correct using the
12// chunkedfile mechanism.
13
alandonovanb7e3b1f2018-12-12 17:14:58 -050014import "log"
Alan Donovan312d1a52017-10-02 10:10:28 -040015
16// Enable this flag to print the token stream and log.Fatal on the first error.
17const debug = false
18
Laurent Le Brun689fc222018-02-22 19:37:18 +010019// A Mode value is a set of flags (or 0) that controls optional parser functionality.
20type Mode uint
21
22const (
23 RetainComments Mode = 1 << iota // retain comments in AST; see Node.Comments
24)
25
Alan Donovan312d1a52017-10-02 10:10:28 -040026// Parse parses the input data and returns the corresponding parse tree.
27//
28// If src != nil, ParseFile parses the source from src and the filename
29// is only used when recording position information.
30// The type of the argument for the src parameter must be string,
31// []byte, or io.Reader.
32// If src == nil, ParseFile parses the file specified by filename.
Laurent Le Brun689fc222018-02-22 19:37:18 +010033func Parse(filename string, src interface{}, mode Mode) (f *File, err error) {
34 in, err := newScanner(filename, src, mode&RetainComments != 0)
Alan Donovan312d1a52017-10-02 10:10:28 -040035 if err != nil {
36 return nil, err
37 }
38 p := parser{in: in}
39 defer p.in.recover(&err)
40
41 p.nextToken() // read first lookahead token
42 f = p.parseFile()
43 if f != nil {
44 f.Path = filename
45 }
Laurent Le Brun689fc222018-02-22 19:37:18 +010046 p.assignComments(f)
Alan Donovan312d1a52017-10-02 10:10:28 -040047 return f, nil
48}
49
alandonovan30e71c62019-01-04 13:48:12 -050050// ParseCompoundStmt parses a single compound statement:
51// a blank line, a def, for, while, or if statement, or a
52// semicolon-separated list of simple statements followed
53// by a newline. These are the units on which the REPL operates.
54// ParseCompoundStmt does not consume any following input.
55// The parser calls the readline function each
56// time it needs a new line of input.
57func ParseCompoundStmt(filename string, readline func() ([]byte, error)) (f *File, err error) {
58 in, err := newScanner(filename, readline, false)
59 if err != nil {
60 return nil, err
61 }
62
63 p := parser{in: in}
64 defer p.in.recover(&err)
65
66 p.nextToken() // read first lookahead token
67
68 var stmts []Stmt
69 switch p.tok {
70 case DEF, IF, FOR, WHILE:
71 stmts = p.parseStmt(stmts)
72 case NEWLINE:
73 // blank line
74 default:
alandonovan30e71c62019-01-04 13:48:12 -050075 stmts = p.parseSimpleStmt(stmts, false)
alandonovan1258e4d2019-01-25 10:19:30 -050076 // Require but don't consume newline, to avoid blocking again.
77 if p.tok != NEWLINE {
78 p.in.errorf(p.in.pos, "invalid syntax")
79 }
alandonovan30e71c62019-01-04 13:48:12 -050080 }
81
82 return &File{Path: filename, Stmts: stmts}, nil
83}
84
Alan Donovane3deafe2018-10-23 11:05:09 -040085// ParseExpr parses a Starlark expression.
Alan Donovan312d1a52017-10-02 10:10:28 -040086// See Parse for explanation of parameters.
Laurent Le Brun689fc222018-02-22 19:37:18 +010087func ParseExpr(filename string, src interface{}, mode Mode) (expr Expr, err error) {
88 in, err := newScanner(filename, src, mode&RetainComments != 0)
Alan Donovan312d1a52017-10-02 10:10:28 -040089 if err != nil {
90 return nil, err
91 }
92 p := parser{in: in}
93 defer p.in.recover(&err)
94
95 p.nextToken() // read first lookahead token
alandonovan30e71c62019-01-04 13:48:12 -050096
97 // TODO(adonovan): Python's eval would use the equivalent of
98 // parseExpr here, which permits an unparenthesized tuple.
99 // We should too.
Alan Donovan312d1a52017-10-02 10:10:28 -0400100 expr = p.parseTest()
101
Alan Donovanae063842017-10-10 15:46:17 -0400102 // A following newline (e.g. "f()\n") appears outside any brackets,
alandonovanbd7aaf52017-10-11 16:34:02 -0400103 // on a non-blank line, and thus results in a NEWLINE token.
Alan Donovanae063842017-10-10 15:46:17 -0400104 if p.tok == NEWLINE {
105 p.nextToken()
106 }
107
Alan Donovan312d1a52017-10-02 10:10:28 -0400108 if p.tok != EOF {
109 p.in.errorf(p.in.pos, "got %#v after expression, want EOF", p.tok)
110 }
Laurent Le Brun689fc222018-02-22 19:37:18 +0100111 p.assignComments(expr)
Alan Donovan312d1a52017-10-02 10:10:28 -0400112 return expr, nil
113}
114
115type parser struct {
116 in *scanner
117 tok Token
118 tokval tokenValue
119}
120
121// nextToken advances the scanner and returns the position of the
122// previous token.
123func (p *parser) nextToken() Position {
124 oldpos := p.tokval.pos
125 p.tok = p.in.nextToken(&p.tokval)
126 // enable to see the token stream
127 if debug {
128 log.Printf("nextToken: %-20s%+v\n", p.tok, p.tokval.pos)
129 }
130 return oldpos
131}
132
133// file_input = (NEWLINE | stmt)* EOF
134func (p *parser) parseFile() *File {
135 var stmts []Stmt
136 for p.tok != EOF {
137 if p.tok == NEWLINE {
138 p.nextToken()
139 continue
140 }
141 stmts = p.parseStmt(stmts)
142 }
143 return &File{Stmts: stmts}
144}
145
146func (p *parser) parseStmt(stmts []Stmt) []Stmt {
147 if p.tok == DEF {
148 return append(stmts, p.parseDefStmt())
149 } else if p.tok == IF {
150 return append(stmts, p.parseIfStmt())
151 } else if p.tok == FOR {
152 return append(stmts, p.parseForStmt())
Alessandro Arzilli678bafe2018-12-07 17:28:35 +0100153 } else if p.tok == WHILE {
154 return append(stmts, p.parseWhileStmt())
Alan Donovan312d1a52017-10-02 10:10:28 -0400155 }
alandonovan30e71c62019-01-04 13:48:12 -0500156 return p.parseSimpleStmt(stmts, true)
Alan Donovan312d1a52017-10-02 10:10:28 -0400157}
158
159func (p *parser) parseDefStmt() Stmt {
160 defpos := p.nextToken() // consume DEF
161 id := p.parseIdent()
162 p.consume(LPAREN)
163 params := p.parseParams()
164 p.consume(RPAREN)
165 p.consume(COLON)
166 body := p.parseSuite()
167 return &DefStmt{
168 Def: defpos,
169 Name: id,
170 Function: Function{
171 StartPos: defpos,
172 Params: params,
173 Body: body,
174 },
175 }
176}
177
178func (p *parser) parseIfStmt() Stmt {
179 ifpos := p.nextToken() // consume IF
180 cond := p.parseTest()
181 p.consume(COLON)
182 body := p.parseSuite()
183 ifStmt := &IfStmt{
184 If: ifpos,
185 Cond: cond,
186 True: body,
187 }
188 tail := ifStmt
189 for p.tok == ELIF {
190 elifpos := p.nextToken() // consume ELIF
191 cond := p.parseTest()
192 p.consume(COLON)
193 body := p.parseSuite()
194 elif := &IfStmt{
195 If: elifpos,
196 Cond: cond,
197 True: body,
198 }
199 tail.ElsePos = elifpos
200 tail.False = []Stmt{elif}
201 tail = elif
202 }
203 if p.tok == ELSE {
204 tail.ElsePos = p.nextToken() // consume ELSE
205 p.consume(COLON)
206 tail.False = p.parseSuite()
207 }
208 return ifStmt
209}
210
211func (p *parser) parseForStmt() Stmt {
212 forpos := p.nextToken() // consume FOR
213 vars := p.parseForLoopVariables()
214 p.consume(IN)
215 x := p.parseExpr(false)
216 p.consume(COLON)
217 body := p.parseSuite()
218 return &ForStmt{
219 For: forpos,
220 Vars: vars,
221 X: x,
222 Body: body,
223 }
224}
225
Alessandro Arzilli678bafe2018-12-07 17:28:35 +0100226func (p *parser) parseWhileStmt() Stmt {
227 whilepos := p.nextToken() // consume WHILE
228 cond := p.parseTest()
229 p.consume(COLON)
230 body := p.parseSuite()
231 return &WhileStmt{
232 While: whilepos,
233 Cond: cond,
234 Body: body,
235 }
236}
237
Alan Donovan312d1a52017-10-02 10:10:28 -0400238// Equivalent to 'exprlist' production in Python grammar.
239//
240// loop_variables = primary_with_suffix (COMMA primary_with_suffix)* COMMA?
241func (p *parser) parseForLoopVariables() Expr {
242 // Avoid parseExpr because it would consume the IN token
243 // following x in "for x in y: ...".
244 v := p.parsePrimaryWithSuffix()
245 if p.tok != COMMA {
246 return v
247 }
248
249 list := []Expr{v}
250 for p.tok == COMMA {
251 p.nextToken()
252 if terminatesExprList(p.tok) {
253 break
254 }
255 list = append(list, p.parsePrimaryWithSuffix())
256 }
257 return &TupleExpr{List: list}
258}
259
260// simple_stmt = small_stmt (SEMI small_stmt)* SEMI? NEWLINE
alandonovan30e71c62019-01-04 13:48:12 -0500261// In REPL mode, it does not consume the NEWLINE.
262func (p *parser) parseSimpleStmt(stmts []Stmt, consumeNL bool) []Stmt {
Alan Donovan312d1a52017-10-02 10:10:28 -0400263 for {
264 stmts = append(stmts, p.parseSmallStmt())
265 if p.tok != SEMI {
266 break
267 }
268 p.nextToken() // consume SEMI
269 if p.tok == NEWLINE || p.tok == EOF {
270 break
271 }
272 }
273 // EOF without NEWLINE occurs in `if x: pass`, for example.
alandonovan30e71c62019-01-04 13:48:12 -0500274 if p.tok != EOF && consumeNL {
Alan Donovan312d1a52017-10-02 10:10:28 -0400275 p.consume(NEWLINE)
276 }
alandonovan30e71c62019-01-04 13:48:12 -0500277
Alan Donovan312d1a52017-10-02 10:10:28 -0400278 return stmts
279}
280
281// small_stmt = RETURN expr?
282// | PASS | BREAK | CONTINUE
alandonovan6696fc32017-10-20 10:55:17 -0400283// | LOAD ...
Hittorp0a5e39a2018-08-09 15:02:30 +0300284// | expr ('=' | '+=' | '-=' | '*=' | '/=' | '%=' | '&=' | '|=' | '^=' | '<<=' | '>>=') expr // assign
Alan Donovan312d1a52017-10-02 10:10:28 -0400285// | expr
286func (p *parser) parseSmallStmt() Stmt {
alandonovan6696fc32017-10-20 10:55:17 -0400287 switch p.tok {
288 case RETURN:
Alan Donovan312d1a52017-10-02 10:10:28 -0400289 pos := p.nextToken() // consume RETURN
290 var result Expr
291 if p.tok != EOF && p.tok != NEWLINE && p.tok != SEMI {
292 result = p.parseExpr(false)
293 }
294 return &ReturnStmt{Return: pos, Result: result}
Alan Donovan312d1a52017-10-02 10:10:28 -0400295
Alan Donovan312d1a52017-10-02 10:10:28 -0400296 case BREAK, CONTINUE, PASS:
297 tok := p.tok
298 pos := p.nextToken() // consume it
299 return &BranchStmt{Token: tok, TokenPos: pos}
alandonovan6696fc32017-10-20 10:55:17 -0400300
301 case LOAD:
302 return p.parseLoadStmt()
Alan Donovan312d1a52017-10-02 10:10:28 -0400303 }
304
305 // Assignment
306 x := p.parseExpr(false)
307 switch p.tok {
Hittorp0a5e39a2018-08-09 15:02:30 +0300308 case EQ, PLUS_EQ, MINUS_EQ, STAR_EQ, SLASH_EQ, SLASHSLASH_EQ, PERCENT_EQ, AMP_EQ, PIPE_EQ, CIRCUMFLEX_EQ, LTLT_EQ, GTGT_EQ:
Alan Donovan312d1a52017-10-02 10:10:28 -0400309 op := p.tok
310 pos := p.nextToken() // consume op
311 rhs := p.parseExpr(false)
312 return &AssignStmt{OpPos: pos, Op: op, LHS: x, RHS: rhs}
313 }
314
Alan Donovan312d1a52017-10-02 10:10:28 -0400315 // Expression statement (e.g. function call, doc string).
316 return &ExprStmt{X: x}
317}
318
alandonovan6696fc32017-10-20 10:55:17 -0400319// stmt = LOAD '(' STRING {',' (IDENT '=')? STRING} [','] ')'
320func (p *parser) parseLoadStmt() *LoadStmt {
321 loadPos := p.nextToken() // consume LOAD
322 lparen := p.consume(LPAREN)
Alan Donovan312d1a52017-10-02 10:10:28 -0400323
alandonovan6696fc32017-10-20 10:55:17 -0400324 if p.tok != STRING {
325 p.in.errorf(p.in.pos, "first operand of load statement must be a string literal")
326 }
327 module := p.parsePrimary().(*Literal)
328
329 var from, to []*Ident
330 for p.tok != RPAREN && p.tok != EOF {
331 p.consume(COMMA)
332 if p.tok == RPAREN {
333 break // allow trailing comma
334 }
335 switch p.tok {
336 case STRING:
Alan Donovan312d1a52017-10-02 10:10:28 -0400337 // load("module", "id")
338 // To name is same as original.
alandonovan6696fc32017-10-20 10:55:17 -0400339 lit := p.parsePrimary().(*Literal)
Alan Donovan312d1a52017-10-02 10:10:28 -0400340 id := &Ident{
341 NamePos: lit.TokenPos.add(`"`),
342 Name: lit.Value.(string),
343 }
alandonovan6696fc32017-10-20 10:55:17 -0400344 to = append(to, id)
345 from = append(from, id)
Alan Donovan312d1a52017-10-02 10:10:28 -0400346
alandonovan6696fc32017-10-20 10:55:17 -0400347 case IDENT:
348 // load("module", to="from")
349 id := p.parseIdent()
350 to = append(to, id)
351 if p.tok != EQ {
alandonovan4b42bbf2017-11-10 13:22:52 -0500352 p.in.errorf(p.in.pos, `load operand must be "%[1]s" or %[1]s="originalname" (want '=' after %[1]s)`, id.Name)
alandonovan6696fc32017-10-20 10:55:17 -0400353 }
354 p.consume(EQ)
355 if p.tok != STRING {
356 p.in.errorf(p.in.pos, `original name of loaded symbol must be quoted: %s="originalname"`, id.Name)
357 }
358 lit := p.parsePrimary().(*Literal)
359 from = append(from, &Ident{
360 NamePos: lit.TokenPos.add(`"`),
361 Name: lit.Value.(string),
362 })
363
364 case RPAREN:
365 p.in.errorf(p.in.pos, "trailing comma in load statement")
366
367 default:
368 p.in.errorf(p.in.pos, `load operand must be "name" or localname="name" (got %#v)`, p.tok)
369 }
370 }
371 rparen := p.consume(RPAREN)
372
373 if len(to) == 0 {
374 p.in.errorf(lparen, "load statement must import at least 1 symbol")
Alan Donovan312d1a52017-10-02 10:10:28 -0400375 }
376 return &LoadStmt{
377 Load: loadPos,
378 Module: module,
379 To: to,
380 From: from,
alandonovan6696fc32017-10-20 10:55:17 -0400381 Rparen: rparen,
Alan Donovan312d1a52017-10-02 10:10:28 -0400382 }
Alan Donovan312d1a52017-10-02 10:10:28 -0400383}
384
385// suite is typically what follows a COLON (e.g. after DEF or FOR).
386// suite = simple_stmt | NEWLINE INDENT stmt+ OUTDENT
387func (p *parser) parseSuite() []Stmt {
388 if p.tok == NEWLINE {
389 p.nextToken() // consume NEWLINE
390 p.consume(INDENT)
391 var stmts []Stmt
392 for p.tok != OUTDENT && p.tok != EOF {
393 stmts = p.parseStmt(stmts)
394 }
395 p.consume(OUTDENT)
396 return stmts
397 }
398
alandonovan30e71c62019-01-04 13:48:12 -0500399 return p.parseSimpleStmt(nil, true)
Alan Donovan312d1a52017-10-02 10:10:28 -0400400}
401
402func (p *parser) parseIdent() *Ident {
403 if p.tok != IDENT {
404 p.in.error(p.in.pos, "not an identifier")
405 }
406 id := &Ident{
407 NamePos: p.tokval.pos,
408 Name: p.tokval.raw,
409 }
410 p.nextToken()
411 return id
412}
413
414func (p *parser) consume(t Token) Position {
415 if p.tok != t {
416 p.in.errorf(p.in.pos, "got %#v, want %#v", p.tok, t)
417 }
418 return p.nextToken()
419}
420
421// params = (param COMMA)* param
422// |
423//
424// param = IDENT
425// | IDENT EQ test
Alan Donovan52153852019-02-13 19:18:15 -0500426// | STAR
Alan Donovan312d1a52017-10-02 10:10:28 -0400427// | STAR IDENT
428// | STARSTAR IDENT
429//
430// parseParams parses a parameter list. The resulting expressions are of the form:
431//
Alan Donovan52153852019-02-13 19:18:15 -0500432// *Ident x
433// *Binary{Op: EQ, X: *Ident, Y: Expr} x=y
434// *Unary{Op: STAR} *
435// *Unary{Op: STAR, X: *Ident} *args
436// *Unary{Op: STARSTAR, X: *Ident} **kwargs
Alan Donovan312d1a52017-10-02 10:10:28 -0400437func (p *parser) parseParams() []Expr {
438 var params []Expr
439 stars := false
440 for p.tok != RPAREN && p.tok != COLON && p.tok != EOF {
441 if len(params) > 0 {
442 p.consume(COMMA)
443 }
444 if p.tok == RPAREN {
445 // list can end with a COMMA if there is neither * nor **
446 if stars {
447 p.in.errorf(p.in.pos, "got %#v, want parameter", p.tok)
448 }
449 break
450 }
451
Alan Donovan52153852019-02-13 19:18:15 -0500452 // * or *args or **kwargs
Taras Tsugrii6be536f2018-12-09 18:07:56 -0800453 if p.tok == STAR || p.tok == STARSTAR {
Alan Donovan312d1a52017-10-02 10:10:28 -0400454 stars = true
alandonovan97d77632018-12-09 21:18:13 -0500455 op := p.tok
Alan Donovan312d1a52017-10-02 10:10:28 -0400456 pos := p.nextToken()
Alan Donovan52153852019-02-13 19:18:15 -0500457 var x Expr
458 if op == STARSTAR || p.tok == IDENT {
459 x = p.parseIdent()
460 }
Alan Donovan312d1a52017-10-02 10:10:28 -0400461 params = append(params, &UnaryExpr{
462 OpPos: pos,
alandonovan97d77632018-12-09 21:18:13 -0500463 Op: op,
Alan Donovan52153852019-02-13 19:18:15 -0500464 X: x,
Alan Donovan312d1a52017-10-02 10:10:28 -0400465 })
466 continue
467 }
468
469 // IDENT
470 // IDENT = test
471 id := p.parseIdent()
472 if p.tok == EQ { // default value
473 eq := p.nextToken()
474 dflt := p.parseTest()
475 params = append(params, &BinaryExpr{
476 X: id,
477 OpPos: eq,
478 Op: EQ,
479 Y: dflt,
480 })
481 continue
482 }
483
484 params = append(params, id)
485 }
486 return params
487}
488
489// parseExpr parses an expression, possible consisting of a
490// comma-separated list of 'test' expressions.
491//
492// In many cases we must use parseTest to avoid ambiguity such as
493// f(x, y) vs. f((x, y)).
494func (p *parser) parseExpr(inParens bool) Expr {
495 x := p.parseTest()
496 if p.tok != COMMA {
497 return x
498 }
499
500 // tuple
501 exprs := p.parseExprs([]Expr{x}, inParens)
502 return &TupleExpr{List: exprs}
503}
504
505// parseExprs parses a comma-separated list of expressions, starting with the comma.
506// It is used to parse tuples and list elements.
507// expr_list = (',' expr)* ','?
508func (p *parser) parseExprs(exprs []Expr, allowTrailingComma bool) []Expr {
509 for p.tok == COMMA {
510 pos := p.nextToken()
511 if terminatesExprList(p.tok) {
512 if !allowTrailingComma {
513 p.in.error(pos, "unparenthesized tuple with trailing comma")
514 }
515 break
516 }
517 exprs = append(exprs, p.parseTest())
518 }
519 return exprs
520}
521
522// parseTest parses a 'test', a single-component expression.
523func (p *parser) parseTest() Expr {
524 if p.tok == LAMBDA {
alandonovanf9faf3b2018-01-16 10:06:02 -0500525 return p.parseLambda(true)
Alan Donovan312d1a52017-10-02 10:10:28 -0400526 }
527
528 x := p.parseTestPrec(0)
529
530 // conditional expression (t IF cond ELSE f)
531 if p.tok == IF {
532 ifpos := p.nextToken()
533 cond := p.parseTestPrec(0)
534 if p.tok != ELSE {
535 p.in.error(ifpos, "conditional expression without else clause")
536 }
537 elsepos := p.nextToken()
538 else_ := p.parseTest()
539 return &CondExpr{If: ifpos, Cond: cond, True: x, ElsePos: elsepos, False: else_}
540 }
541
542 return x
543}
544
alandonovanf9faf3b2018-01-16 10:06:02 -0500545// parseTestNoCond parses a a single-component expression without
546// consuming a trailing 'if expr else expr'.
547func (p *parser) parseTestNoCond() Expr {
548 if p.tok == LAMBDA {
549 return p.parseLambda(false)
550 }
551 return p.parseTestPrec(0)
552}
553
554// parseLambda parses a lambda expression.
555// The allowCond flag allows the body to be an 'a if b else c' conditional.
556func (p *parser) parseLambda(allowCond bool) Expr {
557 lambda := p.nextToken()
558 var params []Expr
559 if p.tok != COLON {
560 params = p.parseParams()
561 }
562 p.consume(COLON)
563
564 var body Expr
565 if allowCond {
566 body = p.parseTest()
567 } else {
568 body = p.parseTestNoCond()
569 }
570
571 return &LambdaExpr{
572 Lambda: lambda,
573 Function: Function{
574 StartPos: lambda,
575 Params: params,
576 Body: []Stmt{&ReturnStmt{Result: body}},
577 },
578 }
579}
580
Alan Donovan312d1a52017-10-02 10:10:28 -0400581func (p *parser) parseTestPrec(prec int) Expr {
582 if prec >= len(preclevels) {
583 return p.parsePrimaryWithSuffix()
584 }
585
586 // expr = NOT expr
587 if p.tok == NOT && prec == int(precedence[NOT]) {
588 pos := p.nextToken()
alandonovanc7df0452018-10-04 17:10:12 -0400589 x := p.parseTestPrec(prec)
Alan Donovan312d1a52017-10-02 10:10:28 -0400590 return &UnaryExpr{
591 OpPos: pos,
592 Op: NOT,
593 X: x,
594 }
595 }
596
597 return p.parseBinopExpr(prec)
598}
599
600// expr = test (OP test)*
601// Uses precedence climbing; see http://www.engr.mun.ca/~theo/Misc/exp_parsing.htm#climbing.
602func (p *parser) parseBinopExpr(prec int) Expr {
603 x := p.parseTestPrec(prec + 1)
604 for first := true; ; first = false {
605 if p.tok == NOT {
606 p.nextToken() // consume NOT
607 // In this context, NOT must be followed by IN.
608 // Replace NOT IN by a single NOT_IN token.
609 if p.tok != IN {
610 p.in.errorf(p.in.pos, "got %#v, want in", p.tok)
611 }
612 p.tok = NOT_IN
613 }
614
615 // Binary operator of specified precedence?
616 opprec := int(precedence[p.tok])
617 if opprec < prec {
618 return x
619 }
620
621 // Comparisons are non-associative.
622 if !first && opprec == int(precedence[EQL]) {
623 p.in.errorf(p.in.pos, "%s does not associate with %s (use parens)",
624 x.(*BinaryExpr).Op, p.tok)
625 }
626
627 op := p.tok
628 pos := p.nextToken()
629 y := p.parseTestPrec(opprec + 1)
alandonovan60e4b3d2018-04-02 12:32:39 -0400630 x = &BinaryExpr{OpPos: pos, Op: op, X: x, Y: y}
Alan Donovan312d1a52017-10-02 10:10:28 -0400631 }
632}
633
634// precedence maps each operator to its precedence (0-7), or -1 for other tokens.
635var precedence [maxToken]int8
636
637// preclevels groups operators of equal precedence.
638// Comparisons are nonassociative; other binary operators associate to the left.
Hittorp0a5e39a2018-08-09 15:02:30 +0300639// Unary MINUS, unary PLUS, and TILDE have higher precedence so are handled in parsePrimary.
alandonovan04aba6e2018-11-05 17:45:33 -0500640// See https://github.com/google/starlark-go/blob/master/doc/spec.md#binary-operators
Alan Donovan312d1a52017-10-02 10:10:28 -0400641var preclevels = [...][]Token{
alandonovanc7df0452018-10-04 17:10:12 -0400642 {OR}, // or
643 {AND}, // and
644 {NOT}, // not (unary)
Alan Donovan312d1a52017-10-02 10:10:28 -0400645 {EQL, NEQ, LT, GT, LE, GE, IN, NOT_IN}, // == != < > <= >= in not in
alandonovanc7df0452018-10-04 17:10:12 -0400646 {PIPE}, // |
647 {CIRCUMFLEX}, // ^
648 {AMP}, // &
649 {LTLT, GTGT}, // << >>
650 {MINUS, PLUS}, // -
651 {STAR, PERCENT, SLASH, SLASHSLASH}, // * % / //
Alan Donovan312d1a52017-10-02 10:10:28 -0400652}
653
654func init() {
655 // populate precedence table
656 for i := range precedence {
657 precedence[i] = -1
658 }
659 for level, tokens := range preclevels {
660 for _, tok := range tokens {
661 precedence[tok] = int8(level)
662 }
663 }
664}
665
Alan Donovan312d1a52017-10-02 10:10:28 -0400666// primary_with_suffix = primary
667// | primary '.' IDENT
668// | primary slice_suffix
669// | primary call_suffix
670func (p *parser) parsePrimaryWithSuffix() Expr {
671 x := p.parsePrimary()
672 for {
673 switch p.tok {
674 case DOT:
675 dot := p.nextToken()
676 id := p.parseIdent()
677 x = &DotExpr{Dot: dot, X: x, Name: id}
678 case LBRACK:
679 x = p.parseSliceSuffix(x)
680 case LPAREN:
681 x = p.parseCallSuffix(x)
682 default:
683 return x
684 }
685 }
686}
687
688// slice_suffix = '[' expr? ':' expr? ':' expr? ']'
689func (p *parser) parseSliceSuffix(x Expr) Expr {
690 lbrack := p.nextToken()
691 var lo, hi, step Expr
692 if p.tok != COLON {
693 y := p.parseExpr(false)
694
695 // index x[y]
696 if p.tok == RBRACK {
697 rbrack := p.nextToken()
698 return &IndexExpr{X: x, Lbrack: lbrack, Y: y, Rbrack: rbrack}
699 }
700
701 lo = y
702 }
703
704 // slice or substring x[lo:hi:step]
705 if p.tok == COLON {
706 p.nextToken()
707 if p.tok != COLON && p.tok != RBRACK {
708 hi = p.parseTest()
709 }
710 }
711 if p.tok == COLON {
712 p.nextToken()
713 if p.tok != RBRACK {
714 step = p.parseTest()
715 }
716 }
717 rbrack := p.consume(RBRACK)
718 return &SliceExpr{X: x, Lbrack: lbrack, Lo: lo, Hi: hi, Step: step, Rbrack: rbrack}
719}
720
721// call_suffix = '(' arg_list? ')'
722func (p *parser) parseCallSuffix(fn Expr) Expr {
723 lparen := p.consume(LPAREN)
724 var rparen Position
725 var args []Expr
726 if p.tok == RPAREN {
727 rparen = p.nextToken()
728 } else {
729 args = p.parseArgs()
730 rparen = p.consume(RPAREN)
731 }
732 return &CallExpr{Fn: fn, Lparen: lparen, Args: args, Rparen: rparen}
733}
734
735// parseArgs parses a list of actual parameter values (arguments).
736// It mirrors the structure of parseParams.
737// arg_list = ((arg COMMA)* arg COMMA?)?
738func (p *parser) parseArgs() []Expr {
739 var args []Expr
740 stars := false
741 for p.tok != RPAREN && p.tok != EOF {
742 if len(args) > 0 {
743 p.consume(COMMA)
744 }
745 if p.tok == RPAREN {
746 // list can end with a COMMA if there is neither * nor **
747 if stars {
748 p.in.errorf(p.in.pos, `got %#v, want argument`, p.tok)
749 }
750 break
751 }
752
alandonovan97d77632018-12-09 21:18:13 -0500753 // *args or **kwargs
754 if p.tok == STAR || p.tok == STARSTAR {
Alan Donovan312d1a52017-10-02 10:10:28 -0400755 stars = true
alandonovan97d77632018-12-09 21:18:13 -0500756 op := p.tok
Alan Donovan312d1a52017-10-02 10:10:28 -0400757 pos := p.nextToken()
758 x := p.parseTest()
759 args = append(args, &UnaryExpr{
760 OpPos: pos,
alandonovan97d77632018-12-09 21:18:13 -0500761 Op: op,
Alan Donovan312d1a52017-10-02 10:10:28 -0400762 X: x,
763 })
764 continue
765 }
766
767 // We use a different strategy from Bazel here to stay within LL(1).
768 // Instead of looking ahead two tokens (IDENT, EQ) we parse
769 // 'test = test' then check that the first was an IDENT.
770 x := p.parseTest()
771
772 if p.tok == EQ {
773 // name = value
774 if _, ok := x.(*Ident); !ok {
775 p.in.errorf(p.in.pos, "keyword argument must have form name=expr")
776 }
777 eq := p.nextToken()
778 y := p.parseTest()
779 x = &BinaryExpr{
780 X: x,
781 OpPos: eq,
782 Op: EQ,
783 Y: y,
784 }
785 }
786
787 args = append(args, x)
788 }
789 return args
790}
791
792// primary = IDENT
793// | INT | FLOAT
794// | STRING
795// | '[' ... // list literal or comprehension
796// | '{' ... // dict literal or comprehension
797// | '(' ... // tuple or parenthesized expression
alandonovanc7df0452018-10-04 17:10:12 -0400798// | ('-'|'+'|'~') primary_with_suffix
Alan Donovan312d1a52017-10-02 10:10:28 -0400799func (p *parser) parsePrimary() Expr {
800 switch p.tok {
801 case IDENT:
802 return p.parseIdent()
803
804 case INT, FLOAT, STRING:
805 var val interface{}
806 tok := p.tok
807 switch tok {
808 case INT:
Mohamed Elqdusy69e96152018-01-22 20:00:29 +0100809 if p.tokval.bigInt != nil {
810 val = p.tokval.bigInt
811 } else {
812 val = p.tokval.int
813 }
Alan Donovan312d1a52017-10-02 10:10:28 -0400814 case FLOAT:
815 val = p.tokval.float
816 case STRING:
817 val = p.tokval.string
818 }
819 raw := p.tokval.raw
820 pos := p.nextToken()
821 return &Literal{Token: tok, TokenPos: pos, Raw: raw, Value: val}
822
823 case LBRACK:
824 return p.parseList()
825
826 case LBRACE:
827 return p.parseDict()
828
829 case LPAREN:
830 lparen := p.nextToken()
831 if p.tok == RPAREN {
832 // empty tuple
833 rparen := p.nextToken()
834 return &TupleExpr{Lparen: lparen, Rparen: rparen}
835 }
836 e := p.parseExpr(true) // allow trailing comma
Laurent Le Brun28ceca72018-02-26 15:01:53 +0100837 rparen := p.consume(RPAREN)
838 return &ParenExpr{
839 Lparen: lparen,
840 X: e,
841 Rparen: rparen,
842 }
Alan Donovan312d1a52017-10-02 10:10:28 -0400843
alandonovanc7df0452018-10-04 17:10:12 -0400844 case MINUS, PLUS, TILDE: // unary
Alan Donovan312d1a52017-10-02 10:10:28 -0400845 tok := p.tok
846 pos := p.nextToken()
847 x := p.parsePrimaryWithSuffix()
848 return &UnaryExpr{
849 OpPos: pos,
850 Op: tok,
851 X: x,
852 }
853 }
854 p.in.errorf(p.in.pos, "got %#v, want primary expression", p.tok)
855 panic("unreachable")
856}
857
858// list = '[' ']'
859// | '[' expr ']'
860// | '[' expr expr_list ']'
861// | '[' expr (FOR loop_variables IN expr)+ ']'
862func (p *parser) parseList() Expr {
863 lbrack := p.nextToken()
864 if p.tok == RBRACK {
865 // empty List
866 rbrack := p.nextToken()
867 return &ListExpr{Lbrack: lbrack, Rbrack: rbrack}
868 }
869
870 x := p.parseTest()
871
872 if p.tok == FOR {
873 // list comprehension
874 return p.parseComprehensionSuffix(lbrack, x, RBRACK)
875 }
876
877 exprs := []Expr{x}
878 if p.tok == COMMA {
879 // multi-item list literal
880 exprs = p.parseExprs(exprs, true) // allow trailing comma
881 }
882
883 rbrack := p.consume(RBRACK)
884 return &ListExpr{Lbrack: lbrack, List: exprs, Rbrack: rbrack}
885}
886
887// dict = '{' '}'
888// | '{' dict_entry_list '}'
889// | '{' dict_entry FOR loop_variables IN expr '}'
890func (p *parser) parseDict() Expr {
891 lbrace := p.nextToken()
892 if p.tok == RBRACE {
893 // empty dict
894 rbrace := p.nextToken()
895 return &DictExpr{Lbrace: lbrace, Rbrace: rbrace}
896 }
897
898 x := p.parseDictEntry()
899
900 if p.tok == FOR {
901 // dict comprehension
902 return p.parseComprehensionSuffix(lbrace, x, RBRACE)
903 }
904
905 entries := []Expr{x}
906 for p.tok == COMMA {
907 p.nextToken()
908 if p.tok == RBRACE {
909 break
910 }
911 entries = append(entries, p.parseDictEntry())
912 }
913
914 rbrace := p.consume(RBRACE)
915 return &DictExpr{Lbrace: lbrace, List: entries, Rbrace: rbrace}
916}
917
918// dict_entry = test ':' test
919func (p *parser) parseDictEntry() *DictEntry {
920 k := p.parseTest()
921 colon := p.consume(COLON)
922 v := p.parseTest()
923 return &DictEntry{Key: k, Colon: colon, Value: v}
924}
925
926// comp_suffix = FOR loopvars IN expr comp_suffix
927// | IF expr comp_suffix
928// | ']' or ')' (end)
929//
930// There can be multiple FOR/IF clauses; the first is always a FOR.
931func (p *parser) parseComprehensionSuffix(lbrace Position, body Expr, endBrace Token) Expr {
932 var clauses []Node
933 for p.tok != endBrace {
934 if p.tok == FOR {
935 pos := p.nextToken()
936 vars := p.parseForLoopVariables()
937 in := p.consume(IN)
938 // Following Python 3, the operand of IN cannot be:
939 // - a conditional expression ('x if y else z'),
940 // due to conflicts in Python grammar
941 // ('if' is used by the comprehension);
942 // - a lambda expression
943 // - an unparenthesized tuple.
944 x := p.parseTestPrec(0)
945 clauses = append(clauses, &ForClause{For: pos, Vars: vars, In: in, X: x})
946 } else if p.tok == IF {
947 pos := p.nextToken()
alandonovanf9faf3b2018-01-16 10:06:02 -0500948 cond := p.parseTestNoCond()
Alan Donovan312d1a52017-10-02 10:10:28 -0400949 clauses = append(clauses, &IfClause{If: pos, Cond: cond})
950 } else {
951 p.in.errorf(p.in.pos, "got %#v, want '%s', for, or if", p.tok, endBrace)
952 }
953 }
954 rbrace := p.nextToken()
955
956 return &Comprehension{
957 Curly: endBrace == RBRACE,
958 Lbrack: lbrace,
959 Body: body,
960 Clauses: clauses,
961 Rbrack: rbrace,
962 }
963}
964
965func terminatesExprList(tok Token) bool {
966 switch tok {
967 case EOF, NEWLINE, EQ, RBRACE, RBRACK, RPAREN, SEMI:
968 return true
969 }
970 return false
971}
Laurent Le Brun689fc222018-02-22 19:37:18 +0100972
973// Comment assignment.
974// We build two lists of all subnodes, preorder and postorder.
975// The preorder list is ordered by start location, with outer nodes first.
976// The postorder list is ordered by end location, with outer nodes last.
977// We use the preorder list to assign each whole-line comment to the syntax
978// immediately following it, and we use the postorder list to assign each
979// end-of-line comment to the syntax immediately preceding it.
980
981// flattenAST returns the list of AST nodes, both in prefix order and in postfix
982// order.
983func flattenAST(root Node) (pre, post []Node) {
984 stack := []Node{}
985 Walk(root, func(n Node) bool {
986 if n != nil {
987 pre = append(pre, n)
988 stack = append(stack, n)
989 } else {
990 post = append(post, stack[len(stack)-1])
991 stack = stack[:len(stack)-1]
992 }
993 return true
994 })
995 return pre, post
996}
997
998// assignComments attaches comments to nearby syntax.
999func (p *parser) assignComments(n Node) {
1000 // Leave early if there are no comments
1001 if len(p.in.lineComments)+len(p.in.suffixComments) == 0 {
1002 return
1003 }
1004
1005 pre, post := flattenAST(n)
1006
1007 // Assign line comments to syntax immediately following.
1008 line := p.in.lineComments
1009 for _, x := range pre {
1010 start, _ := x.Span()
1011
1012 switch x.(type) {
1013 case *File:
1014 continue
1015 }
1016
1017 for len(line) > 0 && !start.isBefore(line[0].Start) {
1018 x.AllocComments()
1019 x.Comments().Before = append(x.Comments().Before, line[0])
1020 line = line[1:]
1021 }
1022 }
1023
1024 // Remaining line comments go at end of file.
1025 if len(line) > 0 {
1026 n.AllocComments()
1027 n.Comments().After = append(n.Comments().After, line...)
1028 }
1029
1030 // Assign suffix comments to syntax immediately before.
1031 suffix := p.in.suffixComments
1032 for i := len(post) - 1; i >= 0; i-- {
1033 x := post[i]
1034
1035 // Do not assign suffix comments to file
1036 switch x.(type) {
1037 case *File:
1038 continue
1039 }
1040
1041 _, end := x.Span()
1042 if len(suffix) > 0 && end.isBefore(suffix[len(suffix)-1].Start) {
1043 x.AllocComments()
1044 x.Comments().Suffix = append(x.Comments().Suffix, suffix[len(suffix)-1])
1045 suffix = suffix[:len(suffix)-1]
1046 }
1047 }
1048}