blob: 50b808743ee4548dcfd3b52e10dabccd8cd95ab3 [file] [log] [blame]
Alan Donovan312d1a52017-10-02 10:10:28 -04001// Copyright 2017 The Bazel Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package syntax
6
Alan Donovane3deafe2018-10-23 11:05:09 -04007// This file defines a recursive-descent parser for Starlark.
8// The LL(1) grammar of Starlark and the names of many productions follow Python 2.7.
Alan Donovan312d1a52017-10-02 10:10:28 -04009//
10// TODO(adonovan): use syntax.Error more systematically throughout the
11// package. Verify that error positions are correct using the
12// chunkedfile mechanism.
13
alandonovanb7e3b1f2018-12-12 17:14:58 -050014import "log"
Alan Donovan312d1a52017-10-02 10:10:28 -040015
16// Enable this flag to print the token stream and log.Fatal on the first error.
17const debug = false
18
Laurent Le Brun689fc222018-02-22 19:37:18 +010019// A Mode value is a set of flags (or 0) that controls optional parser functionality.
20type Mode uint
21
22const (
23 RetainComments Mode = 1 << iota // retain comments in AST; see Node.Comments
24)
25
Alan Donovan312d1a52017-10-02 10:10:28 -040026// Parse parses the input data and returns the corresponding parse tree.
27//
28// If src != nil, ParseFile parses the source from src and the filename
29// is only used when recording position information.
30// The type of the argument for the src parameter must be string,
alandonovan0a10e4f2021-02-08 12:20:22 -050031// []byte, io.Reader, or FilePortion.
Alan Donovan312d1a52017-10-02 10:10:28 -040032// If src == nil, ParseFile parses the file specified by filename.
Laurent Le Brun689fc222018-02-22 19:37:18 +010033func Parse(filename string, src interface{}, mode Mode) (f *File, err error) {
34 in, err := newScanner(filename, src, mode&RetainComments != 0)
Alan Donovan312d1a52017-10-02 10:10:28 -040035 if err != nil {
36 return nil, err
37 }
38 p := parser{in: in}
39 defer p.in.recover(&err)
40
41 p.nextToken() // read first lookahead token
42 f = p.parseFile()
43 if f != nil {
44 f.Path = filename
45 }
Laurent Le Brun689fc222018-02-22 19:37:18 +010046 p.assignComments(f)
Alan Donovan312d1a52017-10-02 10:10:28 -040047 return f, nil
48}
49
alandonovan30e71c62019-01-04 13:48:12 -050050// ParseCompoundStmt parses a single compound statement:
51// a blank line, a def, for, while, or if statement, or a
52// semicolon-separated list of simple statements followed
53// by a newline. These are the units on which the REPL operates.
54// ParseCompoundStmt does not consume any following input.
55// The parser calls the readline function each
56// time it needs a new line of input.
57func ParseCompoundStmt(filename string, readline func() ([]byte, error)) (f *File, err error) {
58 in, err := newScanner(filename, readline, false)
59 if err != nil {
60 return nil, err
61 }
62
63 p := parser{in: in}
64 defer p.in.recover(&err)
65
66 p.nextToken() // read first lookahead token
67
68 var stmts []Stmt
69 switch p.tok {
70 case DEF, IF, FOR, WHILE:
71 stmts = p.parseStmt(stmts)
72 case NEWLINE:
73 // blank line
74 default:
alandonovan30e71c62019-01-04 13:48:12 -050075 stmts = p.parseSimpleStmt(stmts, false)
alandonovan1258e4d2019-01-25 10:19:30 -050076 // Require but don't consume newline, to avoid blocking again.
77 if p.tok != NEWLINE {
78 p.in.errorf(p.in.pos, "invalid syntax")
79 }
alandonovan30e71c62019-01-04 13:48:12 -050080 }
81
82 return &File{Path: filename, Stmts: stmts}, nil
83}
84
Alan Donovane3deafe2018-10-23 11:05:09 -040085// ParseExpr parses a Starlark expression.
alandonovanf26cf182019-05-28 16:17:30 -040086// A comma-separated list of expressions is parsed as a tuple.
Alan Donovan312d1a52017-10-02 10:10:28 -040087// See Parse for explanation of parameters.
Laurent Le Brun689fc222018-02-22 19:37:18 +010088func ParseExpr(filename string, src interface{}, mode Mode) (expr Expr, err error) {
89 in, err := newScanner(filename, src, mode&RetainComments != 0)
Alan Donovan312d1a52017-10-02 10:10:28 -040090 if err != nil {
91 return nil, err
92 }
93 p := parser{in: in}
94 defer p.in.recover(&err)
95
96 p.nextToken() // read first lookahead token
alandonovan30e71c62019-01-04 13:48:12 -050097
alandonovanf26cf182019-05-28 16:17:30 -040098 // Use parseExpr, not parseTest, to permit an unparenthesized tuple.
99 expr = p.parseExpr(false)
Alan Donovan312d1a52017-10-02 10:10:28 -0400100
Alan Donovanae063842017-10-10 15:46:17 -0400101 // A following newline (e.g. "f()\n") appears outside any brackets,
alandonovanbd7aaf52017-10-11 16:34:02 -0400102 // on a non-blank line, and thus results in a NEWLINE token.
Alan Donovanae063842017-10-10 15:46:17 -0400103 if p.tok == NEWLINE {
104 p.nextToken()
105 }
106
Alan Donovan312d1a52017-10-02 10:10:28 -0400107 if p.tok != EOF {
108 p.in.errorf(p.in.pos, "got %#v after expression, want EOF", p.tok)
109 }
Laurent Le Brun689fc222018-02-22 19:37:18 +0100110 p.assignComments(expr)
Alan Donovan312d1a52017-10-02 10:10:28 -0400111 return expr, nil
112}
113
114type parser struct {
115 in *scanner
116 tok Token
117 tokval tokenValue
118}
119
120// nextToken advances the scanner and returns the position of the
121// previous token.
122func (p *parser) nextToken() Position {
123 oldpos := p.tokval.pos
124 p.tok = p.in.nextToken(&p.tokval)
125 // enable to see the token stream
126 if debug {
127 log.Printf("nextToken: %-20s%+v\n", p.tok, p.tokval.pos)
128 }
129 return oldpos
130}
131
132// file_input = (NEWLINE | stmt)* EOF
133func (p *parser) parseFile() *File {
134 var stmts []Stmt
135 for p.tok != EOF {
136 if p.tok == NEWLINE {
137 p.nextToken()
138 continue
139 }
140 stmts = p.parseStmt(stmts)
141 }
142 return &File{Stmts: stmts}
143}
144
145func (p *parser) parseStmt(stmts []Stmt) []Stmt {
146 if p.tok == DEF {
147 return append(stmts, p.parseDefStmt())
148 } else if p.tok == IF {
149 return append(stmts, p.parseIfStmt())
150 } else if p.tok == FOR {
151 return append(stmts, p.parseForStmt())
Alessandro Arzilli678bafe2018-12-07 17:28:35 +0100152 } else if p.tok == WHILE {
153 return append(stmts, p.parseWhileStmt())
Alan Donovan312d1a52017-10-02 10:10:28 -0400154 }
alandonovan30e71c62019-01-04 13:48:12 -0500155 return p.parseSimpleStmt(stmts, true)
Alan Donovan312d1a52017-10-02 10:10:28 -0400156}
157
158func (p *parser) parseDefStmt() Stmt {
159 defpos := p.nextToken() // consume DEF
160 id := p.parseIdent()
161 p.consume(LPAREN)
162 params := p.parseParams()
163 p.consume(RPAREN)
164 p.consume(COLON)
165 body := p.parseSuite()
166 return &DefStmt{
alandonovan6ddc71c2019-06-04 09:08:55 -0400167 Def: defpos,
168 Name: id,
169 Params: params,
170 Body: body,
Alan Donovan312d1a52017-10-02 10:10:28 -0400171 }
172}
173
174func (p *parser) parseIfStmt() Stmt {
175 ifpos := p.nextToken() // consume IF
176 cond := p.parseTest()
177 p.consume(COLON)
178 body := p.parseSuite()
179 ifStmt := &IfStmt{
180 If: ifpos,
181 Cond: cond,
182 True: body,
183 }
184 tail := ifStmt
185 for p.tok == ELIF {
186 elifpos := p.nextToken() // consume ELIF
187 cond := p.parseTest()
188 p.consume(COLON)
189 body := p.parseSuite()
190 elif := &IfStmt{
191 If: elifpos,
192 Cond: cond,
193 True: body,
194 }
195 tail.ElsePos = elifpos
196 tail.False = []Stmt{elif}
197 tail = elif
198 }
199 if p.tok == ELSE {
200 tail.ElsePos = p.nextToken() // consume ELSE
201 p.consume(COLON)
202 tail.False = p.parseSuite()
203 }
204 return ifStmt
205}
206
207func (p *parser) parseForStmt() Stmt {
208 forpos := p.nextToken() // consume FOR
209 vars := p.parseForLoopVariables()
210 p.consume(IN)
211 x := p.parseExpr(false)
212 p.consume(COLON)
213 body := p.parseSuite()
214 return &ForStmt{
215 For: forpos,
216 Vars: vars,
217 X: x,
218 Body: body,
219 }
220}
221
Alessandro Arzilli678bafe2018-12-07 17:28:35 +0100222func (p *parser) parseWhileStmt() Stmt {
223 whilepos := p.nextToken() // consume WHILE
224 cond := p.parseTest()
225 p.consume(COLON)
226 body := p.parseSuite()
227 return &WhileStmt{
228 While: whilepos,
229 Cond: cond,
230 Body: body,
231 }
232}
233
Alan Donovan312d1a52017-10-02 10:10:28 -0400234// Equivalent to 'exprlist' production in Python grammar.
235//
236// loop_variables = primary_with_suffix (COMMA primary_with_suffix)* COMMA?
237func (p *parser) parseForLoopVariables() Expr {
238 // Avoid parseExpr because it would consume the IN token
239 // following x in "for x in y: ...".
240 v := p.parsePrimaryWithSuffix()
241 if p.tok != COMMA {
242 return v
243 }
244
245 list := []Expr{v}
246 for p.tok == COMMA {
247 p.nextToken()
248 if terminatesExprList(p.tok) {
249 break
250 }
251 list = append(list, p.parsePrimaryWithSuffix())
252 }
253 return &TupleExpr{List: list}
254}
255
256// simple_stmt = small_stmt (SEMI small_stmt)* SEMI? NEWLINE
alandonovan30e71c62019-01-04 13:48:12 -0500257// In REPL mode, it does not consume the NEWLINE.
258func (p *parser) parseSimpleStmt(stmts []Stmt, consumeNL bool) []Stmt {
Alan Donovan312d1a52017-10-02 10:10:28 -0400259 for {
260 stmts = append(stmts, p.parseSmallStmt())
261 if p.tok != SEMI {
262 break
263 }
264 p.nextToken() // consume SEMI
265 if p.tok == NEWLINE || p.tok == EOF {
266 break
267 }
268 }
269 // EOF without NEWLINE occurs in `if x: pass`, for example.
alandonovan30e71c62019-01-04 13:48:12 -0500270 if p.tok != EOF && consumeNL {
Alan Donovan312d1a52017-10-02 10:10:28 -0400271 p.consume(NEWLINE)
272 }
alandonovan30e71c62019-01-04 13:48:12 -0500273
Alan Donovan312d1a52017-10-02 10:10:28 -0400274 return stmts
275}
276
277// small_stmt = RETURN expr?
278// | PASS | BREAK | CONTINUE
alandonovan6696fc32017-10-20 10:55:17 -0400279// | LOAD ...
Hittorp0a5e39a2018-08-09 15:02:30 +0300280// | expr ('=' | '+=' | '-=' | '*=' | '/=' | '%=' | '&=' | '|=' | '^=' | '<<=' | '>>=') expr // assign
Alan Donovan312d1a52017-10-02 10:10:28 -0400281// | expr
282func (p *parser) parseSmallStmt() Stmt {
alandonovan6696fc32017-10-20 10:55:17 -0400283 switch p.tok {
284 case RETURN:
Alan Donovan312d1a52017-10-02 10:10:28 -0400285 pos := p.nextToken() // consume RETURN
286 var result Expr
287 if p.tok != EOF && p.tok != NEWLINE && p.tok != SEMI {
288 result = p.parseExpr(false)
289 }
290 return &ReturnStmt{Return: pos, Result: result}
Alan Donovan312d1a52017-10-02 10:10:28 -0400291
Alan Donovan312d1a52017-10-02 10:10:28 -0400292 case BREAK, CONTINUE, PASS:
293 tok := p.tok
294 pos := p.nextToken() // consume it
295 return &BranchStmt{Token: tok, TokenPos: pos}
alandonovan6696fc32017-10-20 10:55:17 -0400296
297 case LOAD:
298 return p.parseLoadStmt()
Alan Donovan312d1a52017-10-02 10:10:28 -0400299 }
300
301 // Assignment
302 x := p.parseExpr(false)
303 switch p.tok {
Hittorp0a5e39a2018-08-09 15:02:30 +0300304 case EQ, PLUS_EQ, MINUS_EQ, STAR_EQ, SLASH_EQ, SLASHSLASH_EQ, PERCENT_EQ, AMP_EQ, PIPE_EQ, CIRCUMFLEX_EQ, LTLT_EQ, GTGT_EQ:
Alan Donovan312d1a52017-10-02 10:10:28 -0400305 op := p.tok
306 pos := p.nextToken() // consume op
307 rhs := p.parseExpr(false)
308 return &AssignStmt{OpPos: pos, Op: op, LHS: x, RHS: rhs}
309 }
310
Alan Donovan312d1a52017-10-02 10:10:28 -0400311 // Expression statement (e.g. function call, doc string).
312 return &ExprStmt{X: x}
313}
314
alandonovan6696fc32017-10-20 10:55:17 -0400315// stmt = LOAD '(' STRING {',' (IDENT '=')? STRING} [','] ')'
316func (p *parser) parseLoadStmt() *LoadStmt {
317 loadPos := p.nextToken() // consume LOAD
318 lparen := p.consume(LPAREN)
Alan Donovan312d1a52017-10-02 10:10:28 -0400319
alandonovan6696fc32017-10-20 10:55:17 -0400320 if p.tok != STRING {
321 p.in.errorf(p.in.pos, "first operand of load statement must be a string literal")
322 }
323 module := p.parsePrimary().(*Literal)
324
325 var from, to []*Ident
326 for p.tok != RPAREN && p.tok != EOF {
327 p.consume(COMMA)
328 if p.tok == RPAREN {
329 break // allow trailing comma
330 }
331 switch p.tok {
332 case STRING:
Alan Donovan312d1a52017-10-02 10:10:28 -0400333 // load("module", "id")
334 // To name is same as original.
alandonovan6696fc32017-10-20 10:55:17 -0400335 lit := p.parsePrimary().(*Literal)
Alan Donovan312d1a52017-10-02 10:10:28 -0400336 id := &Ident{
337 NamePos: lit.TokenPos.add(`"`),
338 Name: lit.Value.(string),
339 }
alandonovan6696fc32017-10-20 10:55:17 -0400340 to = append(to, id)
341 from = append(from, id)
Alan Donovan312d1a52017-10-02 10:10:28 -0400342
alandonovan6696fc32017-10-20 10:55:17 -0400343 case IDENT:
344 // load("module", to="from")
345 id := p.parseIdent()
346 to = append(to, id)
347 if p.tok != EQ {
alandonovan4b42bbf2017-11-10 13:22:52 -0500348 p.in.errorf(p.in.pos, `load operand must be "%[1]s" or %[1]s="originalname" (want '=' after %[1]s)`, id.Name)
alandonovan6696fc32017-10-20 10:55:17 -0400349 }
350 p.consume(EQ)
351 if p.tok != STRING {
352 p.in.errorf(p.in.pos, `original name of loaded symbol must be quoted: %s="originalname"`, id.Name)
353 }
354 lit := p.parsePrimary().(*Literal)
355 from = append(from, &Ident{
356 NamePos: lit.TokenPos.add(`"`),
357 Name: lit.Value.(string),
358 })
359
360 case RPAREN:
361 p.in.errorf(p.in.pos, "trailing comma in load statement")
362
363 default:
364 p.in.errorf(p.in.pos, `load operand must be "name" or localname="name" (got %#v)`, p.tok)
365 }
366 }
367 rparen := p.consume(RPAREN)
368
369 if len(to) == 0 {
370 p.in.errorf(lparen, "load statement must import at least 1 symbol")
Alan Donovan312d1a52017-10-02 10:10:28 -0400371 }
372 return &LoadStmt{
373 Load: loadPos,
374 Module: module,
375 To: to,
376 From: from,
alandonovan6696fc32017-10-20 10:55:17 -0400377 Rparen: rparen,
Alan Donovan312d1a52017-10-02 10:10:28 -0400378 }
Alan Donovan312d1a52017-10-02 10:10:28 -0400379}
380
381// suite is typically what follows a COLON (e.g. after DEF or FOR).
382// suite = simple_stmt | NEWLINE INDENT stmt+ OUTDENT
383func (p *parser) parseSuite() []Stmt {
384 if p.tok == NEWLINE {
385 p.nextToken() // consume NEWLINE
386 p.consume(INDENT)
387 var stmts []Stmt
388 for p.tok != OUTDENT && p.tok != EOF {
389 stmts = p.parseStmt(stmts)
390 }
391 p.consume(OUTDENT)
392 return stmts
393 }
394
alandonovan30e71c62019-01-04 13:48:12 -0500395 return p.parseSimpleStmt(nil, true)
Alan Donovan312d1a52017-10-02 10:10:28 -0400396}
397
398func (p *parser) parseIdent() *Ident {
399 if p.tok != IDENT {
400 p.in.error(p.in.pos, "not an identifier")
401 }
402 id := &Ident{
403 NamePos: p.tokval.pos,
404 Name: p.tokval.raw,
405 }
406 p.nextToken()
407 return id
408}
409
410func (p *parser) consume(t Token) Position {
411 if p.tok != t {
412 p.in.errorf(p.in.pos, "got %#v, want %#v", p.tok, t)
413 }
414 return p.nextToken()
415}
416
alandonovandcffbd02020-03-05 18:20:40 -0500417// params = (param COMMA)* param COMMA?
Alan Donovan312d1a52017-10-02 10:10:28 -0400418// |
419//
420// param = IDENT
421// | IDENT EQ test
Alan Donovan52153852019-02-13 19:18:15 -0500422// | STAR
Alan Donovan312d1a52017-10-02 10:10:28 -0400423// | STAR IDENT
424// | STARSTAR IDENT
425//
426// parseParams parses a parameter list. The resulting expressions are of the form:
427//
Alan Donovan52153852019-02-13 19:18:15 -0500428// *Ident x
429// *Binary{Op: EQ, X: *Ident, Y: Expr} x=y
430// *Unary{Op: STAR} *
431// *Unary{Op: STAR, X: *Ident} *args
432// *Unary{Op: STARSTAR, X: *Ident} **kwargs
Alan Donovan312d1a52017-10-02 10:10:28 -0400433func (p *parser) parseParams() []Expr {
434 var params []Expr
Alan Donovan312d1a52017-10-02 10:10:28 -0400435 for p.tok != RPAREN && p.tok != COLON && p.tok != EOF {
436 if len(params) > 0 {
437 p.consume(COMMA)
438 }
439 if p.tok == RPAREN {
Alan Donovan312d1a52017-10-02 10:10:28 -0400440 break
441 }
442
Alan Donovan52153852019-02-13 19:18:15 -0500443 // * or *args or **kwargs
Taras Tsugrii6be536f2018-12-09 18:07:56 -0800444 if p.tok == STAR || p.tok == STARSTAR {
alandonovan97d77632018-12-09 21:18:13 -0500445 op := p.tok
Alan Donovan312d1a52017-10-02 10:10:28 -0400446 pos := p.nextToken()
Alan Donovan52153852019-02-13 19:18:15 -0500447 var x Expr
448 if op == STARSTAR || p.tok == IDENT {
449 x = p.parseIdent()
450 }
Alan Donovan312d1a52017-10-02 10:10:28 -0400451 params = append(params, &UnaryExpr{
452 OpPos: pos,
alandonovan97d77632018-12-09 21:18:13 -0500453 Op: op,
Alan Donovan52153852019-02-13 19:18:15 -0500454 X: x,
Alan Donovan312d1a52017-10-02 10:10:28 -0400455 })
456 continue
457 }
458
459 // IDENT
460 // IDENT = test
461 id := p.parseIdent()
462 if p.tok == EQ { // default value
463 eq := p.nextToken()
464 dflt := p.parseTest()
465 params = append(params, &BinaryExpr{
466 X: id,
467 OpPos: eq,
468 Op: EQ,
469 Y: dflt,
470 })
471 continue
472 }
473
474 params = append(params, id)
475 }
476 return params
477}
478
479// parseExpr parses an expression, possible consisting of a
480// comma-separated list of 'test' expressions.
481//
482// In many cases we must use parseTest to avoid ambiguity such as
483// f(x, y) vs. f((x, y)).
484func (p *parser) parseExpr(inParens bool) Expr {
485 x := p.parseTest()
486 if p.tok != COMMA {
487 return x
488 }
489
490 // tuple
491 exprs := p.parseExprs([]Expr{x}, inParens)
492 return &TupleExpr{List: exprs}
493}
494
495// parseExprs parses a comma-separated list of expressions, starting with the comma.
496// It is used to parse tuples and list elements.
497// expr_list = (',' expr)* ','?
498func (p *parser) parseExprs(exprs []Expr, allowTrailingComma bool) []Expr {
499 for p.tok == COMMA {
500 pos := p.nextToken()
501 if terminatesExprList(p.tok) {
502 if !allowTrailingComma {
503 p.in.error(pos, "unparenthesized tuple with trailing comma")
504 }
505 break
506 }
507 exprs = append(exprs, p.parseTest())
508 }
509 return exprs
510}
511
512// parseTest parses a 'test', a single-component expression.
513func (p *parser) parseTest() Expr {
514 if p.tok == LAMBDA {
alandonovanf9faf3b2018-01-16 10:06:02 -0500515 return p.parseLambda(true)
Alan Donovan312d1a52017-10-02 10:10:28 -0400516 }
517
518 x := p.parseTestPrec(0)
519
520 // conditional expression (t IF cond ELSE f)
521 if p.tok == IF {
522 ifpos := p.nextToken()
523 cond := p.parseTestPrec(0)
524 if p.tok != ELSE {
525 p.in.error(ifpos, "conditional expression without else clause")
526 }
527 elsepos := p.nextToken()
528 else_ := p.parseTest()
529 return &CondExpr{If: ifpos, Cond: cond, True: x, ElsePos: elsepos, False: else_}
530 }
531
532 return x
533}
534
alandonovanf9faf3b2018-01-16 10:06:02 -0500535// parseTestNoCond parses a a single-component expression without
536// consuming a trailing 'if expr else expr'.
537func (p *parser) parseTestNoCond() Expr {
538 if p.tok == LAMBDA {
539 return p.parseLambda(false)
540 }
541 return p.parseTestPrec(0)
542}
543
544// parseLambda parses a lambda expression.
545// The allowCond flag allows the body to be an 'a if b else c' conditional.
546func (p *parser) parseLambda(allowCond bool) Expr {
547 lambda := p.nextToken()
548 var params []Expr
549 if p.tok != COLON {
550 params = p.parseParams()
551 }
552 p.consume(COLON)
553
554 var body Expr
555 if allowCond {
556 body = p.parseTest()
557 } else {
558 body = p.parseTestNoCond()
559 }
560
561 return &LambdaExpr{
562 Lambda: lambda,
alandonovan6ddc71c2019-06-04 09:08:55 -0400563 Params: params,
564 Body: body,
alandonovanf9faf3b2018-01-16 10:06:02 -0500565 }
566}
567
Alan Donovan312d1a52017-10-02 10:10:28 -0400568func (p *parser) parseTestPrec(prec int) Expr {
569 if prec >= len(preclevels) {
570 return p.parsePrimaryWithSuffix()
571 }
572
573 // expr = NOT expr
574 if p.tok == NOT && prec == int(precedence[NOT]) {
575 pos := p.nextToken()
alandonovanc7df0452018-10-04 17:10:12 -0400576 x := p.parseTestPrec(prec)
Alan Donovan312d1a52017-10-02 10:10:28 -0400577 return &UnaryExpr{
578 OpPos: pos,
579 Op: NOT,
580 X: x,
581 }
582 }
583
584 return p.parseBinopExpr(prec)
585}
586
587// expr = test (OP test)*
588// Uses precedence climbing; see http://www.engr.mun.ca/~theo/Misc/exp_parsing.htm#climbing.
589func (p *parser) parseBinopExpr(prec int) Expr {
590 x := p.parseTestPrec(prec + 1)
591 for first := true; ; first = false {
592 if p.tok == NOT {
593 p.nextToken() // consume NOT
594 // In this context, NOT must be followed by IN.
595 // Replace NOT IN by a single NOT_IN token.
596 if p.tok != IN {
597 p.in.errorf(p.in.pos, "got %#v, want in", p.tok)
598 }
599 p.tok = NOT_IN
600 }
601
602 // Binary operator of specified precedence?
603 opprec := int(precedence[p.tok])
604 if opprec < prec {
605 return x
606 }
607
608 // Comparisons are non-associative.
609 if !first && opprec == int(precedence[EQL]) {
610 p.in.errorf(p.in.pos, "%s does not associate with %s (use parens)",
611 x.(*BinaryExpr).Op, p.tok)
612 }
613
614 op := p.tok
615 pos := p.nextToken()
616 y := p.parseTestPrec(opprec + 1)
alandonovan60e4b3d2018-04-02 12:32:39 -0400617 x = &BinaryExpr{OpPos: pos, Op: op, X: x, Y: y}
Alan Donovan312d1a52017-10-02 10:10:28 -0400618 }
619}
620
621// precedence maps each operator to its precedence (0-7), or -1 for other tokens.
622var precedence [maxToken]int8
623
624// preclevels groups operators of equal precedence.
625// Comparisons are nonassociative; other binary operators associate to the left.
Hittorp0a5e39a2018-08-09 15:02:30 +0300626// Unary MINUS, unary PLUS, and TILDE have higher precedence so are handled in parsePrimary.
alandonovan04aba6e2018-11-05 17:45:33 -0500627// See https://github.com/google/starlark-go/blob/master/doc/spec.md#binary-operators
Alan Donovan312d1a52017-10-02 10:10:28 -0400628var preclevels = [...][]Token{
alandonovanc7df0452018-10-04 17:10:12 -0400629 {OR}, // or
630 {AND}, // and
631 {NOT}, // not (unary)
Alan Donovan312d1a52017-10-02 10:10:28 -0400632 {EQL, NEQ, LT, GT, LE, GE, IN, NOT_IN}, // == != < > <= >= in not in
alandonovanc7df0452018-10-04 17:10:12 -0400633 {PIPE}, // |
634 {CIRCUMFLEX}, // ^
635 {AMP}, // &
636 {LTLT, GTGT}, // << >>
637 {MINUS, PLUS}, // -
638 {STAR, PERCENT, SLASH, SLASHSLASH}, // * % / //
Alan Donovan312d1a52017-10-02 10:10:28 -0400639}
640
641func init() {
642 // populate precedence table
643 for i := range precedence {
644 precedence[i] = -1
645 }
646 for level, tokens := range preclevels {
647 for _, tok := range tokens {
648 precedence[tok] = int8(level)
649 }
650 }
651}
652
Alan Donovan312d1a52017-10-02 10:10:28 -0400653// primary_with_suffix = primary
654// | primary '.' IDENT
655// | primary slice_suffix
656// | primary call_suffix
657func (p *parser) parsePrimaryWithSuffix() Expr {
658 x := p.parsePrimary()
659 for {
660 switch p.tok {
661 case DOT:
662 dot := p.nextToken()
663 id := p.parseIdent()
664 x = &DotExpr{Dot: dot, X: x, Name: id}
665 case LBRACK:
666 x = p.parseSliceSuffix(x)
667 case LPAREN:
668 x = p.parseCallSuffix(x)
669 default:
670 return x
671 }
672 }
673}
674
675// slice_suffix = '[' expr? ':' expr? ':' expr? ']'
676func (p *parser) parseSliceSuffix(x Expr) Expr {
677 lbrack := p.nextToken()
678 var lo, hi, step Expr
679 if p.tok != COLON {
680 y := p.parseExpr(false)
681
682 // index x[y]
683 if p.tok == RBRACK {
684 rbrack := p.nextToken()
685 return &IndexExpr{X: x, Lbrack: lbrack, Y: y, Rbrack: rbrack}
686 }
687
688 lo = y
689 }
690
691 // slice or substring x[lo:hi:step]
692 if p.tok == COLON {
693 p.nextToken()
694 if p.tok != COLON && p.tok != RBRACK {
695 hi = p.parseTest()
696 }
697 }
698 if p.tok == COLON {
699 p.nextToken()
700 if p.tok != RBRACK {
701 step = p.parseTest()
702 }
703 }
704 rbrack := p.consume(RBRACK)
705 return &SliceExpr{X: x, Lbrack: lbrack, Lo: lo, Hi: hi, Step: step, Rbrack: rbrack}
706}
707
708// call_suffix = '(' arg_list? ')'
709func (p *parser) parseCallSuffix(fn Expr) Expr {
710 lparen := p.consume(LPAREN)
711 var rparen Position
712 var args []Expr
713 if p.tok == RPAREN {
714 rparen = p.nextToken()
715 } else {
716 args = p.parseArgs()
717 rparen = p.consume(RPAREN)
718 }
719 return &CallExpr{Fn: fn, Lparen: lparen, Args: args, Rparen: rparen}
720}
721
722// parseArgs parses a list of actual parameter values (arguments).
723// It mirrors the structure of parseParams.
724// arg_list = ((arg COMMA)* arg COMMA?)?
725func (p *parser) parseArgs() []Expr {
726 var args []Expr
Alan Donovan312d1a52017-10-02 10:10:28 -0400727 for p.tok != RPAREN && p.tok != EOF {
728 if len(args) > 0 {
729 p.consume(COMMA)
730 }
731 if p.tok == RPAREN {
Alan Donovan312d1a52017-10-02 10:10:28 -0400732 break
733 }
734
alandonovan97d77632018-12-09 21:18:13 -0500735 // *args or **kwargs
736 if p.tok == STAR || p.tok == STARSTAR {
alandonovan97d77632018-12-09 21:18:13 -0500737 op := p.tok
Alan Donovan312d1a52017-10-02 10:10:28 -0400738 pos := p.nextToken()
739 x := p.parseTest()
740 args = append(args, &UnaryExpr{
741 OpPos: pos,
alandonovan97d77632018-12-09 21:18:13 -0500742 Op: op,
Alan Donovan312d1a52017-10-02 10:10:28 -0400743 X: x,
744 })
745 continue
746 }
747
748 // We use a different strategy from Bazel here to stay within LL(1).
749 // Instead of looking ahead two tokens (IDENT, EQ) we parse
750 // 'test = test' then check that the first was an IDENT.
751 x := p.parseTest()
752
753 if p.tok == EQ {
754 // name = value
755 if _, ok := x.(*Ident); !ok {
756 p.in.errorf(p.in.pos, "keyword argument must have form name=expr")
757 }
758 eq := p.nextToken()
759 y := p.parseTest()
760 x = &BinaryExpr{
761 X: x,
762 OpPos: eq,
763 Op: EQ,
764 Y: y,
765 }
766 }
767
768 args = append(args, x)
769 }
770 return args
771}
772
773// primary = IDENT
774// | INT | FLOAT
775// | STRING
776// | '[' ... // list literal or comprehension
777// | '{' ... // dict literal or comprehension
778// | '(' ... // tuple or parenthesized expression
alandonovanc7df0452018-10-04 17:10:12 -0400779// | ('-'|'+'|'~') primary_with_suffix
Alan Donovan312d1a52017-10-02 10:10:28 -0400780func (p *parser) parsePrimary() Expr {
781 switch p.tok {
782 case IDENT:
783 return p.parseIdent()
784
785 case INT, FLOAT, STRING:
786 var val interface{}
787 tok := p.tok
788 switch tok {
789 case INT:
Mohamed Elqdusy69e96152018-01-22 20:00:29 +0100790 if p.tokval.bigInt != nil {
791 val = p.tokval.bigInt
792 } else {
793 val = p.tokval.int
794 }
Alan Donovan312d1a52017-10-02 10:10:28 -0400795 case FLOAT:
796 val = p.tokval.float
797 case STRING:
798 val = p.tokval.string
799 }
800 raw := p.tokval.raw
801 pos := p.nextToken()
802 return &Literal{Token: tok, TokenPos: pos, Raw: raw, Value: val}
803
804 case LBRACK:
805 return p.parseList()
806
807 case LBRACE:
808 return p.parseDict()
809
810 case LPAREN:
811 lparen := p.nextToken()
812 if p.tok == RPAREN {
813 // empty tuple
814 rparen := p.nextToken()
815 return &TupleExpr{Lparen: lparen, Rparen: rparen}
816 }
817 e := p.parseExpr(true) // allow trailing comma
Laurent Le Brun28ceca72018-02-26 15:01:53 +0100818 rparen := p.consume(RPAREN)
819 return &ParenExpr{
820 Lparen: lparen,
821 X: e,
822 Rparen: rparen,
823 }
Alan Donovan312d1a52017-10-02 10:10:28 -0400824
alandonovanc7df0452018-10-04 17:10:12 -0400825 case MINUS, PLUS, TILDE: // unary
Alan Donovan312d1a52017-10-02 10:10:28 -0400826 tok := p.tok
827 pos := p.nextToken()
828 x := p.parsePrimaryWithSuffix()
829 return &UnaryExpr{
830 OpPos: pos,
831 Op: tok,
832 X: x,
833 }
834 }
835 p.in.errorf(p.in.pos, "got %#v, want primary expression", p.tok)
836 panic("unreachable")
837}
838
839// list = '[' ']'
840// | '[' expr ']'
841// | '[' expr expr_list ']'
842// | '[' expr (FOR loop_variables IN expr)+ ']'
843func (p *parser) parseList() Expr {
844 lbrack := p.nextToken()
845 if p.tok == RBRACK {
846 // empty List
847 rbrack := p.nextToken()
848 return &ListExpr{Lbrack: lbrack, Rbrack: rbrack}
849 }
850
851 x := p.parseTest()
852
853 if p.tok == FOR {
854 // list comprehension
855 return p.parseComprehensionSuffix(lbrack, x, RBRACK)
856 }
857
858 exprs := []Expr{x}
859 if p.tok == COMMA {
860 // multi-item list literal
861 exprs = p.parseExprs(exprs, true) // allow trailing comma
862 }
863
864 rbrack := p.consume(RBRACK)
865 return &ListExpr{Lbrack: lbrack, List: exprs, Rbrack: rbrack}
866}
867
868// dict = '{' '}'
869// | '{' dict_entry_list '}'
870// | '{' dict_entry FOR loop_variables IN expr '}'
871func (p *parser) parseDict() Expr {
872 lbrace := p.nextToken()
873 if p.tok == RBRACE {
874 // empty dict
875 rbrace := p.nextToken()
876 return &DictExpr{Lbrace: lbrace, Rbrace: rbrace}
877 }
878
879 x := p.parseDictEntry()
880
881 if p.tok == FOR {
882 // dict comprehension
883 return p.parseComprehensionSuffix(lbrace, x, RBRACE)
884 }
885
886 entries := []Expr{x}
887 for p.tok == COMMA {
888 p.nextToken()
889 if p.tok == RBRACE {
890 break
891 }
892 entries = append(entries, p.parseDictEntry())
893 }
894
895 rbrace := p.consume(RBRACE)
896 return &DictExpr{Lbrace: lbrace, List: entries, Rbrace: rbrace}
897}
898
899// dict_entry = test ':' test
900func (p *parser) parseDictEntry() *DictEntry {
901 k := p.parseTest()
902 colon := p.consume(COLON)
903 v := p.parseTest()
904 return &DictEntry{Key: k, Colon: colon, Value: v}
905}
906
907// comp_suffix = FOR loopvars IN expr comp_suffix
908// | IF expr comp_suffix
909// | ']' or ')' (end)
910//
911// There can be multiple FOR/IF clauses; the first is always a FOR.
912func (p *parser) parseComprehensionSuffix(lbrace Position, body Expr, endBrace Token) Expr {
913 var clauses []Node
914 for p.tok != endBrace {
915 if p.tok == FOR {
916 pos := p.nextToken()
917 vars := p.parseForLoopVariables()
918 in := p.consume(IN)
919 // Following Python 3, the operand of IN cannot be:
920 // - a conditional expression ('x if y else z'),
921 // due to conflicts in Python grammar
922 // ('if' is used by the comprehension);
923 // - a lambda expression
924 // - an unparenthesized tuple.
925 x := p.parseTestPrec(0)
926 clauses = append(clauses, &ForClause{For: pos, Vars: vars, In: in, X: x})
927 } else if p.tok == IF {
928 pos := p.nextToken()
alandonovanf9faf3b2018-01-16 10:06:02 -0500929 cond := p.parseTestNoCond()
Alan Donovan312d1a52017-10-02 10:10:28 -0400930 clauses = append(clauses, &IfClause{If: pos, Cond: cond})
931 } else {
932 p.in.errorf(p.in.pos, "got %#v, want '%s', for, or if", p.tok, endBrace)
933 }
934 }
935 rbrace := p.nextToken()
936
937 return &Comprehension{
938 Curly: endBrace == RBRACE,
939 Lbrack: lbrace,
940 Body: body,
941 Clauses: clauses,
942 Rbrack: rbrace,
943 }
944}
945
946func terminatesExprList(tok Token) bool {
947 switch tok {
948 case EOF, NEWLINE, EQ, RBRACE, RBRACK, RPAREN, SEMI:
949 return true
950 }
951 return false
952}
Laurent Le Brun689fc222018-02-22 19:37:18 +0100953
954// Comment assignment.
955// We build two lists of all subnodes, preorder and postorder.
956// The preorder list is ordered by start location, with outer nodes first.
957// The postorder list is ordered by end location, with outer nodes last.
958// We use the preorder list to assign each whole-line comment to the syntax
959// immediately following it, and we use the postorder list to assign each
960// end-of-line comment to the syntax immediately preceding it.
961
962// flattenAST returns the list of AST nodes, both in prefix order and in postfix
963// order.
964func flattenAST(root Node) (pre, post []Node) {
965 stack := []Node{}
966 Walk(root, func(n Node) bool {
967 if n != nil {
968 pre = append(pre, n)
969 stack = append(stack, n)
970 } else {
971 post = append(post, stack[len(stack)-1])
972 stack = stack[:len(stack)-1]
973 }
974 return true
975 })
976 return pre, post
977}
978
979// assignComments attaches comments to nearby syntax.
980func (p *parser) assignComments(n Node) {
981 // Leave early if there are no comments
982 if len(p.in.lineComments)+len(p.in.suffixComments) == 0 {
983 return
984 }
985
986 pre, post := flattenAST(n)
987
988 // Assign line comments to syntax immediately following.
989 line := p.in.lineComments
990 for _, x := range pre {
991 start, _ := x.Span()
992
993 switch x.(type) {
994 case *File:
995 continue
996 }
997
998 for len(line) > 0 && !start.isBefore(line[0].Start) {
999 x.AllocComments()
1000 x.Comments().Before = append(x.Comments().Before, line[0])
1001 line = line[1:]
1002 }
1003 }
1004
1005 // Remaining line comments go at end of file.
1006 if len(line) > 0 {
1007 n.AllocComments()
1008 n.Comments().After = append(n.Comments().After, line...)
1009 }
1010
1011 // Assign suffix comments to syntax immediately before.
1012 suffix := p.in.suffixComments
1013 for i := len(post) - 1; i >= 0; i-- {
1014 x := post[i]
1015
1016 // Do not assign suffix comments to file
1017 switch x.(type) {
1018 case *File:
1019 continue
1020 }
1021
1022 _, end := x.Span()
1023 if len(suffix) > 0 && end.isBefore(suffix[len(suffix)-1].Start) {
1024 x.AllocComments()
1025 x.Comments().Suffix = append(x.Comments().Suffix, suffix[len(suffix)-1])
1026 suffix = suffix[:len(suffix)-1]
1027 }
1028 }
1029}