blob: f4c8fff4d7428e0e6711836314cd15063f68050f [file] [log] [blame]
Alan Donovan312d1a52017-10-02 10:10:28 -04001// Copyright 2017 The Bazel Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package syntax
6
Alan Donovane3deafe2018-10-23 11:05:09 -04007// This file defines a recursive-descent parser for Starlark.
8// The LL(1) grammar of Starlark and the names of many productions follow Python 2.7.
Alan Donovan312d1a52017-10-02 10:10:28 -04009//
10// TODO(adonovan): use syntax.Error more systematically throughout the
11// package. Verify that error positions are correct using the
12// chunkedfile mechanism.
13
alandonovanb7e3b1f2018-12-12 17:14:58 -050014import "log"
Alan Donovan312d1a52017-10-02 10:10:28 -040015
16// Enable this flag to print the token stream and log.Fatal on the first error.
17const debug = false
18
Laurent Le Brun689fc222018-02-22 19:37:18 +010019// A Mode value is a set of flags (or 0) that controls optional parser functionality.
20type Mode uint
21
22const (
23 RetainComments Mode = 1 << iota // retain comments in AST; see Node.Comments
24)
25
Alan Donovan312d1a52017-10-02 10:10:28 -040026// Parse parses the input data and returns the corresponding parse tree.
27//
28// If src != nil, ParseFile parses the source from src and the filename
29// is only used when recording position information.
30// The type of the argument for the src parameter must be string,
alandonovan0a10e4f2021-02-08 12:20:22 -050031// []byte, io.Reader, or FilePortion.
Alan Donovan312d1a52017-10-02 10:10:28 -040032// If src == nil, ParseFile parses the file specified by filename.
Laurent Le Brun689fc222018-02-22 19:37:18 +010033func Parse(filename string, src interface{}, mode Mode) (f *File, err error) {
34 in, err := newScanner(filename, src, mode&RetainComments != 0)
Alan Donovan312d1a52017-10-02 10:10:28 -040035 if err != nil {
36 return nil, err
37 }
38 p := parser{in: in}
39 defer p.in.recover(&err)
40
41 p.nextToken() // read first lookahead token
42 f = p.parseFile()
43 if f != nil {
44 f.Path = filename
45 }
Laurent Le Brun689fc222018-02-22 19:37:18 +010046 p.assignComments(f)
Alan Donovan312d1a52017-10-02 10:10:28 -040047 return f, nil
48}
49
alandonovan30e71c62019-01-04 13:48:12 -050050// ParseCompoundStmt parses a single compound statement:
51// a blank line, a def, for, while, or if statement, or a
52// semicolon-separated list of simple statements followed
53// by a newline. These are the units on which the REPL operates.
54// ParseCompoundStmt does not consume any following input.
55// The parser calls the readline function each
56// time it needs a new line of input.
57func ParseCompoundStmt(filename string, readline func() ([]byte, error)) (f *File, err error) {
58 in, err := newScanner(filename, readline, false)
59 if err != nil {
60 return nil, err
61 }
62
63 p := parser{in: in}
64 defer p.in.recover(&err)
65
66 p.nextToken() // read first lookahead token
67
68 var stmts []Stmt
69 switch p.tok {
70 case DEF, IF, FOR, WHILE:
71 stmts = p.parseStmt(stmts)
72 case NEWLINE:
73 // blank line
74 default:
alandonovan30e71c62019-01-04 13:48:12 -050075 stmts = p.parseSimpleStmt(stmts, false)
alandonovan1258e4d2019-01-25 10:19:30 -050076 // Require but don't consume newline, to avoid blocking again.
77 if p.tok != NEWLINE {
78 p.in.errorf(p.in.pos, "invalid syntax")
79 }
alandonovan30e71c62019-01-04 13:48:12 -050080 }
81
82 return &File{Path: filename, Stmts: stmts}, nil
83}
84
Alan Donovane3deafe2018-10-23 11:05:09 -040085// ParseExpr parses a Starlark expression.
alandonovanf26cf182019-05-28 16:17:30 -040086// A comma-separated list of expressions is parsed as a tuple.
Alan Donovan312d1a52017-10-02 10:10:28 -040087// See Parse for explanation of parameters.
Laurent Le Brun689fc222018-02-22 19:37:18 +010088func ParseExpr(filename string, src interface{}, mode Mode) (expr Expr, err error) {
89 in, err := newScanner(filename, src, mode&RetainComments != 0)
Alan Donovan312d1a52017-10-02 10:10:28 -040090 if err != nil {
91 return nil, err
92 }
93 p := parser{in: in}
94 defer p.in.recover(&err)
95
96 p.nextToken() // read first lookahead token
alandonovan30e71c62019-01-04 13:48:12 -050097
alandonovanf26cf182019-05-28 16:17:30 -040098 // Use parseExpr, not parseTest, to permit an unparenthesized tuple.
99 expr = p.parseExpr(false)
Alan Donovan312d1a52017-10-02 10:10:28 -0400100
Alan Donovanae063842017-10-10 15:46:17 -0400101 // A following newline (e.g. "f()\n") appears outside any brackets,
alandonovanbd7aaf52017-10-11 16:34:02 -0400102 // on a non-blank line, and thus results in a NEWLINE token.
Alan Donovanae063842017-10-10 15:46:17 -0400103 if p.tok == NEWLINE {
104 p.nextToken()
105 }
106
Alan Donovan312d1a52017-10-02 10:10:28 -0400107 if p.tok != EOF {
108 p.in.errorf(p.in.pos, "got %#v after expression, want EOF", p.tok)
109 }
Laurent Le Brun689fc222018-02-22 19:37:18 +0100110 p.assignComments(expr)
Alan Donovan312d1a52017-10-02 10:10:28 -0400111 return expr, nil
112}
113
114type parser struct {
115 in *scanner
116 tok Token
117 tokval tokenValue
118}
119
120// nextToken advances the scanner and returns the position of the
121// previous token.
122func (p *parser) nextToken() Position {
123 oldpos := p.tokval.pos
124 p.tok = p.in.nextToken(&p.tokval)
125 // enable to see the token stream
126 if debug {
127 log.Printf("nextToken: %-20s%+v\n", p.tok, p.tokval.pos)
128 }
129 return oldpos
130}
131
132// file_input = (NEWLINE | stmt)* EOF
133func (p *parser) parseFile() *File {
134 var stmts []Stmt
135 for p.tok != EOF {
136 if p.tok == NEWLINE {
137 p.nextToken()
138 continue
139 }
140 stmts = p.parseStmt(stmts)
141 }
142 return &File{Stmts: stmts}
143}
144
145func (p *parser) parseStmt(stmts []Stmt) []Stmt {
146 if p.tok == DEF {
147 return append(stmts, p.parseDefStmt())
148 } else if p.tok == IF {
149 return append(stmts, p.parseIfStmt())
150 } else if p.tok == FOR {
151 return append(stmts, p.parseForStmt())
Alessandro Arzilli678bafe2018-12-07 17:28:35 +0100152 } else if p.tok == WHILE {
153 return append(stmts, p.parseWhileStmt())
Alan Donovan312d1a52017-10-02 10:10:28 -0400154 }
alandonovan30e71c62019-01-04 13:48:12 -0500155 return p.parseSimpleStmt(stmts, true)
Alan Donovan312d1a52017-10-02 10:10:28 -0400156}
157
158func (p *parser) parseDefStmt() Stmt {
159 defpos := p.nextToken() // consume DEF
160 id := p.parseIdent()
161 p.consume(LPAREN)
162 params := p.parseParams()
163 p.consume(RPAREN)
164 p.consume(COLON)
165 body := p.parseSuite()
166 return &DefStmt{
alandonovan6ddc71c2019-06-04 09:08:55 -0400167 Def: defpos,
168 Name: id,
169 Params: params,
170 Body: body,
Alan Donovan312d1a52017-10-02 10:10:28 -0400171 }
172}
173
174func (p *parser) parseIfStmt() Stmt {
175 ifpos := p.nextToken() // consume IF
176 cond := p.parseTest()
177 p.consume(COLON)
178 body := p.parseSuite()
179 ifStmt := &IfStmt{
180 If: ifpos,
181 Cond: cond,
182 True: body,
183 }
184 tail := ifStmt
185 for p.tok == ELIF {
186 elifpos := p.nextToken() // consume ELIF
187 cond := p.parseTest()
188 p.consume(COLON)
189 body := p.parseSuite()
190 elif := &IfStmt{
191 If: elifpos,
192 Cond: cond,
193 True: body,
194 }
195 tail.ElsePos = elifpos
196 tail.False = []Stmt{elif}
197 tail = elif
198 }
199 if p.tok == ELSE {
200 tail.ElsePos = p.nextToken() // consume ELSE
201 p.consume(COLON)
202 tail.False = p.parseSuite()
203 }
204 return ifStmt
205}
206
207func (p *parser) parseForStmt() Stmt {
208 forpos := p.nextToken() // consume FOR
209 vars := p.parseForLoopVariables()
210 p.consume(IN)
211 x := p.parseExpr(false)
212 p.consume(COLON)
213 body := p.parseSuite()
214 return &ForStmt{
215 For: forpos,
216 Vars: vars,
217 X: x,
218 Body: body,
219 }
220}
221
Alessandro Arzilli678bafe2018-12-07 17:28:35 +0100222func (p *parser) parseWhileStmt() Stmt {
223 whilepos := p.nextToken() // consume WHILE
224 cond := p.parseTest()
225 p.consume(COLON)
226 body := p.parseSuite()
227 return &WhileStmt{
228 While: whilepos,
229 Cond: cond,
230 Body: body,
231 }
232}
233
Alan Donovan312d1a52017-10-02 10:10:28 -0400234// Equivalent to 'exprlist' production in Python grammar.
235//
236// loop_variables = primary_with_suffix (COMMA primary_with_suffix)* COMMA?
237func (p *parser) parseForLoopVariables() Expr {
238 // Avoid parseExpr because it would consume the IN token
239 // following x in "for x in y: ...".
240 v := p.parsePrimaryWithSuffix()
241 if p.tok != COMMA {
242 return v
243 }
244
245 list := []Expr{v}
246 for p.tok == COMMA {
247 p.nextToken()
248 if terminatesExprList(p.tok) {
249 break
250 }
251 list = append(list, p.parsePrimaryWithSuffix())
252 }
253 return &TupleExpr{List: list}
254}
255
256// simple_stmt = small_stmt (SEMI small_stmt)* SEMI? NEWLINE
alandonovan30e71c62019-01-04 13:48:12 -0500257// In REPL mode, it does not consume the NEWLINE.
258func (p *parser) parseSimpleStmt(stmts []Stmt, consumeNL bool) []Stmt {
Alan Donovan312d1a52017-10-02 10:10:28 -0400259 for {
260 stmts = append(stmts, p.parseSmallStmt())
261 if p.tok != SEMI {
262 break
263 }
264 p.nextToken() // consume SEMI
265 if p.tok == NEWLINE || p.tok == EOF {
266 break
267 }
268 }
269 // EOF without NEWLINE occurs in `if x: pass`, for example.
alandonovan30e71c62019-01-04 13:48:12 -0500270 if p.tok != EOF && consumeNL {
Alan Donovan312d1a52017-10-02 10:10:28 -0400271 p.consume(NEWLINE)
272 }
alandonovan30e71c62019-01-04 13:48:12 -0500273
Alan Donovan312d1a52017-10-02 10:10:28 -0400274 return stmts
275}
276
277// small_stmt = RETURN expr?
278// | PASS | BREAK | CONTINUE
alandonovan6696fc32017-10-20 10:55:17 -0400279// | LOAD ...
Hittorp0a5e39a2018-08-09 15:02:30 +0300280// | expr ('=' | '+=' | '-=' | '*=' | '/=' | '%=' | '&=' | '|=' | '^=' | '<<=' | '>>=') expr // assign
Alan Donovan312d1a52017-10-02 10:10:28 -0400281// | expr
282func (p *parser) parseSmallStmt() Stmt {
alandonovan6696fc32017-10-20 10:55:17 -0400283 switch p.tok {
284 case RETURN:
Alan Donovan312d1a52017-10-02 10:10:28 -0400285 pos := p.nextToken() // consume RETURN
286 var result Expr
287 if p.tok != EOF && p.tok != NEWLINE && p.tok != SEMI {
288 result = p.parseExpr(false)
289 }
290 return &ReturnStmt{Return: pos, Result: result}
Alan Donovan312d1a52017-10-02 10:10:28 -0400291
Alan Donovan312d1a52017-10-02 10:10:28 -0400292 case BREAK, CONTINUE, PASS:
293 tok := p.tok
294 pos := p.nextToken() // consume it
295 return &BranchStmt{Token: tok, TokenPos: pos}
alandonovan6696fc32017-10-20 10:55:17 -0400296
297 case LOAD:
298 return p.parseLoadStmt()
Alan Donovan312d1a52017-10-02 10:10:28 -0400299 }
300
301 // Assignment
302 x := p.parseExpr(false)
303 switch p.tok {
Hittorp0a5e39a2018-08-09 15:02:30 +0300304 case EQ, PLUS_EQ, MINUS_EQ, STAR_EQ, SLASH_EQ, SLASHSLASH_EQ, PERCENT_EQ, AMP_EQ, PIPE_EQ, CIRCUMFLEX_EQ, LTLT_EQ, GTGT_EQ:
Alan Donovan312d1a52017-10-02 10:10:28 -0400305 op := p.tok
306 pos := p.nextToken() // consume op
307 rhs := p.parseExpr(false)
308 return &AssignStmt{OpPos: pos, Op: op, LHS: x, RHS: rhs}
309 }
310
Alan Donovan312d1a52017-10-02 10:10:28 -0400311 // Expression statement (e.g. function call, doc string).
312 return &ExprStmt{X: x}
313}
314
alandonovan6696fc32017-10-20 10:55:17 -0400315// stmt = LOAD '(' STRING {',' (IDENT '=')? STRING} [','] ')'
316func (p *parser) parseLoadStmt() *LoadStmt {
317 loadPos := p.nextToken() // consume LOAD
318 lparen := p.consume(LPAREN)
Alan Donovan312d1a52017-10-02 10:10:28 -0400319
alandonovan6696fc32017-10-20 10:55:17 -0400320 if p.tok != STRING {
321 p.in.errorf(p.in.pos, "first operand of load statement must be a string literal")
322 }
323 module := p.parsePrimary().(*Literal)
324
325 var from, to []*Ident
326 for p.tok != RPAREN && p.tok != EOF {
327 p.consume(COMMA)
328 if p.tok == RPAREN {
329 break // allow trailing comma
330 }
331 switch p.tok {
332 case STRING:
Alan Donovan312d1a52017-10-02 10:10:28 -0400333 // load("module", "id")
334 // To name is same as original.
alandonovan6696fc32017-10-20 10:55:17 -0400335 lit := p.parsePrimary().(*Literal)
Alan Donovan312d1a52017-10-02 10:10:28 -0400336 id := &Ident{
337 NamePos: lit.TokenPos.add(`"`),
338 Name: lit.Value.(string),
339 }
alandonovan6696fc32017-10-20 10:55:17 -0400340 to = append(to, id)
341 from = append(from, id)
Alan Donovan312d1a52017-10-02 10:10:28 -0400342
alandonovan6696fc32017-10-20 10:55:17 -0400343 case IDENT:
344 // load("module", to="from")
345 id := p.parseIdent()
346 to = append(to, id)
347 if p.tok != EQ {
alandonovan4b42bbf2017-11-10 13:22:52 -0500348 p.in.errorf(p.in.pos, `load operand must be "%[1]s" or %[1]s="originalname" (want '=' after %[1]s)`, id.Name)
alandonovan6696fc32017-10-20 10:55:17 -0400349 }
350 p.consume(EQ)
351 if p.tok != STRING {
352 p.in.errorf(p.in.pos, `original name of loaded symbol must be quoted: %s="originalname"`, id.Name)
353 }
354 lit := p.parsePrimary().(*Literal)
355 from = append(from, &Ident{
356 NamePos: lit.TokenPos.add(`"`),
357 Name: lit.Value.(string),
358 })
359
360 case RPAREN:
361 p.in.errorf(p.in.pos, "trailing comma in load statement")
362
363 default:
364 p.in.errorf(p.in.pos, `load operand must be "name" or localname="name" (got %#v)`, p.tok)
365 }
366 }
367 rparen := p.consume(RPAREN)
368
369 if len(to) == 0 {
370 p.in.errorf(lparen, "load statement must import at least 1 symbol")
Alan Donovan312d1a52017-10-02 10:10:28 -0400371 }
372 return &LoadStmt{
373 Load: loadPos,
374 Module: module,
375 To: to,
376 From: from,
alandonovan6696fc32017-10-20 10:55:17 -0400377 Rparen: rparen,
Alan Donovan312d1a52017-10-02 10:10:28 -0400378 }
Alan Donovan312d1a52017-10-02 10:10:28 -0400379}
380
381// suite is typically what follows a COLON (e.g. after DEF or FOR).
382// suite = simple_stmt | NEWLINE INDENT stmt+ OUTDENT
383func (p *parser) parseSuite() []Stmt {
384 if p.tok == NEWLINE {
385 p.nextToken() // consume NEWLINE
386 p.consume(INDENT)
387 var stmts []Stmt
388 for p.tok != OUTDENT && p.tok != EOF {
389 stmts = p.parseStmt(stmts)
390 }
391 p.consume(OUTDENT)
392 return stmts
393 }
394
alandonovan30e71c62019-01-04 13:48:12 -0500395 return p.parseSimpleStmt(nil, true)
Alan Donovan312d1a52017-10-02 10:10:28 -0400396}
397
398func (p *parser) parseIdent() *Ident {
399 if p.tok != IDENT {
400 p.in.error(p.in.pos, "not an identifier")
401 }
402 id := &Ident{
403 NamePos: p.tokval.pos,
404 Name: p.tokval.raw,
405 }
406 p.nextToken()
407 return id
408}
409
410func (p *parser) consume(t Token) Position {
411 if p.tok != t {
412 p.in.errorf(p.in.pos, "got %#v, want %#v", p.tok, t)
413 }
414 return p.nextToken()
415}
416
alandonovandcffbd02020-03-05 18:20:40 -0500417// params = (param COMMA)* param COMMA?
Alan Donovan312d1a52017-10-02 10:10:28 -0400418// |
419//
420// param = IDENT
421// | IDENT EQ test
Alan Donovan52153852019-02-13 19:18:15 -0500422// | STAR
Alan Donovan312d1a52017-10-02 10:10:28 -0400423// | STAR IDENT
424// | STARSTAR IDENT
425//
426// parseParams parses a parameter list. The resulting expressions are of the form:
427//
Alan Donovan52153852019-02-13 19:18:15 -0500428// *Ident x
429// *Binary{Op: EQ, X: *Ident, Y: Expr} x=y
430// *Unary{Op: STAR} *
431// *Unary{Op: STAR, X: *Ident} *args
432// *Unary{Op: STARSTAR, X: *Ident} **kwargs
Alan Donovan312d1a52017-10-02 10:10:28 -0400433func (p *parser) parseParams() []Expr {
434 var params []Expr
Alan Donovan312d1a52017-10-02 10:10:28 -0400435 for p.tok != RPAREN && p.tok != COLON && p.tok != EOF {
436 if len(params) > 0 {
437 p.consume(COMMA)
438 }
439 if p.tok == RPAREN {
Alan Donovan312d1a52017-10-02 10:10:28 -0400440 break
441 }
442
Alan Donovan52153852019-02-13 19:18:15 -0500443 // * or *args or **kwargs
Taras Tsugrii6be536f2018-12-09 18:07:56 -0800444 if p.tok == STAR || p.tok == STARSTAR {
alandonovan97d77632018-12-09 21:18:13 -0500445 op := p.tok
Alan Donovan312d1a52017-10-02 10:10:28 -0400446 pos := p.nextToken()
Alan Donovan52153852019-02-13 19:18:15 -0500447 var x Expr
448 if op == STARSTAR || p.tok == IDENT {
449 x = p.parseIdent()
450 }
Alan Donovan312d1a52017-10-02 10:10:28 -0400451 params = append(params, &UnaryExpr{
452 OpPos: pos,
alandonovan97d77632018-12-09 21:18:13 -0500453 Op: op,
Alan Donovan52153852019-02-13 19:18:15 -0500454 X: x,
Alan Donovan312d1a52017-10-02 10:10:28 -0400455 })
456 continue
457 }
458
459 // IDENT
460 // IDENT = test
461 id := p.parseIdent()
462 if p.tok == EQ { // default value
463 eq := p.nextToken()
464 dflt := p.parseTest()
465 params = append(params, &BinaryExpr{
466 X: id,
467 OpPos: eq,
468 Op: EQ,
469 Y: dflt,
470 })
471 continue
472 }
473
474 params = append(params, id)
475 }
476 return params
477}
478
479// parseExpr parses an expression, possible consisting of a
480// comma-separated list of 'test' expressions.
481//
482// In many cases we must use parseTest to avoid ambiguity such as
483// f(x, y) vs. f((x, y)).
484func (p *parser) parseExpr(inParens bool) Expr {
485 x := p.parseTest()
486 if p.tok != COMMA {
487 return x
488 }
489
490 // tuple
491 exprs := p.parseExprs([]Expr{x}, inParens)
492 return &TupleExpr{List: exprs}
493}
494
495// parseExprs parses a comma-separated list of expressions, starting with the comma.
496// It is used to parse tuples and list elements.
497// expr_list = (',' expr)* ','?
498func (p *parser) parseExprs(exprs []Expr, allowTrailingComma bool) []Expr {
499 for p.tok == COMMA {
500 pos := p.nextToken()
501 if terminatesExprList(p.tok) {
502 if !allowTrailingComma {
503 p.in.error(pos, "unparenthesized tuple with trailing comma")
504 }
505 break
506 }
507 exprs = append(exprs, p.parseTest())
508 }
509 return exprs
510}
511
512// parseTest parses a 'test', a single-component expression.
513func (p *parser) parseTest() Expr {
514 if p.tok == LAMBDA {
alandonovanf9faf3b2018-01-16 10:06:02 -0500515 return p.parseLambda(true)
Alan Donovan312d1a52017-10-02 10:10:28 -0400516 }
517
518 x := p.parseTestPrec(0)
519
520 // conditional expression (t IF cond ELSE f)
521 if p.tok == IF {
522 ifpos := p.nextToken()
523 cond := p.parseTestPrec(0)
524 if p.tok != ELSE {
525 p.in.error(ifpos, "conditional expression without else clause")
526 }
527 elsepos := p.nextToken()
528 else_ := p.parseTest()
529 return &CondExpr{If: ifpos, Cond: cond, True: x, ElsePos: elsepos, False: else_}
530 }
531
532 return x
533}
534
alandonovanf9faf3b2018-01-16 10:06:02 -0500535// parseTestNoCond parses a a single-component expression without
536// consuming a trailing 'if expr else expr'.
537func (p *parser) parseTestNoCond() Expr {
538 if p.tok == LAMBDA {
539 return p.parseLambda(false)
540 }
541 return p.parseTestPrec(0)
542}
543
544// parseLambda parses a lambda expression.
545// The allowCond flag allows the body to be an 'a if b else c' conditional.
546func (p *parser) parseLambda(allowCond bool) Expr {
547 lambda := p.nextToken()
548 var params []Expr
549 if p.tok != COLON {
550 params = p.parseParams()
551 }
552 p.consume(COLON)
553
554 var body Expr
555 if allowCond {
556 body = p.parseTest()
557 } else {
558 body = p.parseTestNoCond()
559 }
560
561 return &LambdaExpr{
562 Lambda: lambda,
alandonovan6ddc71c2019-06-04 09:08:55 -0400563 Params: params,
564 Body: body,
alandonovanf9faf3b2018-01-16 10:06:02 -0500565 }
566}
567
Alan Donovan312d1a52017-10-02 10:10:28 -0400568func (p *parser) parseTestPrec(prec int) Expr {
569 if prec >= len(preclevels) {
570 return p.parsePrimaryWithSuffix()
571 }
572
573 // expr = NOT expr
574 if p.tok == NOT && prec == int(precedence[NOT]) {
575 pos := p.nextToken()
alandonovanc7df0452018-10-04 17:10:12 -0400576 x := p.parseTestPrec(prec)
Alan Donovan312d1a52017-10-02 10:10:28 -0400577 return &UnaryExpr{
578 OpPos: pos,
579 Op: NOT,
580 X: x,
581 }
582 }
583
584 return p.parseBinopExpr(prec)
585}
586
587// expr = test (OP test)*
588// Uses precedence climbing; see http://www.engr.mun.ca/~theo/Misc/exp_parsing.htm#climbing.
589func (p *parser) parseBinopExpr(prec int) Expr {
590 x := p.parseTestPrec(prec + 1)
591 for first := true; ; first = false {
592 if p.tok == NOT {
593 p.nextToken() // consume NOT
594 // In this context, NOT must be followed by IN.
595 // Replace NOT IN by a single NOT_IN token.
596 if p.tok != IN {
597 p.in.errorf(p.in.pos, "got %#v, want in", p.tok)
598 }
599 p.tok = NOT_IN
600 }
601
602 // Binary operator of specified precedence?
603 opprec := int(precedence[p.tok])
604 if opprec < prec {
605 return x
606 }
607
608 // Comparisons are non-associative.
609 if !first && opprec == int(precedence[EQL]) {
610 p.in.errorf(p.in.pos, "%s does not associate with %s (use parens)",
611 x.(*BinaryExpr).Op, p.tok)
612 }
613
614 op := p.tok
615 pos := p.nextToken()
616 y := p.parseTestPrec(opprec + 1)
alandonovan60e4b3d2018-04-02 12:32:39 -0400617 x = &BinaryExpr{OpPos: pos, Op: op, X: x, Y: y}
Alan Donovan312d1a52017-10-02 10:10:28 -0400618 }
619}
620
621// precedence maps each operator to its precedence (0-7), or -1 for other tokens.
622var precedence [maxToken]int8
623
624// preclevels groups operators of equal precedence.
625// Comparisons are nonassociative; other binary operators associate to the left.
Hittorp0a5e39a2018-08-09 15:02:30 +0300626// Unary MINUS, unary PLUS, and TILDE have higher precedence so are handled in parsePrimary.
alandonovan04aba6e2018-11-05 17:45:33 -0500627// See https://github.com/google/starlark-go/blob/master/doc/spec.md#binary-operators
Alan Donovan312d1a52017-10-02 10:10:28 -0400628var preclevels = [...][]Token{
alandonovanc7df0452018-10-04 17:10:12 -0400629 {OR}, // or
630 {AND}, // and
631 {NOT}, // not (unary)
Alan Donovan312d1a52017-10-02 10:10:28 -0400632 {EQL, NEQ, LT, GT, LE, GE, IN, NOT_IN}, // == != < > <= >= in not in
alandonovanc7df0452018-10-04 17:10:12 -0400633 {PIPE}, // |
634 {CIRCUMFLEX}, // ^
635 {AMP}, // &
636 {LTLT, GTGT}, // << >>
637 {MINUS, PLUS}, // -
638 {STAR, PERCENT, SLASH, SLASHSLASH}, // * % / //
Alan Donovan312d1a52017-10-02 10:10:28 -0400639}
640
641func init() {
642 // populate precedence table
643 for i := range precedence {
644 precedence[i] = -1
645 }
646 for level, tokens := range preclevels {
647 for _, tok := range tokens {
648 precedence[tok] = int8(level)
649 }
650 }
651}
652
Alan Donovan312d1a52017-10-02 10:10:28 -0400653// primary_with_suffix = primary
654// | primary '.' IDENT
655// | primary slice_suffix
656// | primary call_suffix
657func (p *parser) parsePrimaryWithSuffix() Expr {
658 x := p.parsePrimary()
659 for {
660 switch p.tok {
661 case DOT:
662 dot := p.nextToken()
663 id := p.parseIdent()
664 x = &DotExpr{Dot: dot, X: x, Name: id}
665 case LBRACK:
666 x = p.parseSliceSuffix(x)
667 case LPAREN:
668 x = p.parseCallSuffix(x)
669 default:
670 return x
671 }
672 }
673}
674
675// slice_suffix = '[' expr? ':' expr? ':' expr? ']'
676func (p *parser) parseSliceSuffix(x Expr) Expr {
677 lbrack := p.nextToken()
678 var lo, hi, step Expr
679 if p.tok != COLON {
680 y := p.parseExpr(false)
681
682 // index x[y]
683 if p.tok == RBRACK {
684 rbrack := p.nextToken()
685 return &IndexExpr{X: x, Lbrack: lbrack, Y: y, Rbrack: rbrack}
686 }
687
688 lo = y
689 }
690
691 // slice or substring x[lo:hi:step]
692 if p.tok == COLON {
693 p.nextToken()
694 if p.tok != COLON && p.tok != RBRACK {
695 hi = p.parseTest()
696 }
697 }
698 if p.tok == COLON {
699 p.nextToken()
700 if p.tok != RBRACK {
701 step = p.parseTest()
702 }
703 }
704 rbrack := p.consume(RBRACK)
705 return &SliceExpr{X: x, Lbrack: lbrack, Lo: lo, Hi: hi, Step: step, Rbrack: rbrack}
706}
707
708// call_suffix = '(' arg_list? ')'
709func (p *parser) parseCallSuffix(fn Expr) Expr {
710 lparen := p.consume(LPAREN)
711 var rparen Position
712 var args []Expr
713 if p.tok == RPAREN {
714 rparen = p.nextToken()
715 } else {
716 args = p.parseArgs()
717 rparen = p.consume(RPAREN)
718 }
719 return &CallExpr{Fn: fn, Lparen: lparen, Args: args, Rparen: rparen}
720}
721
722// parseArgs parses a list of actual parameter values (arguments).
723// It mirrors the structure of parseParams.
724// arg_list = ((arg COMMA)* arg COMMA?)?
725func (p *parser) parseArgs() []Expr {
726 var args []Expr
Alan Donovan312d1a52017-10-02 10:10:28 -0400727 for p.tok != RPAREN && p.tok != EOF {
728 if len(args) > 0 {
729 p.consume(COMMA)
730 }
731 if p.tok == RPAREN {
Alan Donovan312d1a52017-10-02 10:10:28 -0400732 break
733 }
734
alandonovan97d77632018-12-09 21:18:13 -0500735 // *args or **kwargs
736 if p.tok == STAR || p.tok == STARSTAR {
alandonovan97d77632018-12-09 21:18:13 -0500737 op := p.tok
Alan Donovan312d1a52017-10-02 10:10:28 -0400738 pos := p.nextToken()
739 x := p.parseTest()
740 args = append(args, &UnaryExpr{
741 OpPos: pos,
alandonovan97d77632018-12-09 21:18:13 -0500742 Op: op,
Alan Donovan312d1a52017-10-02 10:10:28 -0400743 X: x,
744 })
745 continue
746 }
747
748 // We use a different strategy from Bazel here to stay within LL(1).
749 // Instead of looking ahead two tokens (IDENT, EQ) we parse
750 // 'test = test' then check that the first was an IDENT.
751 x := p.parseTest()
752
753 if p.tok == EQ {
754 // name = value
755 if _, ok := x.(*Ident); !ok {
756 p.in.errorf(p.in.pos, "keyword argument must have form name=expr")
757 }
758 eq := p.nextToken()
759 y := p.parseTest()
760 x = &BinaryExpr{
761 X: x,
762 OpPos: eq,
763 Op: EQ,
764 Y: y,
765 }
766 }
767
768 args = append(args, x)
769 }
770 return args
771}
772
773// primary = IDENT
alandonovanebe61bd2021-02-12 16:57:32 -0500774// | INT | FLOAT | STRING | BYTES
Alan Donovan312d1a52017-10-02 10:10:28 -0400775// | '[' ... // list literal or comprehension
776// | '{' ... // dict literal or comprehension
777// | '(' ... // tuple or parenthesized expression
alandonovanc7df0452018-10-04 17:10:12 -0400778// | ('-'|'+'|'~') primary_with_suffix
Alan Donovan312d1a52017-10-02 10:10:28 -0400779func (p *parser) parsePrimary() Expr {
780 switch p.tok {
781 case IDENT:
782 return p.parseIdent()
783
alandonovanebe61bd2021-02-12 16:57:32 -0500784 case INT, FLOAT, STRING, BYTES:
Alan Donovan312d1a52017-10-02 10:10:28 -0400785 var val interface{}
786 tok := p.tok
787 switch tok {
788 case INT:
Mohamed Elqdusy69e96152018-01-22 20:00:29 +0100789 if p.tokval.bigInt != nil {
790 val = p.tokval.bigInt
791 } else {
792 val = p.tokval.int
793 }
Alan Donovan312d1a52017-10-02 10:10:28 -0400794 case FLOAT:
795 val = p.tokval.float
alandonovanebe61bd2021-02-12 16:57:32 -0500796 case STRING, BYTES:
Alan Donovan312d1a52017-10-02 10:10:28 -0400797 val = p.tokval.string
798 }
799 raw := p.tokval.raw
800 pos := p.nextToken()
801 return &Literal{Token: tok, TokenPos: pos, Raw: raw, Value: val}
802
803 case LBRACK:
804 return p.parseList()
805
806 case LBRACE:
807 return p.parseDict()
808
809 case LPAREN:
810 lparen := p.nextToken()
811 if p.tok == RPAREN {
812 // empty tuple
813 rparen := p.nextToken()
814 return &TupleExpr{Lparen: lparen, Rparen: rparen}
815 }
816 e := p.parseExpr(true) // allow trailing comma
Laurent Le Brun28ceca72018-02-26 15:01:53 +0100817 rparen := p.consume(RPAREN)
818 return &ParenExpr{
819 Lparen: lparen,
820 X: e,
821 Rparen: rparen,
822 }
Alan Donovan312d1a52017-10-02 10:10:28 -0400823
alandonovanc7df0452018-10-04 17:10:12 -0400824 case MINUS, PLUS, TILDE: // unary
Alan Donovan312d1a52017-10-02 10:10:28 -0400825 tok := p.tok
826 pos := p.nextToken()
827 x := p.parsePrimaryWithSuffix()
828 return &UnaryExpr{
829 OpPos: pos,
830 Op: tok,
831 X: x,
832 }
833 }
834 p.in.errorf(p.in.pos, "got %#v, want primary expression", p.tok)
835 panic("unreachable")
836}
837
838// list = '[' ']'
839// | '[' expr ']'
840// | '[' expr expr_list ']'
841// | '[' expr (FOR loop_variables IN expr)+ ']'
842func (p *parser) parseList() Expr {
843 lbrack := p.nextToken()
844 if p.tok == RBRACK {
845 // empty List
846 rbrack := p.nextToken()
847 return &ListExpr{Lbrack: lbrack, Rbrack: rbrack}
848 }
849
850 x := p.parseTest()
851
852 if p.tok == FOR {
853 // list comprehension
854 return p.parseComprehensionSuffix(lbrack, x, RBRACK)
855 }
856
857 exprs := []Expr{x}
858 if p.tok == COMMA {
859 // multi-item list literal
860 exprs = p.parseExprs(exprs, true) // allow trailing comma
861 }
862
863 rbrack := p.consume(RBRACK)
864 return &ListExpr{Lbrack: lbrack, List: exprs, Rbrack: rbrack}
865}
866
867// dict = '{' '}'
868// | '{' dict_entry_list '}'
869// | '{' dict_entry FOR loop_variables IN expr '}'
870func (p *parser) parseDict() Expr {
871 lbrace := p.nextToken()
872 if p.tok == RBRACE {
873 // empty dict
874 rbrace := p.nextToken()
875 return &DictExpr{Lbrace: lbrace, Rbrace: rbrace}
876 }
877
878 x := p.parseDictEntry()
879
880 if p.tok == FOR {
881 // dict comprehension
882 return p.parseComprehensionSuffix(lbrace, x, RBRACE)
883 }
884
885 entries := []Expr{x}
886 for p.tok == COMMA {
887 p.nextToken()
888 if p.tok == RBRACE {
889 break
890 }
891 entries = append(entries, p.parseDictEntry())
892 }
893
894 rbrace := p.consume(RBRACE)
895 return &DictExpr{Lbrace: lbrace, List: entries, Rbrace: rbrace}
896}
897
898// dict_entry = test ':' test
899func (p *parser) parseDictEntry() *DictEntry {
900 k := p.parseTest()
901 colon := p.consume(COLON)
902 v := p.parseTest()
903 return &DictEntry{Key: k, Colon: colon, Value: v}
904}
905
906// comp_suffix = FOR loopvars IN expr comp_suffix
907// | IF expr comp_suffix
908// | ']' or ')' (end)
909//
910// There can be multiple FOR/IF clauses; the first is always a FOR.
911func (p *parser) parseComprehensionSuffix(lbrace Position, body Expr, endBrace Token) Expr {
912 var clauses []Node
913 for p.tok != endBrace {
914 if p.tok == FOR {
915 pos := p.nextToken()
916 vars := p.parseForLoopVariables()
917 in := p.consume(IN)
918 // Following Python 3, the operand of IN cannot be:
919 // - a conditional expression ('x if y else z'),
920 // due to conflicts in Python grammar
921 // ('if' is used by the comprehension);
922 // - a lambda expression
923 // - an unparenthesized tuple.
924 x := p.parseTestPrec(0)
925 clauses = append(clauses, &ForClause{For: pos, Vars: vars, In: in, X: x})
926 } else if p.tok == IF {
927 pos := p.nextToken()
alandonovanf9faf3b2018-01-16 10:06:02 -0500928 cond := p.parseTestNoCond()
Alan Donovan312d1a52017-10-02 10:10:28 -0400929 clauses = append(clauses, &IfClause{If: pos, Cond: cond})
930 } else {
931 p.in.errorf(p.in.pos, "got %#v, want '%s', for, or if", p.tok, endBrace)
932 }
933 }
934 rbrace := p.nextToken()
935
936 return &Comprehension{
937 Curly: endBrace == RBRACE,
938 Lbrack: lbrace,
939 Body: body,
940 Clauses: clauses,
941 Rbrack: rbrace,
942 }
943}
944
945func terminatesExprList(tok Token) bool {
946 switch tok {
947 case EOF, NEWLINE, EQ, RBRACE, RBRACK, RPAREN, SEMI:
948 return true
949 }
950 return false
951}
Laurent Le Brun689fc222018-02-22 19:37:18 +0100952
953// Comment assignment.
954// We build two lists of all subnodes, preorder and postorder.
955// The preorder list is ordered by start location, with outer nodes first.
956// The postorder list is ordered by end location, with outer nodes last.
957// We use the preorder list to assign each whole-line comment to the syntax
958// immediately following it, and we use the postorder list to assign each
959// end-of-line comment to the syntax immediately preceding it.
960
961// flattenAST returns the list of AST nodes, both in prefix order and in postfix
962// order.
963func flattenAST(root Node) (pre, post []Node) {
964 stack := []Node{}
965 Walk(root, func(n Node) bool {
966 if n != nil {
967 pre = append(pre, n)
968 stack = append(stack, n)
969 } else {
970 post = append(post, stack[len(stack)-1])
971 stack = stack[:len(stack)-1]
972 }
973 return true
974 })
975 return pre, post
976}
977
978// assignComments attaches comments to nearby syntax.
979func (p *parser) assignComments(n Node) {
980 // Leave early if there are no comments
981 if len(p.in.lineComments)+len(p.in.suffixComments) == 0 {
982 return
983 }
984
985 pre, post := flattenAST(n)
986
987 // Assign line comments to syntax immediately following.
988 line := p.in.lineComments
989 for _, x := range pre {
990 start, _ := x.Span()
991
992 switch x.(type) {
993 case *File:
994 continue
995 }
996
997 for len(line) > 0 && !start.isBefore(line[0].Start) {
998 x.AllocComments()
999 x.Comments().Before = append(x.Comments().Before, line[0])
1000 line = line[1:]
1001 }
1002 }
1003
1004 // Remaining line comments go at end of file.
1005 if len(line) > 0 {
1006 n.AllocComments()
1007 n.Comments().After = append(n.Comments().After, line...)
1008 }
1009
1010 // Assign suffix comments to syntax immediately before.
1011 suffix := p.in.suffixComments
1012 for i := len(post) - 1; i >= 0; i-- {
1013 x := post[i]
1014
1015 // Do not assign suffix comments to file
1016 switch x.(type) {
1017 case *File:
1018 continue
1019 }
1020
1021 _, end := x.Span()
1022 if len(suffix) > 0 && end.isBefore(suffix[len(suffix)-1].Start) {
1023 x.AllocComments()
1024 x.Comments().Suffix = append(x.Comments().Suffix, suffix[len(suffix)-1])
1025 suffix = suffix[:len(suffix)-1]
1026 }
1027 }
1028}