blob: af79cc1850b1664f727e1bc50f9ffbe8cf576476 [file] [log] [blame]
Alan Donovan312d1a52017-10-02 10:10:28 -04001// Copyright 2017 The Bazel Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package syntax
6
Alan Donovane3deafe2018-10-23 11:05:09 -04007// This file defines a recursive-descent parser for Starlark.
8// The LL(1) grammar of Starlark and the names of many productions follow Python 2.7.
Alan Donovan312d1a52017-10-02 10:10:28 -04009//
10// TODO(adonovan): use syntax.Error more systematically throughout the
11// package. Verify that error positions are correct using the
12// chunkedfile mechanism.
13
alandonovanb7e3b1f2018-12-12 17:14:58 -050014import "log"
Alan Donovan312d1a52017-10-02 10:10:28 -040015
16// Enable this flag to print the token stream and log.Fatal on the first error.
17const debug = false
18
Laurent Le Brun689fc222018-02-22 19:37:18 +010019// A Mode value is a set of flags (or 0) that controls optional parser functionality.
20type Mode uint
21
22const (
23 RetainComments Mode = 1 << iota // retain comments in AST; see Node.Comments
24)
25
Alan Donovan312d1a52017-10-02 10:10:28 -040026// Parse parses the input data and returns the corresponding parse tree.
27//
28// If src != nil, ParseFile parses the source from src and the filename
29// is only used when recording position information.
30// The type of the argument for the src parameter must be string,
31// []byte, or io.Reader.
32// If src == nil, ParseFile parses the file specified by filename.
Laurent Le Brun689fc222018-02-22 19:37:18 +010033func Parse(filename string, src interface{}, mode Mode) (f *File, err error) {
34 in, err := newScanner(filename, src, mode&RetainComments != 0)
Alan Donovan312d1a52017-10-02 10:10:28 -040035 if err != nil {
36 return nil, err
37 }
38 p := parser{in: in}
39 defer p.in.recover(&err)
40
41 p.nextToken() // read first lookahead token
42 f = p.parseFile()
43 if f != nil {
44 f.Path = filename
45 }
Laurent Le Brun689fc222018-02-22 19:37:18 +010046 p.assignComments(f)
Alan Donovan312d1a52017-10-02 10:10:28 -040047 return f, nil
48}
49
Alan Donovane3deafe2018-10-23 11:05:09 -040050// ParseExpr parses a Starlark expression.
Alan Donovan312d1a52017-10-02 10:10:28 -040051// See Parse for explanation of parameters.
Laurent Le Brun689fc222018-02-22 19:37:18 +010052func ParseExpr(filename string, src interface{}, mode Mode) (expr Expr, err error) {
53 in, err := newScanner(filename, src, mode&RetainComments != 0)
Alan Donovan312d1a52017-10-02 10:10:28 -040054 if err != nil {
55 return nil, err
56 }
57 p := parser{in: in}
58 defer p.in.recover(&err)
59
60 p.nextToken() // read first lookahead token
61 expr = p.parseTest()
62
Alan Donovanae063842017-10-10 15:46:17 -040063 // A following newline (e.g. "f()\n") appears outside any brackets,
alandonovanbd7aaf52017-10-11 16:34:02 -040064 // on a non-blank line, and thus results in a NEWLINE token.
Alan Donovanae063842017-10-10 15:46:17 -040065 if p.tok == NEWLINE {
66 p.nextToken()
67 }
68
Alan Donovan312d1a52017-10-02 10:10:28 -040069 if p.tok != EOF {
70 p.in.errorf(p.in.pos, "got %#v after expression, want EOF", p.tok)
71 }
Laurent Le Brun689fc222018-02-22 19:37:18 +010072 p.assignComments(expr)
Alan Donovan312d1a52017-10-02 10:10:28 -040073 return expr, nil
74}
75
76type parser struct {
77 in *scanner
78 tok Token
79 tokval tokenValue
80}
81
82// nextToken advances the scanner and returns the position of the
83// previous token.
84func (p *parser) nextToken() Position {
85 oldpos := p.tokval.pos
86 p.tok = p.in.nextToken(&p.tokval)
87 // enable to see the token stream
88 if debug {
89 log.Printf("nextToken: %-20s%+v\n", p.tok, p.tokval.pos)
90 }
91 return oldpos
92}
93
94// file_input = (NEWLINE | stmt)* EOF
95func (p *parser) parseFile() *File {
96 var stmts []Stmt
97 for p.tok != EOF {
98 if p.tok == NEWLINE {
99 p.nextToken()
100 continue
101 }
102 stmts = p.parseStmt(stmts)
103 }
104 return &File{Stmts: stmts}
105}
106
107func (p *parser) parseStmt(stmts []Stmt) []Stmt {
108 if p.tok == DEF {
109 return append(stmts, p.parseDefStmt())
110 } else if p.tok == IF {
111 return append(stmts, p.parseIfStmt())
112 } else if p.tok == FOR {
113 return append(stmts, p.parseForStmt())
Alessandro Arzilli678bafe2018-12-07 17:28:35 +0100114 } else if p.tok == WHILE {
115 return append(stmts, p.parseWhileStmt())
Alan Donovan312d1a52017-10-02 10:10:28 -0400116 }
Diego Siqueira6cae3352017-10-08 01:16:14 +0200117 return p.parseSimpleStmt(stmts)
Alan Donovan312d1a52017-10-02 10:10:28 -0400118}
119
120func (p *parser) parseDefStmt() Stmt {
121 defpos := p.nextToken() // consume DEF
122 id := p.parseIdent()
123 p.consume(LPAREN)
124 params := p.parseParams()
125 p.consume(RPAREN)
126 p.consume(COLON)
127 body := p.parseSuite()
128 return &DefStmt{
129 Def: defpos,
130 Name: id,
131 Function: Function{
132 StartPos: defpos,
133 Params: params,
134 Body: body,
135 },
136 }
137}
138
139func (p *parser) parseIfStmt() Stmt {
140 ifpos := p.nextToken() // consume IF
141 cond := p.parseTest()
142 p.consume(COLON)
143 body := p.parseSuite()
144 ifStmt := &IfStmt{
145 If: ifpos,
146 Cond: cond,
147 True: body,
148 }
149 tail := ifStmt
150 for p.tok == ELIF {
151 elifpos := p.nextToken() // consume ELIF
152 cond := p.parseTest()
153 p.consume(COLON)
154 body := p.parseSuite()
155 elif := &IfStmt{
156 If: elifpos,
157 Cond: cond,
158 True: body,
159 }
160 tail.ElsePos = elifpos
161 tail.False = []Stmt{elif}
162 tail = elif
163 }
164 if p.tok == ELSE {
165 tail.ElsePos = p.nextToken() // consume ELSE
166 p.consume(COLON)
167 tail.False = p.parseSuite()
168 }
169 return ifStmt
170}
171
172func (p *parser) parseForStmt() Stmt {
173 forpos := p.nextToken() // consume FOR
174 vars := p.parseForLoopVariables()
175 p.consume(IN)
176 x := p.parseExpr(false)
177 p.consume(COLON)
178 body := p.parseSuite()
179 return &ForStmt{
180 For: forpos,
181 Vars: vars,
182 X: x,
183 Body: body,
184 }
185}
186
Alessandro Arzilli678bafe2018-12-07 17:28:35 +0100187func (p *parser) parseWhileStmt() Stmt {
188 whilepos := p.nextToken() // consume WHILE
189 cond := p.parseTest()
190 p.consume(COLON)
191 body := p.parseSuite()
192 return &WhileStmt{
193 While: whilepos,
194 Cond: cond,
195 Body: body,
196 }
197}
198
Alan Donovan312d1a52017-10-02 10:10:28 -0400199// Equivalent to 'exprlist' production in Python grammar.
200//
201// loop_variables = primary_with_suffix (COMMA primary_with_suffix)* COMMA?
202func (p *parser) parseForLoopVariables() Expr {
203 // Avoid parseExpr because it would consume the IN token
204 // following x in "for x in y: ...".
205 v := p.parsePrimaryWithSuffix()
206 if p.tok != COMMA {
207 return v
208 }
209
210 list := []Expr{v}
211 for p.tok == COMMA {
212 p.nextToken()
213 if terminatesExprList(p.tok) {
214 break
215 }
216 list = append(list, p.parsePrimaryWithSuffix())
217 }
218 return &TupleExpr{List: list}
219}
220
221// simple_stmt = small_stmt (SEMI small_stmt)* SEMI? NEWLINE
222func (p *parser) parseSimpleStmt(stmts []Stmt) []Stmt {
223 for {
224 stmts = append(stmts, p.parseSmallStmt())
225 if p.tok != SEMI {
226 break
227 }
228 p.nextToken() // consume SEMI
229 if p.tok == NEWLINE || p.tok == EOF {
230 break
231 }
232 }
233 // EOF without NEWLINE occurs in `if x: pass`, for example.
234 if p.tok != EOF {
235 p.consume(NEWLINE)
236 }
237 return stmts
238}
239
240// small_stmt = RETURN expr?
241// | PASS | BREAK | CONTINUE
alandonovan6696fc32017-10-20 10:55:17 -0400242// | LOAD ...
Hittorp0a5e39a2018-08-09 15:02:30 +0300243// | expr ('=' | '+=' | '-=' | '*=' | '/=' | '%=' | '&=' | '|=' | '^=' | '<<=' | '>>=') expr // assign
Alan Donovan312d1a52017-10-02 10:10:28 -0400244// | expr
245func (p *parser) parseSmallStmt() Stmt {
alandonovan6696fc32017-10-20 10:55:17 -0400246 switch p.tok {
247 case RETURN:
Alan Donovan312d1a52017-10-02 10:10:28 -0400248 pos := p.nextToken() // consume RETURN
249 var result Expr
250 if p.tok != EOF && p.tok != NEWLINE && p.tok != SEMI {
251 result = p.parseExpr(false)
252 }
253 return &ReturnStmt{Return: pos, Result: result}
Alan Donovan312d1a52017-10-02 10:10:28 -0400254
Alan Donovan312d1a52017-10-02 10:10:28 -0400255 case BREAK, CONTINUE, PASS:
256 tok := p.tok
257 pos := p.nextToken() // consume it
258 return &BranchStmt{Token: tok, TokenPos: pos}
alandonovan6696fc32017-10-20 10:55:17 -0400259
260 case LOAD:
261 return p.parseLoadStmt()
Alan Donovan312d1a52017-10-02 10:10:28 -0400262 }
263
264 // Assignment
265 x := p.parseExpr(false)
266 switch p.tok {
Hittorp0a5e39a2018-08-09 15:02:30 +0300267 case EQ, PLUS_EQ, MINUS_EQ, STAR_EQ, SLASH_EQ, SLASHSLASH_EQ, PERCENT_EQ, AMP_EQ, PIPE_EQ, CIRCUMFLEX_EQ, LTLT_EQ, GTGT_EQ:
Alan Donovan312d1a52017-10-02 10:10:28 -0400268 op := p.tok
269 pos := p.nextToken() // consume op
270 rhs := p.parseExpr(false)
271 return &AssignStmt{OpPos: pos, Op: op, LHS: x, RHS: rhs}
272 }
273
Alan Donovan312d1a52017-10-02 10:10:28 -0400274 // Expression statement (e.g. function call, doc string).
275 return &ExprStmt{X: x}
276}
277
alandonovan6696fc32017-10-20 10:55:17 -0400278// stmt = LOAD '(' STRING {',' (IDENT '=')? STRING} [','] ')'
279func (p *parser) parseLoadStmt() *LoadStmt {
280 loadPos := p.nextToken() // consume LOAD
281 lparen := p.consume(LPAREN)
Alan Donovan312d1a52017-10-02 10:10:28 -0400282
alandonovan6696fc32017-10-20 10:55:17 -0400283 if p.tok != STRING {
284 p.in.errorf(p.in.pos, "first operand of load statement must be a string literal")
285 }
286 module := p.parsePrimary().(*Literal)
287
288 var from, to []*Ident
289 for p.tok != RPAREN && p.tok != EOF {
290 p.consume(COMMA)
291 if p.tok == RPAREN {
292 break // allow trailing comma
293 }
294 switch p.tok {
295 case STRING:
Alan Donovan312d1a52017-10-02 10:10:28 -0400296 // load("module", "id")
297 // To name is same as original.
alandonovan6696fc32017-10-20 10:55:17 -0400298 lit := p.parsePrimary().(*Literal)
Alan Donovan312d1a52017-10-02 10:10:28 -0400299 id := &Ident{
300 NamePos: lit.TokenPos.add(`"`),
301 Name: lit.Value.(string),
302 }
alandonovan6696fc32017-10-20 10:55:17 -0400303 to = append(to, id)
304 from = append(from, id)
Alan Donovan312d1a52017-10-02 10:10:28 -0400305
alandonovan6696fc32017-10-20 10:55:17 -0400306 case IDENT:
307 // load("module", to="from")
308 id := p.parseIdent()
309 to = append(to, id)
310 if p.tok != EQ {
alandonovan4b42bbf2017-11-10 13:22:52 -0500311 p.in.errorf(p.in.pos, `load operand must be "%[1]s" or %[1]s="originalname" (want '=' after %[1]s)`, id.Name)
alandonovan6696fc32017-10-20 10:55:17 -0400312 }
313 p.consume(EQ)
314 if p.tok != STRING {
315 p.in.errorf(p.in.pos, `original name of loaded symbol must be quoted: %s="originalname"`, id.Name)
316 }
317 lit := p.parsePrimary().(*Literal)
318 from = append(from, &Ident{
319 NamePos: lit.TokenPos.add(`"`),
320 Name: lit.Value.(string),
321 })
322
323 case RPAREN:
324 p.in.errorf(p.in.pos, "trailing comma in load statement")
325
326 default:
327 p.in.errorf(p.in.pos, `load operand must be "name" or localname="name" (got %#v)`, p.tok)
328 }
329 }
330 rparen := p.consume(RPAREN)
331
332 if len(to) == 0 {
333 p.in.errorf(lparen, "load statement must import at least 1 symbol")
Alan Donovan312d1a52017-10-02 10:10:28 -0400334 }
335 return &LoadStmt{
336 Load: loadPos,
337 Module: module,
338 To: to,
339 From: from,
alandonovan6696fc32017-10-20 10:55:17 -0400340 Rparen: rparen,
Alan Donovan312d1a52017-10-02 10:10:28 -0400341 }
Alan Donovan312d1a52017-10-02 10:10:28 -0400342}
343
344// suite is typically what follows a COLON (e.g. after DEF or FOR).
345// suite = simple_stmt | NEWLINE INDENT stmt+ OUTDENT
346func (p *parser) parseSuite() []Stmt {
347 if p.tok == NEWLINE {
348 p.nextToken() // consume NEWLINE
349 p.consume(INDENT)
350 var stmts []Stmt
351 for p.tok != OUTDENT && p.tok != EOF {
352 stmts = p.parseStmt(stmts)
353 }
354 p.consume(OUTDENT)
355 return stmts
356 }
357
358 return p.parseSimpleStmt(nil)
359}
360
361func (p *parser) parseIdent() *Ident {
362 if p.tok != IDENT {
363 p.in.error(p.in.pos, "not an identifier")
364 }
365 id := &Ident{
366 NamePos: p.tokval.pos,
367 Name: p.tokval.raw,
368 }
369 p.nextToken()
370 return id
371}
372
373func (p *parser) consume(t Token) Position {
374 if p.tok != t {
375 p.in.errorf(p.in.pos, "got %#v, want %#v", p.tok, t)
376 }
377 return p.nextToken()
378}
379
380// params = (param COMMA)* param
381// |
382//
383// param = IDENT
384// | IDENT EQ test
385// | STAR IDENT
386// | STARSTAR IDENT
387//
388// parseParams parses a parameter list. The resulting expressions are of the form:
389//
390// *Ident
391// *Binary{Op: EQ, X: *Ident, Y: Expr}
392// *Unary{Op: STAR, X: *Ident}
393// *Unary{Op: STARSTAR, X: *Ident}
394func (p *parser) parseParams() []Expr {
395 var params []Expr
396 stars := false
397 for p.tok != RPAREN && p.tok != COLON && p.tok != EOF {
398 if len(params) > 0 {
399 p.consume(COMMA)
400 }
401 if p.tok == RPAREN {
402 // list can end with a COMMA if there is neither * nor **
403 if stars {
404 p.in.errorf(p.in.pos, "got %#v, want parameter", p.tok)
405 }
406 break
407 }
408
Taras Tsugrii6be536f2018-12-09 18:07:56 -0800409 // *args or **kwargs
410 if p.tok == STAR || p.tok == STARSTAR {
Alan Donovan312d1a52017-10-02 10:10:28 -0400411 stars = true
alandonovan97d77632018-12-09 21:18:13 -0500412 op := p.tok
Alan Donovan312d1a52017-10-02 10:10:28 -0400413 pos := p.nextToken()
414 id := p.parseIdent()
415 params = append(params, &UnaryExpr{
416 OpPos: pos,
alandonovan97d77632018-12-09 21:18:13 -0500417 Op: op,
Alan Donovan312d1a52017-10-02 10:10:28 -0400418 X: id,
419 })
420 continue
421 }
422
423 // IDENT
424 // IDENT = test
425 id := p.parseIdent()
426 if p.tok == EQ { // default value
427 eq := p.nextToken()
428 dflt := p.parseTest()
429 params = append(params, &BinaryExpr{
430 X: id,
431 OpPos: eq,
432 Op: EQ,
433 Y: dflt,
434 })
435 continue
436 }
437
438 params = append(params, id)
439 }
440 return params
441}
442
443// parseExpr parses an expression, possible consisting of a
444// comma-separated list of 'test' expressions.
445//
446// In many cases we must use parseTest to avoid ambiguity such as
447// f(x, y) vs. f((x, y)).
448func (p *parser) parseExpr(inParens bool) Expr {
449 x := p.parseTest()
450 if p.tok != COMMA {
451 return x
452 }
453
454 // tuple
455 exprs := p.parseExprs([]Expr{x}, inParens)
456 return &TupleExpr{List: exprs}
457}
458
459// parseExprs parses a comma-separated list of expressions, starting with the comma.
460// It is used to parse tuples and list elements.
461// expr_list = (',' expr)* ','?
462func (p *parser) parseExprs(exprs []Expr, allowTrailingComma bool) []Expr {
463 for p.tok == COMMA {
464 pos := p.nextToken()
465 if terminatesExprList(p.tok) {
466 if !allowTrailingComma {
467 p.in.error(pos, "unparenthesized tuple with trailing comma")
468 }
469 break
470 }
471 exprs = append(exprs, p.parseTest())
472 }
473 return exprs
474}
475
476// parseTest parses a 'test', a single-component expression.
477func (p *parser) parseTest() Expr {
478 if p.tok == LAMBDA {
alandonovanf9faf3b2018-01-16 10:06:02 -0500479 return p.parseLambda(true)
Alan Donovan312d1a52017-10-02 10:10:28 -0400480 }
481
482 x := p.parseTestPrec(0)
483
484 // conditional expression (t IF cond ELSE f)
485 if p.tok == IF {
486 ifpos := p.nextToken()
487 cond := p.parseTestPrec(0)
488 if p.tok != ELSE {
489 p.in.error(ifpos, "conditional expression without else clause")
490 }
491 elsepos := p.nextToken()
492 else_ := p.parseTest()
493 return &CondExpr{If: ifpos, Cond: cond, True: x, ElsePos: elsepos, False: else_}
494 }
495
496 return x
497}
498
alandonovanf9faf3b2018-01-16 10:06:02 -0500499// parseTestNoCond parses a a single-component expression without
500// consuming a trailing 'if expr else expr'.
501func (p *parser) parseTestNoCond() Expr {
502 if p.tok == LAMBDA {
503 return p.parseLambda(false)
504 }
505 return p.parseTestPrec(0)
506}
507
508// parseLambda parses a lambda expression.
509// The allowCond flag allows the body to be an 'a if b else c' conditional.
510func (p *parser) parseLambda(allowCond bool) Expr {
511 lambda := p.nextToken()
512 var params []Expr
513 if p.tok != COLON {
514 params = p.parseParams()
515 }
516 p.consume(COLON)
517
518 var body Expr
519 if allowCond {
520 body = p.parseTest()
521 } else {
522 body = p.parseTestNoCond()
523 }
524
525 return &LambdaExpr{
526 Lambda: lambda,
527 Function: Function{
528 StartPos: lambda,
529 Params: params,
530 Body: []Stmt{&ReturnStmt{Result: body}},
531 },
532 }
533}
534
Alan Donovan312d1a52017-10-02 10:10:28 -0400535func (p *parser) parseTestPrec(prec int) Expr {
536 if prec >= len(preclevels) {
537 return p.parsePrimaryWithSuffix()
538 }
539
540 // expr = NOT expr
541 if p.tok == NOT && prec == int(precedence[NOT]) {
542 pos := p.nextToken()
alandonovanc7df0452018-10-04 17:10:12 -0400543 x := p.parseTestPrec(prec)
Alan Donovan312d1a52017-10-02 10:10:28 -0400544 return &UnaryExpr{
545 OpPos: pos,
546 Op: NOT,
547 X: x,
548 }
549 }
550
551 return p.parseBinopExpr(prec)
552}
553
554// expr = test (OP test)*
555// Uses precedence climbing; see http://www.engr.mun.ca/~theo/Misc/exp_parsing.htm#climbing.
556func (p *parser) parseBinopExpr(prec int) Expr {
557 x := p.parseTestPrec(prec + 1)
558 for first := true; ; first = false {
559 if p.tok == NOT {
560 p.nextToken() // consume NOT
561 // In this context, NOT must be followed by IN.
562 // Replace NOT IN by a single NOT_IN token.
563 if p.tok != IN {
564 p.in.errorf(p.in.pos, "got %#v, want in", p.tok)
565 }
566 p.tok = NOT_IN
567 }
568
569 // Binary operator of specified precedence?
570 opprec := int(precedence[p.tok])
571 if opprec < prec {
572 return x
573 }
574
575 // Comparisons are non-associative.
576 if !first && opprec == int(precedence[EQL]) {
577 p.in.errorf(p.in.pos, "%s does not associate with %s (use parens)",
578 x.(*BinaryExpr).Op, p.tok)
579 }
580
581 op := p.tok
582 pos := p.nextToken()
583 y := p.parseTestPrec(opprec + 1)
alandonovan60e4b3d2018-04-02 12:32:39 -0400584 x = &BinaryExpr{OpPos: pos, Op: op, X: x, Y: y}
Alan Donovan312d1a52017-10-02 10:10:28 -0400585 }
586}
587
588// precedence maps each operator to its precedence (0-7), or -1 for other tokens.
589var precedence [maxToken]int8
590
591// preclevels groups operators of equal precedence.
592// Comparisons are nonassociative; other binary operators associate to the left.
Hittorp0a5e39a2018-08-09 15:02:30 +0300593// Unary MINUS, unary PLUS, and TILDE have higher precedence so are handled in parsePrimary.
alandonovan04aba6e2018-11-05 17:45:33 -0500594// See https://github.com/google/starlark-go/blob/master/doc/spec.md#binary-operators
Alan Donovan312d1a52017-10-02 10:10:28 -0400595var preclevels = [...][]Token{
alandonovanc7df0452018-10-04 17:10:12 -0400596 {OR}, // or
597 {AND}, // and
598 {NOT}, // not (unary)
Alan Donovan312d1a52017-10-02 10:10:28 -0400599 {EQL, NEQ, LT, GT, LE, GE, IN, NOT_IN}, // == != < > <= >= in not in
alandonovanc7df0452018-10-04 17:10:12 -0400600 {PIPE}, // |
601 {CIRCUMFLEX}, // ^
602 {AMP}, // &
603 {LTLT, GTGT}, // << >>
604 {MINUS, PLUS}, // -
605 {STAR, PERCENT, SLASH, SLASHSLASH}, // * % / //
Alan Donovan312d1a52017-10-02 10:10:28 -0400606}
607
608func init() {
609 // populate precedence table
610 for i := range precedence {
611 precedence[i] = -1
612 }
613 for level, tokens := range preclevels {
614 for _, tok := range tokens {
615 precedence[tok] = int8(level)
616 }
617 }
618}
619
Alan Donovan312d1a52017-10-02 10:10:28 -0400620// primary_with_suffix = primary
621// | primary '.' IDENT
622// | primary slice_suffix
623// | primary call_suffix
624func (p *parser) parsePrimaryWithSuffix() Expr {
625 x := p.parsePrimary()
626 for {
627 switch p.tok {
628 case DOT:
629 dot := p.nextToken()
630 id := p.parseIdent()
631 x = &DotExpr{Dot: dot, X: x, Name: id}
632 case LBRACK:
633 x = p.parseSliceSuffix(x)
634 case LPAREN:
635 x = p.parseCallSuffix(x)
636 default:
637 return x
638 }
639 }
640}
641
642// slice_suffix = '[' expr? ':' expr? ':' expr? ']'
643func (p *parser) parseSliceSuffix(x Expr) Expr {
644 lbrack := p.nextToken()
645 var lo, hi, step Expr
646 if p.tok != COLON {
647 y := p.parseExpr(false)
648
649 // index x[y]
650 if p.tok == RBRACK {
651 rbrack := p.nextToken()
652 return &IndexExpr{X: x, Lbrack: lbrack, Y: y, Rbrack: rbrack}
653 }
654
655 lo = y
656 }
657
658 // slice or substring x[lo:hi:step]
659 if p.tok == COLON {
660 p.nextToken()
661 if p.tok != COLON && p.tok != RBRACK {
662 hi = p.parseTest()
663 }
664 }
665 if p.tok == COLON {
666 p.nextToken()
667 if p.tok != RBRACK {
668 step = p.parseTest()
669 }
670 }
671 rbrack := p.consume(RBRACK)
672 return &SliceExpr{X: x, Lbrack: lbrack, Lo: lo, Hi: hi, Step: step, Rbrack: rbrack}
673}
674
675// call_suffix = '(' arg_list? ')'
676func (p *parser) parseCallSuffix(fn Expr) Expr {
677 lparen := p.consume(LPAREN)
678 var rparen Position
679 var args []Expr
680 if p.tok == RPAREN {
681 rparen = p.nextToken()
682 } else {
683 args = p.parseArgs()
684 rparen = p.consume(RPAREN)
685 }
686 return &CallExpr{Fn: fn, Lparen: lparen, Args: args, Rparen: rparen}
687}
688
689// parseArgs parses a list of actual parameter values (arguments).
690// It mirrors the structure of parseParams.
691// arg_list = ((arg COMMA)* arg COMMA?)?
692func (p *parser) parseArgs() []Expr {
693 var args []Expr
694 stars := false
695 for p.tok != RPAREN && p.tok != EOF {
696 if len(args) > 0 {
697 p.consume(COMMA)
698 }
699 if p.tok == RPAREN {
700 // list can end with a COMMA if there is neither * nor **
701 if stars {
702 p.in.errorf(p.in.pos, `got %#v, want argument`, p.tok)
703 }
704 break
705 }
706
alandonovan97d77632018-12-09 21:18:13 -0500707 // *args or **kwargs
708 if p.tok == STAR || p.tok == STARSTAR {
Alan Donovan312d1a52017-10-02 10:10:28 -0400709 stars = true
alandonovan97d77632018-12-09 21:18:13 -0500710 op := p.tok
Alan Donovan312d1a52017-10-02 10:10:28 -0400711 pos := p.nextToken()
712 x := p.parseTest()
713 args = append(args, &UnaryExpr{
714 OpPos: pos,
alandonovan97d77632018-12-09 21:18:13 -0500715 Op: op,
Alan Donovan312d1a52017-10-02 10:10:28 -0400716 X: x,
717 })
718 continue
719 }
720
721 // We use a different strategy from Bazel here to stay within LL(1).
722 // Instead of looking ahead two tokens (IDENT, EQ) we parse
723 // 'test = test' then check that the first was an IDENT.
724 x := p.parseTest()
725
726 if p.tok == EQ {
727 // name = value
728 if _, ok := x.(*Ident); !ok {
729 p.in.errorf(p.in.pos, "keyword argument must have form name=expr")
730 }
731 eq := p.nextToken()
732 y := p.parseTest()
733 x = &BinaryExpr{
734 X: x,
735 OpPos: eq,
736 Op: EQ,
737 Y: y,
738 }
739 }
740
741 args = append(args, x)
742 }
743 return args
744}
745
746// primary = IDENT
747// | INT | FLOAT
748// | STRING
749// | '[' ... // list literal or comprehension
750// | '{' ... // dict literal or comprehension
751// | '(' ... // tuple or parenthesized expression
alandonovanc7df0452018-10-04 17:10:12 -0400752// | ('-'|'+'|'~') primary_with_suffix
Alan Donovan312d1a52017-10-02 10:10:28 -0400753func (p *parser) parsePrimary() Expr {
754 switch p.tok {
755 case IDENT:
756 return p.parseIdent()
757
758 case INT, FLOAT, STRING:
759 var val interface{}
760 tok := p.tok
761 switch tok {
762 case INT:
Mohamed Elqdusy69e96152018-01-22 20:00:29 +0100763 if p.tokval.bigInt != nil {
764 val = p.tokval.bigInt
765 } else {
766 val = p.tokval.int
767 }
Alan Donovan312d1a52017-10-02 10:10:28 -0400768 case FLOAT:
769 val = p.tokval.float
770 case STRING:
771 val = p.tokval.string
772 }
773 raw := p.tokval.raw
774 pos := p.nextToken()
775 return &Literal{Token: tok, TokenPos: pos, Raw: raw, Value: val}
776
777 case LBRACK:
778 return p.parseList()
779
780 case LBRACE:
781 return p.parseDict()
782
783 case LPAREN:
784 lparen := p.nextToken()
785 if p.tok == RPAREN {
786 // empty tuple
787 rparen := p.nextToken()
788 return &TupleExpr{Lparen: lparen, Rparen: rparen}
789 }
790 e := p.parseExpr(true) // allow trailing comma
Laurent Le Brun28ceca72018-02-26 15:01:53 +0100791 rparen := p.consume(RPAREN)
792 return &ParenExpr{
793 Lparen: lparen,
794 X: e,
795 Rparen: rparen,
796 }
Alan Donovan312d1a52017-10-02 10:10:28 -0400797
alandonovanc7df0452018-10-04 17:10:12 -0400798 case MINUS, PLUS, TILDE: // unary
Alan Donovan312d1a52017-10-02 10:10:28 -0400799 tok := p.tok
800 pos := p.nextToken()
801 x := p.parsePrimaryWithSuffix()
802 return &UnaryExpr{
803 OpPos: pos,
804 Op: tok,
805 X: x,
806 }
807 }
808 p.in.errorf(p.in.pos, "got %#v, want primary expression", p.tok)
809 panic("unreachable")
810}
811
812// list = '[' ']'
813// | '[' expr ']'
814// | '[' expr expr_list ']'
815// | '[' expr (FOR loop_variables IN expr)+ ']'
816func (p *parser) parseList() Expr {
817 lbrack := p.nextToken()
818 if p.tok == RBRACK {
819 // empty List
820 rbrack := p.nextToken()
821 return &ListExpr{Lbrack: lbrack, Rbrack: rbrack}
822 }
823
824 x := p.parseTest()
825
826 if p.tok == FOR {
827 // list comprehension
828 return p.parseComprehensionSuffix(lbrack, x, RBRACK)
829 }
830
831 exprs := []Expr{x}
832 if p.tok == COMMA {
833 // multi-item list literal
834 exprs = p.parseExprs(exprs, true) // allow trailing comma
835 }
836
837 rbrack := p.consume(RBRACK)
838 return &ListExpr{Lbrack: lbrack, List: exprs, Rbrack: rbrack}
839}
840
841// dict = '{' '}'
842// | '{' dict_entry_list '}'
843// | '{' dict_entry FOR loop_variables IN expr '}'
844func (p *parser) parseDict() Expr {
845 lbrace := p.nextToken()
846 if p.tok == RBRACE {
847 // empty dict
848 rbrace := p.nextToken()
849 return &DictExpr{Lbrace: lbrace, Rbrace: rbrace}
850 }
851
852 x := p.parseDictEntry()
853
854 if p.tok == FOR {
855 // dict comprehension
856 return p.parseComprehensionSuffix(lbrace, x, RBRACE)
857 }
858
859 entries := []Expr{x}
860 for p.tok == COMMA {
861 p.nextToken()
862 if p.tok == RBRACE {
863 break
864 }
865 entries = append(entries, p.parseDictEntry())
866 }
867
868 rbrace := p.consume(RBRACE)
869 return &DictExpr{Lbrace: lbrace, List: entries, Rbrace: rbrace}
870}
871
872// dict_entry = test ':' test
873func (p *parser) parseDictEntry() *DictEntry {
874 k := p.parseTest()
875 colon := p.consume(COLON)
876 v := p.parseTest()
877 return &DictEntry{Key: k, Colon: colon, Value: v}
878}
879
880// comp_suffix = FOR loopvars IN expr comp_suffix
881// | IF expr comp_suffix
882// | ']' or ')' (end)
883//
884// There can be multiple FOR/IF clauses; the first is always a FOR.
885func (p *parser) parseComprehensionSuffix(lbrace Position, body Expr, endBrace Token) Expr {
886 var clauses []Node
887 for p.tok != endBrace {
888 if p.tok == FOR {
889 pos := p.nextToken()
890 vars := p.parseForLoopVariables()
891 in := p.consume(IN)
892 // Following Python 3, the operand of IN cannot be:
893 // - a conditional expression ('x if y else z'),
894 // due to conflicts in Python grammar
895 // ('if' is used by the comprehension);
896 // - a lambda expression
897 // - an unparenthesized tuple.
898 x := p.parseTestPrec(0)
899 clauses = append(clauses, &ForClause{For: pos, Vars: vars, In: in, X: x})
900 } else if p.tok == IF {
901 pos := p.nextToken()
alandonovanf9faf3b2018-01-16 10:06:02 -0500902 cond := p.parseTestNoCond()
Alan Donovan312d1a52017-10-02 10:10:28 -0400903 clauses = append(clauses, &IfClause{If: pos, Cond: cond})
904 } else {
905 p.in.errorf(p.in.pos, "got %#v, want '%s', for, or if", p.tok, endBrace)
906 }
907 }
908 rbrace := p.nextToken()
909
910 return &Comprehension{
911 Curly: endBrace == RBRACE,
912 Lbrack: lbrace,
913 Body: body,
914 Clauses: clauses,
915 Rbrack: rbrace,
916 }
917}
918
919func terminatesExprList(tok Token) bool {
920 switch tok {
921 case EOF, NEWLINE, EQ, RBRACE, RBRACK, RPAREN, SEMI:
922 return true
923 }
924 return false
925}
Laurent Le Brun689fc222018-02-22 19:37:18 +0100926
927// Comment assignment.
928// We build two lists of all subnodes, preorder and postorder.
929// The preorder list is ordered by start location, with outer nodes first.
930// The postorder list is ordered by end location, with outer nodes last.
931// We use the preorder list to assign each whole-line comment to the syntax
932// immediately following it, and we use the postorder list to assign each
933// end-of-line comment to the syntax immediately preceding it.
934
935// flattenAST returns the list of AST nodes, both in prefix order and in postfix
936// order.
937func flattenAST(root Node) (pre, post []Node) {
938 stack := []Node{}
939 Walk(root, func(n Node) bool {
940 if n != nil {
941 pre = append(pre, n)
942 stack = append(stack, n)
943 } else {
944 post = append(post, stack[len(stack)-1])
945 stack = stack[:len(stack)-1]
946 }
947 return true
948 })
949 return pre, post
950}
951
952// assignComments attaches comments to nearby syntax.
953func (p *parser) assignComments(n Node) {
954 // Leave early if there are no comments
955 if len(p.in.lineComments)+len(p.in.suffixComments) == 0 {
956 return
957 }
958
959 pre, post := flattenAST(n)
960
961 // Assign line comments to syntax immediately following.
962 line := p.in.lineComments
963 for _, x := range pre {
964 start, _ := x.Span()
965
966 switch x.(type) {
967 case *File:
968 continue
969 }
970
971 for len(line) > 0 && !start.isBefore(line[0].Start) {
972 x.AllocComments()
973 x.Comments().Before = append(x.Comments().Before, line[0])
974 line = line[1:]
975 }
976 }
977
978 // Remaining line comments go at end of file.
979 if len(line) > 0 {
980 n.AllocComments()
981 n.Comments().After = append(n.Comments().After, line...)
982 }
983
984 // Assign suffix comments to syntax immediately before.
985 suffix := p.in.suffixComments
986 for i := len(post) - 1; i >= 0; i-- {
987 x := post[i]
988
989 // Do not assign suffix comments to file
990 switch x.(type) {
991 case *File:
992 continue
993 }
994
995 _, end := x.Span()
996 if len(suffix) > 0 && end.isBefore(suffix[len(suffix)-1].Start) {
997 x.AllocComments()
998 x.Comments().Suffix = append(x.Comments().Suffix, suffix[len(suffix)-1])
999 suffix = suffix[:len(suffix)-1]
1000 }
1001 }
1002}