blob: 9582bd7e4c52566492516858a3ae175ac29fbf11 [file] [log] [blame]
Alan Donovan312d1a52017-10-02 10:10:28 -04001// Copyright 2017 The Bazel Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package syntax
6
7import (
8 "bytes"
9 "fmt"
10 "go/build"
11 "io/ioutil"
12 "path/filepath"
alandonovanebe61bd2021-02-12 16:57:32 -050013 "strings"
Alan Donovan312d1a52017-10-02 10:10:28 -040014 "testing"
15)
16
17func scan(src interface{}) (tokens string, err error) {
Alan Donovane3deafe2018-10-23 11:05:09 -040018 sc, err := newScanner("foo.star", src, false)
Alan Donovan312d1a52017-10-02 10:10:28 -040019 if err != nil {
20 return "", err
21 }
22
23 defer sc.recover(&err)
24
25 var buf bytes.Buffer
26 var val tokenValue
27 for {
28 tok := sc.nextToken(&val)
29
30 if buf.Len() > 0 {
31 buf.WriteByte(' ')
32 }
33 switch tok {
34 case EOF:
35 buf.WriteString("EOF")
36 case IDENT:
37 buf.WriteString(val.raw)
38 case INT:
Mohamed Elqdusy69e96152018-01-22 20:00:29 +010039 if val.bigInt != nil {
40 fmt.Fprintf(&buf, "%d", val.bigInt)
41 } else {
42 fmt.Fprintf(&buf, "%d", val.int)
43 }
Alan Donovan312d1a52017-10-02 10:10:28 -040044 case FLOAT:
45 fmt.Fprintf(&buf, "%e", val.float)
alandonovanebe61bd2021-02-12 16:57:32 -050046 case STRING, BYTES:
47 buf.WriteString(Quote(val.string, tok == BYTES))
Alan Donovan312d1a52017-10-02 10:10:28 -040048 default:
49 buf.WriteString(tok.String())
50 }
51 if tok == EOF {
52 break
53 }
54 }
55 return buf.String(), nil
56}
57
// TestScanner runs scan over each input in the table below and checks that
// the result begins with the expected string. The result is the
// space-separated token string on success, or the error message when scan
// returns an error.
58func TestScanner(t *testing.T) {
59 for _, test := range []struct {
60 input, want string
61 }{
62 {``, "EOF"},
63 {`123`, "123 EOF"},
64 {`x.y`, "x . y EOF"},
65 {`chocolate.éclair`, `chocolate . éclair EOF`},
66 {`123 "foo" hello x.y`, `123 "foo" hello x . y EOF`},
67 {`print(x)`, "print ( x ) EOF"},
68 {`print(x); print(y)`, "print ( x ) ; print ( y ) EOF"},
Alan Donovanae063842017-10-10 15:46:17 -040069 {"\nprint(\n1\n)\n", "print ( 1 ) newline EOF"}, // final \n is at toplevel on non-blank line => token
Alan Donovan312d1a52017-10-02 10:10:28 -040070 {`/ // /= //= ///=`, "/ // /= //= // /= EOF"},
71 {`# hello
72print(x)`, "print ( x ) EOF"},
73 {`# hello
74print(1)
75cc_binary(name="foo")
76def f(x):
77 return x+1
78print(1)
79`,
80 `print ( 1 ) newline ` +
81 `cc_binary ( name = "foo" ) newline ` +
82 `def f ( x ) : newline ` +
83 `indent return x + 1 newline ` +
84 `outdent print ( 1 ) newline ` +
85 `EOF`},
86 // EOF should act like an implicit newline.
87 {`def f(): pass`,
88 "def f ( ) : pass EOF"},
89 {`def f():
90 pass`,
91 "def f ( ) : newline indent pass newline outdent EOF"},
92 {`def f():
93 pass
94# oops`,
95 "def f ( ) : newline indent pass newline outdent EOF"},
96 {`def f():
97 pass \
98`,
99 "def f ( ) : newline indent pass newline outdent EOF"},
100 {`def f():
101 pass
102`,
103 "def f ( ) : newline indent pass newline outdent EOF"},
104 {`pass
105
106
107pass`, "pass newline pass EOF"}, // consecutive newlines are consolidated
108 {`def f():
109 pass
110 `, "def f ( ) : newline indent pass newline outdent EOF"},
111 {`def f():
112 pass
113 ` + "\n", "def f ( ) : newline indent pass newline outdent EOF"},
114 {"pass", "pass EOF"},
115 {"pass\n", "pass newline EOF"},
116 {"pass\n ", "pass newline EOF"},
117 {"pass\n \n", "pass newline EOF"},
118 {"if x:\n pass\n ", "if x : newline indent pass newline outdent EOF"},
119 {`x = 1 + \
1202`, `x = 1 + 2 EOF`},
121 {`x = 'a\nb'`, `x = "a\nb" EOF`},
Alan Donovan312d1a52017-10-02 10:10:28 -0400122 {`x = r'a\nb'`, `x = "a\\nb" EOF`},
alandonovane8819e82020-03-26 17:56:36 -0400123 {"x = 'a\\\nb'", `x = "ab" EOF`},
Alan Donovan312d1a52017-10-02 10:10:28 -0400124 {`x = '\''`, `x = "'" EOF`},
125 {`x = "\""`, `x = "\"" EOF`},
126 {`x = r'\''`, `x = "\\'" EOF`},
127 {`x = '''\''''`, `x = "'" EOF`},
128 {`x = r'''\''''`, `x = "\\'" EOF`},
129 {`x = ''''a'b'c'''`, `x = "'a'b'c" EOF`},
130 {"x = '''a\nb'''", `x = "a\nb" EOF`},
131 {"x = '''a\rb'''", `x = "a\nb" EOF`},
132 {"x = '''a\r\nb'''", `x = "a\nb" EOF`},
133 {"x = '''a\n\rb'''", `x = "a\n\nb" EOF`},
134 {"x = r'a\\\nb'", `x = "a\\\nb" EOF`},
135 {"x = r'a\\\rb'", `x = "a\\\nb" EOF`},
136 {"x = r'a\\\r\nb'", `x = "a\\\nb" EOF`},
137 {"a\rb", `a newline b EOF`},
138 {"a\nb", `a newline b EOF`},
139 {"a\r\nb", `a newline b EOF`},
140 {"a\n\nb", `a newline b EOF`},
141 // numbers
142 {"0", `0 EOF`},
143 {"00", `0 EOF`},
144 {"0.", `0.000000e+00 EOF`},
145 {"0.e1", `0.000000e+00 EOF`},
146 {".0", `0.000000e+00 EOF`},
147 {"0.0", `0.000000e+00 EOF`},
148 {".e1", `. e1 EOF`},
149 {"1", `1 EOF`},
150 {"1.", `1.000000e+00 EOF`},
151 {".1", `1.000000e-01 EOF`},
152 {".1e1", `1.000000e+00 EOF`},
153 {".1e+1", `1.000000e+00 EOF`},
154 {".1e-1", `1.000000e-02 EOF`},
155 {"1e1", `1.000000e+01 EOF`},
156 {"1e+1", `1.000000e+01 EOF`},
157 {"1e-1", `1.000000e-01 EOF`},
158 {"123", `123 EOF`},
159 {"123e45", `1.230000e+47 EOF`},
Mohamed Elqdusy69e96152018-01-22 20:00:29 +0100160 {"999999999999999999999999999999999999999999999999999", `999999999999999999999999999999999999999999999999999 EOF`},
161 {"12345678901234567890", `12345678901234567890 EOF`},
Alan Donovan312d1a52017-10-02 10:10:28 -0400162 // hex
163 {"0xA", `10 EOF`},
164 {"0xAAG", `170 G EOF`},
Alan Donovane3deafe2018-10-23 11:05:09 -0400165 {"0xG", `foo.star:1:1: invalid hex literal`},
Alan Donovan312d1a52017-10-02 10:10:28 -0400166 {"0XA", `10 EOF`},
Alan Donovane3deafe2018-10-23 11:05:09 -0400167 {"0XG", `foo.star:1:1: invalid hex literal`},
Alan Donovan312d1a52017-10-02 10:10:28 -0400168 {"0xA.", `10 . EOF`},
169 {"0xA.e1", `10 . e1 EOF`},
Mohamed Elqdusy69e96152018-01-22 20:00:29 +0100170 {"0x12345678deadbeef12345678", `5634002672576678570168178296 EOF`},
Mohamed Elqdusy3b32df92018-01-08 17:20:46 +0100171 // binary
172 {"0b1010", `10 EOF`},
173 {"0B111101", `61 EOF`},
Alan Donovane3deafe2018-10-23 11:05:09 -0400174 {"0b3", `foo.star:1:3: invalid binary literal`},
Mohamed Elqdusy3b32df92018-01-08 17:20:46 +0100175 {"0b1010201", `10 201 EOF`},
176 {"0b1010.01", `10 1.000000e-02 EOF`},
177 {"0b0000", `0 EOF`},
Alan Donovan312d1a52017-10-02 10:10:28 -0400178 // octal
179 {"0o123", `83 EOF`},
180 {"0o12834", `10 834 EOF`},
181 {"0o12934", `10 934 EOF`},
182 {"0o12934.", `10 9.340000e+02 EOF`},
183 {"0o12934.1", `10 9.341000e+02 EOF`},
184 {"0o12934e1", `10 9.340000e+03 EOF`},
185 {"0o123.", `83 . EOF`},
186 {"0o123.1", `83 1.000000e-01 EOF`},
alandonovana4759312019-05-28 16:17:46 -0400187 {"0123", `foo.star:1:5: obsolete form of octal literal; use 0o123`},
Alan Donovane3deafe2018-10-23 11:05:09 -0400188 {"012834", `foo.star:1:1: invalid int literal`},
189 {"012934", `foo.star:1:1: invalid int literal`},
190 {"i = 012934", `foo.star:1:5: invalid int literal`},
Alan Donovan312d1a52017-10-02 10:10:28 -0400191 // octal escapes in string literals
192 {`"\037"`, `"\x1f" EOF`},
alandonovanebe61bd2021-02-12 16:57:32 -0500193 {`"\377"`, `foo.star:1:1: non-ASCII octal escape \377 (use \u00FF for the UTF-8 encoding of U+00FF)`},
194 {`"\378"`, `"\x1f8" EOF`}, // = '\37' + '8'
195 {`"\400"`, `foo.star:1:1: non-ASCII octal escape \400`}, // unlike Python 2 and 3
196 // hex escapes
197 {`"\x00\x20\x09\x41\x7e\x7f"`, `"\x00 \tA~\x7f" EOF`}, // DEL is non-printable
198 {`"\x80"`, `foo.star:1:1: non-ASCII hex escape`},
199 {`"\xff"`, `foo.star:1:1: non-ASCII hex escape`},
200 {`"\xFf"`, `foo.star:1:1: non-ASCII hex escape`},
201 {`"\xF"`, `foo.star:1:1: truncated escape sequence \xF`},
202 {`"\x"`, `foo.star:1:1: truncated escape sequence \x`},
203 {`"\xfg"`, `foo.star:1:1: invalid escape sequence \xfg`},
204 // Unicode escapes
205 // \uXXXX
206 {`"\u0400"`, `"Ѐ" EOF`},
207 {`"\u100"`, `foo.star:1:1: truncated escape sequence \u100`},
208 {`"\u04000"`, `"Ѐ0" EOF`}, // = U+0400 + '0'
209 {`"\u100g"`, `foo.star:1:1: invalid escape sequence \u100g`},
210 {`"\u4E16"`, `"世" EOF`},
211 {`"\udc00"`, `foo.star:1:1: invalid Unicode code point U+DC00`}, // surrogate
212 // \UXXXXXXXX
213 {`"\U00000400"`, `"Ѐ" EOF`},
214 {`"\U0000400"`, `foo.star:1:1: truncated escape sequence \U0000400`},
215 {`"\U000004000"`, `"Ѐ0" EOF`}, // = U+0400 + '0'
216 {`"\U1000000g"`, `foo.star:1:1: invalid escape sequence \U1000000g`},
217 {`"\U0010FFFF"`, `"\U0010ffff" EOF`},
218 {`"\U00110000"`, `foo.star:1:1: code point out of range: \U00110000 (max \U00110000)`},
219 {`"\U0001F63F"`, `"😿" EOF`},
220 {`"\U0000dc00"`, `foo.star:1:1: invalid Unicode code point U+DC00`}, // surrogate
alandonovan16e44b12020-03-26 10:23:16 -0400221
222 // backslash escapes
223 // As in Go, a backslash must escape something.
224 // (Python started issuing a deprecation warning in 3.6.)
225 {`"foo\(bar"`, `foo.star:1:1: invalid escape sequence \(`},
226 {`"\+"`, `foo.star:1:1: invalid escape sequence \+`},
227 {`"\w"`, `foo.star:1:1: invalid escape sequence \w`},
228 {`"\""`, `"\"" EOF`},
alandonovan2319aeb2020-06-15 13:21:36 -0400229 {`"\'"`, `"'" EOF`},
alandonovan16e44b12020-03-26 10:23:16 -0400230 {`'\w'`, `foo.star:1:1: invalid escape sequence \w`},
231 {`'\''`, `"'" EOF`},
alandonovan2319aeb2020-06-15 13:21:36 -0400232 {`'\"'`, `"\"" EOF`},
alandonovan16e44b12020-03-26 10:23:16 -0400233 {`"""\w"""`, `foo.star:1:1: invalid escape sequence \w`},
234 {`"""\""""`, `"\"" EOF`},
alandonovan2319aeb2020-06-15 13:21:36 -0400235 {`"""\'"""`, `"'" EOF`},
alandonovan16e44b12020-03-26 10:23:16 -0400236 {`'''\w'''`, `foo.star:1:1: invalid escape sequence \w`},
237 {`'''\''''`, `"'" EOF`},
alandonovan2319aeb2020-06-15 13:21:36 -0400238 {`'''\"'''`, `"\"" EOF`},
alandonovan16e44b12020-03-26 10:23:16 -0400239 {`r"\w"`, `"\\w" EOF`},
240 {`r"\""`, `"\\\"" EOF`},
241 {`r"\'"`, `"\\'" EOF`},
242 {`r'\w'`, `"\\w" EOF`},
243 {`r'\''`, `"\\'" EOF`},
244 {`r'\"'`, `"\\\"" EOF`},
245 {`'a\zb'`, `foo.star:1:1: invalid escape sequence \z`},
246 {`"\o123"`, `foo.star:1:1: invalid escape sequence \o`},
alandonovanebe61bd2021-02-12 16:57:32 -0500247 // bytes literals (where they differ from text strings)
248 {`b"AЀ世😿"`, `b"AЀ世😿`}, // 1-4 byte encodings, literal
249 {`b"\x41\u0400\u4e16\U0001F63F"`, `b"AЀ世😿"`}, // same, as escapes
250 {`b"\377\378\x80\xff\xFf"`, `b"\xff\x1f8\x80\xff\xff" EOF`}, // hex/oct escapes allow non-ASCII
251 {`b"\400"`, `foo.star:1:2: invalid escape sequence \400`},
252 {`b"\udc00"`, `foo.star:1:2: invalid Unicode code point U+DC00`}, // (same as string)
Alan Donovan312d1a52017-10-02 10:10:28 -0400253 // floats starting with octal digits
254 {"012934.", `1.293400e+04 EOF`},
255 {"012934.1", `1.293410e+04 EOF`},
256 {"012934e1", `1.293400e+05 EOF`},
257 {"0123.", `1.230000e+02 EOF`},
258 {"0123.1", `1.231000e+02 EOF`},
alandonovan7a866322018-11-21 14:57:52 -0500259 // github.com/google/skylark/issues/16
Alan Donovane3deafe2018-10-23 11:05:09 -0400260 {"x ! 0", "foo.star:1:3: unexpected input character '!'"},
alandonovanf6c29bf2019-01-03 15:19:20 -0500261 // github.com/google/starlark-go/issues/80
262 {"([{<>}])", "( [ { < > } ] ) EOF"},
alandonovan30e71c62019-01-04 13:48:12 -0500263 {"f();", "f ( ) ; EOF"},
alandonovanc1a3d542019-01-31 13:43:01 -0500264 // github.com/google/starlark-go/issues/104
265 {"def f():\n if x:\n pass\n ", `def f ( ) : newline indent if x : newline indent pass newline outdent outdent EOF`},
266 {`while cond: pass`, "while cond : pass EOF"},
alandonovan22479a32019-01-09 12:15:31 -0500267 // github.com/google/starlark-go/issues/107
268 {"~= ~= 5", "~ = ~ = 5 EOF"},
alandonovan988906f2019-08-20 13:32:00 -0400269 {"0in", "0 in EOF"},
270 {"0or", "foo.star:1:3: invalid octal literal"},
271 {"6in", "6 in EOF"},
272 {"6or", "6 or EOF"},
Alan Donovan312d1a52017-10-02 10:10:28 -0400273 } {
274 got, err := scan(test.input)
275 if err != nil {
Ariel Mashrakicaa37b42017-10-27 19:27:28 +0300276 got = err.(Error).Error()
Alan Donovan312d1a52017-10-02 10:10:28 -0400277 }
alandonovanebe61bd2021-02-12 16:57:32 -0500278 // Prefix match allows us to truncate errors in expectations.
279 // Success cases all end in EOF.
280 if !strings.HasPrefix(got, test.want) {
Alan Donovan312d1a52017-10-02 10:10:28 -0400281 t.Errorf("scan `%s` = [%s], want [%s]", test.input, got, test.want)
282 }
283 }
284}
285
// dataFile returns the path formed by joining the default GOPATH,
// "src/go.starlark.net", pkgdir, and filename.
//
// It is a copy of starlarktest.DataFile, duplicated here to avoid a
// dependency cycle.
var dataFile = func(pkgdir, filename string) string {
	elems := []string{
		build.Default.GOPATH,
		"src/go.starlark.net",
		pkgdir,
		filename,
	}
	return filepath.Join(elems...)
}
291
292func BenchmarkScan(b *testing.B) {
Alan Donovan6beab7e2018-10-31 17:53:09 -0400293 filename := dataFile("syntax", "testdata/scan.star")
Alan Donovan312d1a52017-10-02 10:10:28 -0400294 b.StopTimer()
295 data, err := ioutil.ReadFile(filename)
296 if err != nil {
297 b.Fatal(err)
298 }
299 b.StartTimer()
300
301 for i := 0; i < b.N; i++ {
Laurent Le Brun689fc222018-02-22 19:37:18 +0100302 sc, err := newScanner(filename, data, false)
Alan Donovan312d1a52017-10-02 10:10:28 -0400303 if err != nil {
304 b.Fatal(err)
305 }
306 var val tokenValue
307 for sc.nextToken(&val) != EOF {
308 }
309 }
310}