blob: 915f8be681c139f2064e39fbe411f75253d61731 [file] [log] [blame]
Alan Donovan312d1a52017-10-02 10:10:28 -04001// Copyright 2017 The Bazel Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package syntax
6
7import (
8 "bytes"
9 "fmt"
10 "go/build"
11 "io/ioutil"
12 "path/filepath"
13 "testing"
14)
15
16func scan(src interface{}) (tokens string, err error) {
17 sc, err := newScanner("foo.sky", src)
18 if err != nil {
19 return "", err
20 }
21
22 defer sc.recover(&err)
23
24 var buf bytes.Buffer
25 var val tokenValue
26 for {
27 tok := sc.nextToken(&val)
28
29 if buf.Len() > 0 {
30 buf.WriteByte(' ')
31 }
32 switch tok {
33 case EOF:
34 buf.WriteString("EOF")
35 case IDENT:
36 buf.WriteString(val.raw)
37 case INT:
38 fmt.Fprintf(&buf, "%d", val.int)
39 case FLOAT:
40 fmt.Fprintf(&buf, "%e", val.float)
41 case STRING:
42 fmt.Fprintf(&buf, "%q", val.string)
43 default:
44 buf.WriteString(tok.String())
45 }
46 if tok == EOF {
47 break
48 }
49 }
50 return buf.String(), nil
51}
52
// TestScanner is a table-driven test of the scanner. Each case feeds input
// to scan and compares the result with want, which is either the
// space-separated token rendering produced by scan (ending in "EOF") or,
// when scan returns an error, that error's message.
53func TestScanner(t *testing.T) {
54 for _, test := range []struct {
55 input, want string
56 }{
57 {``, "EOF"},
58 {`123`, "123 EOF"},
59 {`x.y`, "x . y EOF"},
60 {`chocolate.éclair`, `chocolate . éclair EOF`},
61 {`123 "foo" hello x.y`, `123 "foo" hello x . y EOF`},
62 {`print(x)`, "print ( x ) EOF"},
63 {`print(x); print(y)`, "print ( x ) ; print ( y ) EOF"},
Alan Donovanae063842017-10-10 15:46:17 -040064 {"\nprint(\n1\n)\n", "print ( 1 ) newline EOF"}, // final \n is at toplevel on non-blank line => token
Alan Donovan312d1a52017-10-02 10:10:28 -040065 {`/ // /= //= ///=`, "/ // /= //= // /= EOF"},
66 {`# hello
67print(x)`, "print ( x ) EOF"},
68 {`# hello
69print(1)
70cc_binary(name="foo")
71def f(x):
72 return x+1
73print(1)
74`,
75 `print ( 1 ) newline ` +
76 `cc_binary ( name = "foo" ) newline ` +
77 `def f ( x ) : newline ` +
78 `indent return x + 1 newline ` +
79 `outdent print ( 1 ) newline ` +
80 `EOF`},
81 // EOF should act like an implicit newline.
82 {`def f(): pass`,
83 "def f ( ) : pass EOF"},
84 {`def f():
85 pass`,
86 "def f ( ) : newline indent pass newline outdent EOF"},
87 {`def f():
88 pass
89# oops`,
90 "def f ( ) : newline indent pass newline outdent EOF"},
91 {`def f():
92 pass \
93`,
94 "def f ( ) : newline indent pass newline outdent EOF"},
95 {`def f():
96 pass
97`,
98 "def f ( ) : newline indent pass newline outdent EOF"},
99 {`pass
100
101
102pass`, "pass newline pass EOF"}, // consecutive newlines are consolidated
103 {`def f():
104 pass
105 `, "def f ( ) : newline indent pass newline outdent EOF"},
106 {`def f():
107 pass
108 ` + "\n", "def f ( ) : newline indent pass newline outdent EOF"},
109 {"pass", "pass EOF"},
110 {"pass\n", "pass newline EOF"},
111 {"pass\n ", "pass newline EOF"},
112 {"pass\n \n", "pass newline EOF"},
113 {"if x:\n pass\n ", "if x : newline indent pass newline outdent EOF"},
114 {`x = 1 + \
1152`, `x = 1 + 2 EOF`},
116 {`x = 'a\nb'`, `x = "a\nb" EOF`},
117 {`x = 'a\zb'`, `x = "a\\zb" EOF`},
118 {`x = r'a\nb'`, `x = "a\\nb" EOF`},
119 {`x = '\''`, `x = "'" EOF`},
120 {`x = "\""`, `x = "\"" EOF`},
121 {`x = r'\''`, `x = "\\'" EOF`},
122 {`x = '''\''''`, `x = "'" EOF`},
123 {`x = r'''\''''`, `x = "\\'" EOF`},
124 {`x = ''''a'b'c'''`, `x = "'a'b'c" EOF`},
125 {"x = '''a\nb'''", `x = "a\nb" EOF`},
126 {"x = '''a\rb'''", `x = "a\nb" EOF`},
127 {"x = '''a\r\nb'''", `x = "a\nb" EOF`},
128 {"x = '''a\n\rb'''", `x = "a\n\nb" EOF`},
129 {"x = r'a\\\nb'", `x = "a\\\nb" EOF`},
130 {"x = r'a\\\rb'", `x = "a\\\nb" EOF`},
131 {"x = r'a\\\r\nb'", `x = "a\\\nb" EOF`},
132 {"a\rb", `a newline b EOF`},
133 {"a\nb", `a newline b EOF`},
134 {"a\r\nb", `a newline b EOF`},
135 {"a\n\nb", `a newline b EOF`},
136 // numbers
137 {"0", `0 EOF`},
138 {"00", `0 EOF`},
139 {"0.", `0.000000e+00 EOF`},
140 {"0.e1", `0.000000e+00 EOF`},
141 {".0", `0.000000e+00 EOF`},
142 {"0.0", `0.000000e+00 EOF`},
143 {".e1", `. e1 EOF`},
144 {"1", `1 EOF`},
145 {"1.", `1.000000e+00 EOF`},
146 {".1", `1.000000e-01 EOF`},
147 {".1e1", `1.000000e+00 EOF`},
148 {".1e+1", `1.000000e+00 EOF`},
149 {".1e-1", `1.000000e-02 EOF`},
150 {"1e1", `1.000000e+01 EOF`},
151 {"1e+1", `1.000000e+01 EOF`},
152 {"1e-1", `1.000000e-01 EOF`},
153 {"123", `123 EOF`},
154 {"123e45", `1.230000e+47 EOF`},
155 // hex
156 {"0xA", `10 EOF`},
157 {"0xAAG", `170 G EOF`},
Ariel Mashrakicaa37b42017-10-27 19:27:28 +0300158 {"0xG", `foo.sky:1:1: invalid hex literal`},
Alan Donovan312d1a52017-10-02 10:10:28 -0400159 {"0XA", `10 EOF`},
Ariel Mashrakicaa37b42017-10-27 19:27:28 +0300160 {"0XG", `foo.sky:1:1: invalid hex literal`},
Alan Donovan312d1a52017-10-02 10:10:28 -0400161 {"0xA.", `10 . EOF`},
162 {"0xA.e1", `10 . e1 EOF`},
163 // octal
164 {"0o123", `83 EOF`},
165 {"0o12834", `10 834 EOF`},
166 {"0o12934", `10 934 EOF`},
167 {"0o12934.", `10 9.340000e+02 EOF`},
168 {"0o12934.1", `10 9.341000e+02 EOF`},
169 {"0o12934e1", `10 9.340000e+03 EOF`},
170 {"0o123.", `83 . EOF`},
171 {"0o123.1", `83 1.000000e-01 EOF`},
172 // TODO(adonovan): reenable later.
173 // {"0123", `obsolete form of octal literal; use 0o123`},
174 {"0123", `83 EOF`},
Ariel Mashrakicaa37b42017-10-27 19:27:28 +0300175 {"012834", `foo.sky:1:1: invalid int literal`},
176 {"012934", `foo.sky:1:1: invalid int literal`},
177 {"i = 012934", `foo.sky:1:5: invalid int literal`},
Alan Donovan312d1a52017-10-02 10:10:28 -0400178 // octal escapes in string literals
179 {`"\037"`, `"\x1f" EOF`},
180 {`"\377"`, `"\xff" EOF`},
Ariel Mashrakicaa37b42017-10-27 19:27:28 +0300181 {`"\378"`, `"\x1f8" EOF`}, // = '\37' + '8'
182 {`"\400"`, `foo.sky:1:1: invalid escape sequence \400`}, // unlike Python 2 and 3
Alan Donovan312d1a52017-10-02 10:10:28 -0400183 // Backslashes that are not part of escapes are treated literally,
184 // but this behavior will change; see b/34519173.
185 {`"\+"`, `"\\+" EOF`},
186 {`"\o123"`, `"\\o123" EOF`},
187 // floats starting with octal digits
188 {"012934.", `1.293400e+04 EOF`},
189 {"012934.1", `1.293410e+04 EOF`},
190 {"012934e1", `1.293400e+05 EOF`},
191 {"0123.", `1.230000e+02 EOF`},
192 {"0123.1", `1.231000e+02 EOF`},
Ariel Mashrakicaa37b42017-10-27 19:27:28 +0300193 // issue #16
194 {"x ! 0", "foo.sky:1:3: unexpected input character '!'"},
Alan Donovan312d1a52017-10-02 10:10:28 -0400195 } {
// On failure the error message stands in for the token string, so error
// cases are written in the table as the expected message. The unchecked
// assertion to Error panics if scan returns any other error type.
196 got, err := scan(test.input)
197 if err != nil {
Ariel Mashrakicaa37b42017-10-27 19:27:28 +0300198 got = err.(Error).Error()
Alan Donovan312d1a52017-10-02 10:10:28 -0400199 }
200 if test.want != got {
201 t.Errorf("scan `%s` = [%s], want [%s]", test.input, got, test.want)
202 }
203 }
204}
205
// dataFile returns the path of the test data file filename inside package
// directory pkgdir, located under src/github.com/google of the Go workspace.
// It plays the same role as skylarktest.DataFile; the copy exists here to
// avoid a dependency cycle.
//
// GOPATH may hold several workspaces joined by the OS path-list separator.
// The first non-empty entry is used as the root, because the raw list string
// is not a valid directory once it contains more than one entry. With an
// empty GOPATH the result is a relative path, since filepath.Join drops
// empty elements.
var dataFile = func(pkgdir, filename string) string {
	root := ""
	for _, dir := range filepath.SplitList(build.Default.GOPATH) {
		if dir != "" {
			root = dir
			break
		}
	}
	return filepath.Join(root, "src/github.com/google", pkgdir, filename)
}
211
212func BenchmarkScan(b *testing.B) {
213 filename := dataFile("skylark/syntax", "testdata/def.bzl")
214 b.StopTimer()
215 data, err := ioutil.ReadFile(filename)
216 if err != nil {
217 b.Fatal(err)
218 }
219 b.StartTimer()
220
221 for i := 0; i < b.N; i++ {
222 sc, err := newScanner(filename, data)
223 if err != nil {
224 b.Fatal(err)
225 }
226 var val tokenValue
227 for sc.nextToken(&val) != EOF {
228 }
229 }
230}