Alan Donovan | 312d1a5 | 2017-10-02 10:10:28 -0400 | [diff] [blame] | 1 | // Copyright 2017 The Bazel Authors. All rights reserved. |
| 2 | // Use of this source code is governed by a BSD-style |
| 3 | // license that can be found in the LICENSE file. |
| 4 | |
| 5 | package syntax |
| 6 | |
| 7 | import ( |
| 8 | "bytes" |
| 9 | "fmt" |
| 10 | "go/build" |
| 11 | "io/ioutil" |
| 12 | "path/filepath" |
| 13 | "testing" |
| 14 | ) |
| 15 | |
| 16 | func scan(src interface{}) (tokens string, err error) { |
Alan Donovan | e3deafe | 2018-10-23 11:05:09 -0400 | [diff] [blame] | 17 | sc, err := newScanner("foo.star", src, false) |
Alan Donovan | 312d1a5 | 2017-10-02 10:10:28 -0400 | [diff] [blame] | 18 | if err != nil { |
| 19 | return "", err |
| 20 | } |
| 21 | |
| 22 | defer sc.recover(&err) |
| 23 | |
| 24 | var buf bytes.Buffer |
| 25 | var val tokenValue |
| 26 | for { |
| 27 | tok := sc.nextToken(&val) |
| 28 | |
| 29 | if buf.Len() > 0 { |
| 30 | buf.WriteByte(' ') |
| 31 | } |
| 32 | switch tok { |
| 33 | case EOF: |
| 34 | buf.WriteString("EOF") |
| 35 | case IDENT: |
| 36 | buf.WriteString(val.raw) |
| 37 | case INT: |
Mohamed Elqdusy | 69e9615 | 2018-01-22 20:00:29 +0100 | [diff] [blame] | 38 | if val.bigInt != nil { |
| 39 | fmt.Fprintf(&buf, "%d", val.bigInt) |
| 40 | } else { |
| 41 | fmt.Fprintf(&buf, "%d", val.int) |
| 42 | } |
Alan Donovan | 312d1a5 | 2017-10-02 10:10:28 -0400 | [diff] [blame] | 43 | case FLOAT: |
| 44 | fmt.Fprintf(&buf, "%e", val.float) |
| 45 | case STRING: |
| 46 | fmt.Fprintf(&buf, "%q", val.string) |
| 47 | default: |
| 48 | buf.WriteString(tok.String()) |
| 49 | } |
| 50 | if tok == EOF { |
| 51 | break |
| 52 | } |
| 53 | } |
| 54 | return buf.String(), nil |
| 55 | } |
| 56 | |
// TestScanner is a table-driven test of the scanner. Each case pairs an
// input with the expected result of scan: either the space-separated
// rendering of the token stream ending in "EOF", or, when scan returns an
// error, that error's message.
| 57 | func TestScanner(t *testing.T) { |
| 58 | for _, test := range []struct { |
| 59 | input, want string |
| 60 | }{ |
| 61 | {``, "EOF"}, |
| 62 | {`123`, "123 EOF"}, |
| 63 | {`x.y`, "x . y EOF"}, |
| 64 | {`chocolate.éclair`, `chocolate . éclair EOF`}, |
| 65 | {`123 "foo" hello x.y`, `123 "foo" hello x . y EOF`}, |
| 66 | {`print(x)`, "print ( x ) EOF"}, |
| 67 | {`print(x); print(y)`, "print ( x ) ; print ( y ) EOF"}, |
Alan Donovan | ae06384 | 2017-10-10 15:46:17 -0400 | [diff] [blame] | 68 | {"\nprint(\n1\n)\n", "print ( 1 ) newline EOF"}, // final \n is at toplevel on non-blank line => token |
Alan Donovan | 312d1a5 | 2017-10-02 10:10:28 -0400 | [diff] [blame] | 69 | {`/ // /= //= ///=`, "/ // /= //= // /= EOF"}, |
| 70 | {`# hello |
| 71 | print(x)`, "print ( x ) EOF"}, |
| 72 | {`# hello |
| 73 | print(1) |
| 74 | cc_binary(name="foo") |
| 75 | def f(x): |
| 76 | return x+1 |
| 77 | print(1) |
| 78 | `, |
| 79 | `print ( 1 ) newline ` + |
| 80 | `cc_binary ( name = "foo" ) newline ` + |
| 81 | `def f ( x ) : newline ` + |
| 82 | `indent return x + 1 newline ` + |
| 83 | `outdent print ( 1 ) newline ` + |
| 84 | `EOF`}, |
| 85 | // EOF should act like an implicit newline. |
| 86 | {`def f(): pass`, |
| 87 | "def f ( ) : pass EOF"}, |
| 88 | {`def f(): |
| 89 | pass`, |
| 90 | "def f ( ) : newline indent pass newline outdent EOF"}, |
| 91 | {`def f(): |
| 92 | pass |
| 93 | # oops`, |
| 94 | "def f ( ) : newline indent pass newline outdent EOF"}, |
| 95 | {`def f(): |
| 96 | pass \ |
| 97 | `, |
| 98 | "def f ( ) : newline indent pass newline outdent EOF"}, |
| 99 | {`def f(): |
| 100 | pass |
| 101 | `, |
| 102 | "def f ( ) : newline indent pass newline outdent EOF"}, |
| 103 | {`pass |
| 104 | |
| 105 | |
| 106 | pass`, "pass newline pass EOF"}, // consecutive newlines are consolidated |
| 107 | {`def f(): |
| 108 | pass |
| 109 | `, "def f ( ) : newline indent pass newline outdent EOF"}, |
| 110 | {`def f(): |
| 111 | pass |
| 112 | ` + "\n", "def f ( ) : newline indent pass newline outdent EOF"}, |
| 113 | {"pass", "pass EOF"}, |
| 114 | {"pass\n", "pass newline EOF"}, |
| 115 | {"pass\n ", "pass newline EOF"}, |
| 116 | {"pass\n \n", "pass newline EOF"}, |
| 117 | {"if x:\n pass\n ", "if x : newline indent pass newline outdent EOF"}, |
| 118 | {`x = 1 + \ |
| 119 | 2`, `x = 1 + 2 EOF`}, |
| 120 | {`x = 'a\nb'`, `x = "a\nb" EOF`}, |
Alan Donovan | 312d1a5 | 2017-10-02 10:10:28 -0400 | [diff] [blame] | 121 | {`x = r'a\nb'`, `x = "a\\nb" EOF`}, |
| 122 | {`x = '\''`, `x = "'" EOF`}, |
| 123 | {`x = "\""`, `x = "\"" EOF`}, |
| 124 | {`x = r'\''`, `x = "\\'" EOF`}, |
| 125 | {`x = '''\''''`, `x = "'" EOF`}, |
| 126 | {`x = r'''\''''`, `x = "\\'" EOF`}, |
| 127 | {`x = ''''a'b'c'''`, `x = "'a'b'c" EOF`}, |
| 128 | {"x = '''a\nb'''", `x = "a\nb" EOF`}, |
| 129 | {"x = '''a\rb'''", `x = "a\nb" EOF`}, |
| 130 | {"x = '''a\r\nb'''", `x = "a\nb" EOF`}, |
| 131 | {"x = '''a\n\rb'''", `x = "a\n\nb" EOF`}, |
| 132 | {"x = r'a\\\nb'", `x = "a\\\nb" EOF`}, |
| 133 | {"x = r'a\\\rb'", `x = "a\\\nb" EOF`}, |
| 134 | {"x = r'a\\\r\nb'", `x = "a\\\nb" EOF`}, |
| 135 | {"a\rb", `a newline b EOF`}, |
| 136 | {"a\nb", `a newline b EOF`}, |
| 137 | {"a\r\nb", `a newline b EOF`}, |
| 138 | {"a\n\nb", `a newline b EOF`}, |
| 139 | // numbers |
| 140 | {"0", `0 EOF`}, |
| 141 | {"00", `0 EOF`}, |
| 142 | {"0.", `0.000000e+00 EOF`}, |
| 143 | {"0.e1", `0.000000e+00 EOF`}, |
| 144 | {".0", `0.000000e+00 EOF`}, |
| 145 | {"0.0", `0.000000e+00 EOF`}, |
| 146 | {".e1", `. e1 EOF`}, |
| 147 | {"1", `1 EOF`}, |
| 148 | {"1.", `1.000000e+00 EOF`}, |
| 149 | {".1", `1.000000e-01 EOF`}, |
| 150 | {".1e1", `1.000000e+00 EOF`}, |
| 151 | {".1e+1", `1.000000e+00 EOF`}, |
| 152 | {".1e-1", `1.000000e-02 EOF`}, |
| 153 | {"1e1", `1.000000e+01 EOF`}, |
| 154 | {"1e+1", `1.000000e+01 EOF`}, |
| 155 | {"1e-1", `1.000000e-01 EOF`}, |
| 156 | {"123", `123 EOF`}, |
| 157 | {"123e45", `1.230000e+47 EOF`}, |
Mohamed Elqdusy | 69e9615 | 2018-01-22 20:00:29 +0100 | [diff] [blame] | 158 | {"999999999999999999999999999999999999999999999999999", `999999999999999999999999999999999999999999999999999 EOF`}, |
| 159 | {"12345678901234567890", `12345678901234567890 EOF`}, |
Alan Donovan | 312d1a5 | 2017-10-02 10:10:28 -0400 | [diff] [blame] | 160 | // hex |
| 161 | {"0xA", `10 EOF`}, |
| 162 | {"0xAAG", `170 G EOF`}, |
Alan Donovan | e3deafe | 2018-10-23 11:05:09 -0400 | [diff] [blame] | 163 | {"0xG", `foo.star:1:1: invalid hex literal`}, |
Alan Donovan | 312d1a5 | 2017-10-02 10:10:28 -0400 | [diff] [blame] | 164 | {"0XA", `10 EOF`}, |
Alan Donovan | e3deafe | 2018-10-23 11:05:09 -0400 | [diff] [blame] | 165 | {"0XG", `foo.star:1:1: invalid hex literal`}, |
Alan Donovan | 312d1a5 | 2017-10-02 10:10:28 -0400 | [diff] [blame] | 166 | {"0xA.", `10 . EOF`}, |
| 167 | {"0xA.e1", `10 . e1 EOF`}, |
Mohamed Elqdusy | 69e9615 | 2018-01-22 20:00:29 +0100 | [diff] [blame] | 168 | {"0x12345678deadbeef12345678", `5634002672576678570168178296 EOF`}, |
Mohamed Elqdusy | 3b32df9 | 2018-01-08 17:20:46 +0100 | [diff] [blame] | 169 | // binary |
| 170 | {"0b1010", `10 EOF`}, |
| 171 | {"0B111101", `61 EOF`}, |
Alan Donovan | e3deafe | 2018-10-23 11:05:09 -0400 | [diff] [blame] | 172 | {"0b3", `foo.star:1:3: invalid binary literal`}, |
Mohamed Elqdusy | 3b32df9 | 2018-01-08 17:20:46 +0100 | [diff] [blame] | 173 | {"0b1010201", `10 201 EOF`}, |
| 174 | {"0b1010.01", `10 1.000000e-02 EOF`}, |
| 175 | {"0b0000", `0 EOF`}, |
Alan Donovan | 312d1a5 | 2017-10-02 10:10:28 -0400 | [diff] [blame] | 176 | // octal |
| 177 | {"0o123", `83 EOF`}, |
| 178 | {"0o12834", `10 834 EOF`}, |
| 179 | {"0o12934", `10 934 EOF`}, |
| 180 | {"0o12934.", `10 9.340000e+02 EOF`}, |
| 181 | {"0o12934.1", `10 9.341000e+02 EOF`}, |
| 182 | {"0o12934e1", `10 9.340000e+03 EOF`}, |
| 183 | {"0o123.", `83 . EOF`}, |
| 184 | {"0o123.1", `83 1.000000e-01 EOF`}, |
alandonovan | a475931 | 2019-05-28 16:17:46 -0400 | [diff] [blame] | 185 | {"0123", `foo.star:1:5: obsolete form of octal literal; use 0o123`}, |
Alan Donovan | e3deafe | 2018-10-23 11:05:09 -0400 | [diff] [blame] | 186 | {"012834", `foo.star:1:1: invalid int literal`}, |
| 187 | {"012934", `foo.star:1:1: invalid int literal`}, |
| 188 | {"i = 012934", `foo.star:1:5: invalid int literal`}, |
Alan Donovan | 312d1a5 | 2017-10-02 10:10:28 -0400 | [diff] [blame] | 189 | // octal escapes in string literals |
| 190 | {`"\037"`, `"\x1f" EOF`}, |
| 191 | {`"\377"`, `"\xff" EOF`}, |
alandonovan | 7a86632 | 2018-11-21 14:57:52 -0500 | [diff] [blame] | 192 | {`"\378"`, `"\x1f8" EOF`}, // = '\37' + '8' |
Alan Donovan | e3deafe | 2018-10-23 11:05:09 -0400 | [diff] [blame] | 193 | {`"\400"`, `foo.star:1:1: invalid escape sequence \400`}, // unlike Python 2 and 3 |
alandonovan | 16e44b1 | 2020-03-26 10:23:16 -0400 | [diff] [blame^] | 194 | |
| 195 | // backslash escapes |
| 196 | // As in Go, a backslash must escape something. |
| 197 | // (Python started issuing a deprecation warning in 3.6.) |
| 198 | {`"foo\(bar"`, `foo.star:1:1: invalid escape sequence \(`}, |
| 199 | {`"\+"`, `foo.star:1:1: invalid escape sequence \+`}, |
| 200 | {`"\w"`, `foo.star:1:1: invalid escape sequence \w`}, |
| 201 | {`"\""`, `"\"" EOF`}, |
| 202 | {`"\'"`, `foo.star:1:1: invalid escape sequence \'`}, |
| 203 | {`'\w'`, `foo.star:1:1: invalid escape sequence \w`}, |
| 204 | {`'\''`, `"'" EOF`}, |
| 205 | {`'\"'`, `foo.star:1:1: invalid escape sequence \"`}, |
| 206 | {`"""\w"""`, `foo.star:1:1: invalid escape sequence \w`}, |
| 207 | {`"""\""""`, `"\"" EOF`}, |
| 208 | {`"""\'"""`, `foo.star:1:1: invalid escape sequence \'`}, |
| 209 | {`'''\w'''`, `foo.star:1:1: invalid escape sequence \w`}, |
| 210 | {`'''\''''`, `"'" EOF`}, |
| 211 | {`'''\"'''`, `foo.star:1:1: invalid escape sequence \"`}, // error |
| 212 | {`r"\w"`, `"\\w" EOF`}, |
| 213 | {`r"\""`, `"\\\"" EOF`}, |
| 214 | {`r"\'"`, `"\\'" EOF`}, |
| 215 | {`r'\w'`, `"\\w" EOF`}, |
| 216 | {`r'\''`, `"\\'" EOF`}, |
| 217 | {`r'\"'`, `"\\\"" EOF`}, |
| 218 | {`'a\zb'`, `foo.star:1:1: invalid escape sequence \z`}, |
| 219 | {`"\o123"`, `foo.star:1:1: invalid escape sequence \o`}, |
Alan Donovan | 312d1a5 | 2017-10-02 10:10:28 -0400 | [diff] [blame] | 220 | // floats starting with octal digits |
| 221 | {"012934.", `1.293400e+04 EOF`}, |
| 222 | {"012934.1", `1.293410e+04 EOF`}, |
| 223 | {"012934e1", `1.293400e+05 EOF`}, |
| 224 | {"0123.", `1.230000e+02 EOF`}, |
| 225 | {"0123.1", `1.231000e+02 EOF`}, |
alandonovan | 7a86632 | 2018-11-21 14:57:52 -0500 | [diff] [blame] | 226 | // github.com/google/skylark/issues/16 |
Alan Donovan | e3deafe | 2018-10-23 11:05:09 -0400 | [diff] [blame] | 227 | {"x ! 0", "foo.star:1:3: unexpected input character '!'"}, |
alandonovan | f6c29bf | 2019-01-03 15:19:20 -0500 | [diff] [blame] | 228 | // github.com/google/starlark-go/issues/80 |
| 229 | {"([{<>}])", "( [ { < > } ] ) EOF"}, |
alandonovan | 30e71c6 | 2019-01-04 13:48:12 -0500 | [diff] [blame] | 230 | {"f();", "f ( ) ; EOF"}, |
alandonovan | c1a3d54 | 2019-01-31 13:43:01 -0500 | [diff] [blame] | 231 | // github.com/google/starlark-go/issues/104 |
| 232 | {"def f():\n if x:\n pass\n ", `def f ( ) : newline indent if x : newline indent pass newline outdent outdent EOF`}, |
| 233 | {`while cond: pass`, "while cond : pass EOF"}, |
alandonovan | 22479a3 | 2019-01-09 12:15:31 -0500 | [diff] [blame] | 234 | // github.com/google/starlark-go/issues/107 |
| 235 | {"~= ~= 5", "~ = ~ = 5 EOF"}, |
alandonovan | 988906f | 2019-08-20 13:32:00 -0400 | [diff] [blame] | 236 | {"0in", "0 in EOF"}, |
| 237 | {"0or", "foo.star:1:3: invalid octal literal"}, |
| 238 | {"6in", "6 in EOF"}, |
| 239 | {"6or", "6 or EOF"}, |
Alan Donovan | 312d1a5 | 2017-10-02 10:10:28 -0400 | [diff] [blame] | 240 | } { |
// Scan the input; on failure the error's message replaces the token
// rendering, so error cases are checked with the same string comparison.
| 241 | got, err := scan(test.input) |
| 242 | if err != nil { |
// The unchecked assertion to Error panics if scan returns any other
// error type.
Ariel Mashraki | caa37b4 | 2017-10-27 19:27:28 +0300 | [diff] [blame] | 243 | got = err.(Error).Error() |
Alan Donovan | 312d1a5 | 2017-10-02 10:10:28 -0400 | [diff] [blame] | 244 | } |
| 245 | if test.want != got { |
| 246 | t.Errorf("scan `%s` = [%s], want [%s]", test.input, got, test.want) |
| 247 | } |
| 248 | } |
| 249 | } |
| 250 | |
// dataFile returns the path of filename within package directory pkgdir,
// rooted at the "src/go.starlark.net" directory under the default build
// context's GOPATH.
//
// It duplicates starlarktest.DataFile; the copy exists to avoid a
// dependency cycle.
var dataFile = func(pkgdir, filename string) string {
	gopath := build.Default.GOPATH
	return filepath.Join(gopath, "src/go.starlark.net", pkgdir, filename)
}
| 256 | |
| 257 | func BenchmarkScan(b *testing.B) { |
Alan Donovan | 6beab7e | 2018-10-31 17:53:09 -0400 | [diff] [blame] | 258 | filename := dataFile("syntax", "testdata/scan.star") |
Alan Donovan | 312d1a5 | 2017-10-02 10:10:28 -0400 | [diff] [blame] | 259 | b.StopTimer() |
| 260 | data, err := ioutil.ReadFile(filename) |
| 261 | if err != nil { |
| 262 | b.Fatal(err) |
| 263 | } |
| 264 | b.StartTimer() |
| 265 | |
| 266 | for i := 0; i < b.N; i++ { |
Laurent Le Brun | 689fc22 | 2018-02-22 19:37:18 +0100 | [diff] [blame] | 267 | sc, err := newScanner(filename, data, false) |
Alan Donovan | 312d1a5 | 2017-10-02 10:10:28 -0400 | [diff] [blame] | 268 | if err != nil { |
| 269 | b.Fatal(err) |
| 270 | } |
| 271 | var val tokenValue |
| 272 | for sc.nextToken(&val) != EOF { |
| 273 | } |
| 274 | } |
| 275 | } |