Alan Donovan | 312d1a5 | 2017-10-02 10:10:28 -0400 | [diff] [blame] | 1 | // Copyright 2017 The Bazel Authors. All rights reserved. |
| 2 | // Use of this source code is governed by a BSD-style |
| 3 | // license that can be found in the LICENSE file. |
| 4 | |
| 5 | package syntax |
| 6 | |
| 7 | import ( |
| 8 | "bytes" |
| 9 | "fmt" |
| 10 | "go/build" |
| 11 | "io/ioutil" |
| 12 | "path/filepath" |
| 13 | "testing" |
| 14 | ) |
| 15 | |
| 16 | func scan(src interface{}) (tokens string, err error) { |
Alan Donovan | e3deafe | 2018-10-23 11:05:09 -0400 | [diff] [blame^] | 17 | sc, err := newScanner("foo.star", src, false) |
Alan Donovan | 312d1a5 | 2017-10-02 10:10:28 -0400 | [diff] [blame] | 18 | if err != nil { |
| 19 | return "", err |
| 20 | } |
| 21 | |
| 22 | defer sc.recover(&err) |
| 23 | |
| 24 | var buf bytes.Buffer |
| 25 | var val tokenValue |
| 26 | for { |
| 27 | tok := sc.nextToken(&val) |
| 28 | |
| 29 | if buf.Len() > 0 { |
| 30 | buf.WriteByte(' ') |
| 31 | } |
| 32 | switch tok { |
| 33 | case EOF: |
| 34 | buf.WriteString("EOF") |
| 35 | case IDENT: |
| 36 | buf.WriteString(val.raw) |
| 37 | case INT: |
Mohamed Elqdusy | 69e9615 | 2018-01-22 20:00:29 +0100 | [diff] [blame] | 38 | if val.bigInt != nil { |
| 39 | fmt.Fprintf(&buf, "%d", val.bigInt) |
| 40 | } else { |
| 41 | fmt.Fprintf(&buf, "%d", val.int) |
| 42 | } |
Alan Donovan | 312d1a5 | 2017-10-02 10:10:28 -0400 | [diff] [blame] | 43 | case FLOAT: |
| 44 | fmt.Fprintf(&buf, "%e", val.float) |
| 45 | case STRING: |
| 46 | fmt.Fprintf(&buf, "%q", val.string) |
| 47 | default: |
| 48 | buf.WriteString(tok.String()) |
| 49 | } |
| 50 | if tok == EOF { |
| 51 | break |
| 52 | } |
| 53 | } |
| 54 | return buf.String(), nil |
| 55 | } |
| 56 | |
// TestScanner runs scan over a table of (input, want) pairs. For each
// entry it compares want against the space-separated token rendering
// returned by scan or, when scan returns an error, against that error's
// message; any mismatch is reported with t.Errorf.
//
// NOTE(review): several multi-line raw-string inputs below show no
// leading indentation even though their expected output contains
// "indent"/"outdent" tokens; the whitespace was presumably lost when this
// listing was captured. Confirm against the repository copy before
// editing those entries.
| 57 | func TestScanner(t *testing.T) { |
| 58 | for _, test := range []struct { |
| 59 | input, want string |
| 60 | }{ |
| 61 | {``, "EOF"}, |
| 62 | {`123`, "123 EOF"}, |
| 63 | {`x.y`, "x . y EOF"}, |
| 64 | {`chocolate.éclair`, `chocolate . éclair EOF`}, |
| 65 | {`123 "foo" hello x.y`, `123 "foo" hello x . y EOF`}, |
| 66 | {`print(x)`, "print ( x ) EOF"}, |
| 67 | {`print(x); print(y)`, "print ( x ) ; print ( y ) EOF"}, |
Alan Donovan | ae06384 | 2017-10-10 15:46:17 -0400 | [diff] [blame] | 68 | {"\nprint(\n1\n)\n", "print ( 1 ) newline EOF"}, // final \n is at toplevel on non-blank line => token |
Alan Donovan | 312d1a5 | 2017-10-02 10:10:28 -0400 | [diff] [blame] | 69 | {`/ // /= //= ///=`, "/ // /= //= // /= EOF"}, |
| 70 | {`# hello |
| 71 | print(x)`, "print ( x ) EOF"}, |
| 72 | {`# hello |
| 73 | print(1) |
| 74 | cc_binary(name="foo") |
| 75 | def f(x): |
| 76 | return x+1 |
| 77 | print(1) |
| 78 | `, |
| 79 | `print ( 1 ) newline ` + |
| 80 | `cc_binary ( name = "foo" ) newline ` + |
| 81 | `def f ( x ) : newline ` + |
| 82 | `indent return x + 1 newline ` + |
| 83 | `outdent print ( 1 ) newline ` + |
| 84 | `EOF`}, |
| 85 | // EOF should act like an implicit newline. |
| 86 | {`def f(): pass`, |
| 87 | "def f ( ) : pass EOF"}, |
| 88 | {`def f(): |
| 89 | pass`, |
| 90 | "def f ( ) : newline indent pass newline outdent EOF"}, |
| 91 | {`def f(): |
| 92 | pass |
| 93 | # oops`, |
| 94 | "def f ( ) : newline indent pass newline outdent EOF"}, |
| 95 | {`def f(): |
| 96 | pass \ |
| 97 | `, |
| 98 | "def f ( ) : newline indent pass newline outdent EOF"}, |
| 99 | {`def f(): |
| 100 | pass |
| 101 | `, |
| 102 | "def f ( ) : newline indent pass newline outdent EOF"}, |
| 103 | {`pass |
| 104 | |
| 105 | |
| 106 | pass`, "pass newline pass EOF"}, // consecutive newlines are consolidated |
| 107 | {`def f(): |
| 108 | pass |
| 109 | `, "def f ( ) : newline indent pass newline outdent EOF"}, |
| 110 | {`def f(): |
| 111 | pass |
| 112 | ` + "\n", "def f ( ) : newline indent pass newline outdent EOF"}, |
| 113 | {"pass", "pass EOF"}, |
| 114 | {"pass\n", "pass newline EOF"}, |
| 115 | {"pass\n ", "pass newline EOF"}, |
| 116 | {"pass\n \n", "pass newline EOF"}, |
| 117 | {"if x:\n pass\n ", "if x : newline indent pass newline outdent EOF"}, |
| 118 | {`x = 1 + \ |
| 119 | 2`, `x = 1 + 2 EOF`}, |
| 120 | {`x = 'a\nb'`, `x = "a\nb" EOF`}, |
| 121 | {`x = 'a\zb'`, `x = "a\\zb" EOF`}, |
| 122 | {`x = r'a\nb'`, `x = "a\\nb" EOF`}, |
| 123 | {`x = '\''`, `x = "'" EOF`}, |
| 124 | {`x = "\""`, `x = "\"" EOF`}, |
| 125 | {`x = r'\''`, `x = "\\'" EOF`}, |
| 126 | {`x = '''\''''`, `x = "'" EOF`}, |
| 127 | {`x = r'''\''''`, `x = "\\'" EOF`}, |
| 128 | {`x = ''''a'b'c'''`, `x = "'a'b'c" EOF`}, |
| 129 | {"x = '''a\nb'''", `x = "a\nb" EOF`}, |
| 130 | {"x = '''a\rb'''", `x = "a\nb" EOF`}, |
| 131 | {"x = '''a\r\nb'''", `x = "a\nb" EOF`}, |
| 132 | {"x = '''a\n\rb'''", `x = "a\n\nb" EOF`}, |
| 133 | {"x = r'a\\\nb'", `x = "a\\\nb" EOF`}, |
| 134 | {"x = r'a\\\rb'", `x = "a\\\nb" EOF`}, |
| 135 | {"x = r'a\\\r\nb'", `x = "a\\\nb" EOF`}, |
| 136 | {"a\rb", `a newline b EOF`}, |
| 137 | {"a\nb", `a newline b EOF`}, |
| 138 | {"a\r\nb", `a newline b EOF`}, |
| 139 | {"a\n\nb", `a newline b EOF`}, |
| 140 | // numbers |
| 141 | {"0", `0 EOF`}, |
| 142 | {"00", `0 EOF`}, |
| 143 | {"0.", `0.000000e+00 EOF`}, |
| 144 | {"0.e1", `0.000000e+00 EOF`}, |
| 145 | {".0", `0.000000e+00 EOF`}, |
| 146 | {"0.0", `0.000000e+00 EOF`}, |
| 147 | {".e1", `. e1 EOF`}, |
| 148 | {"1", `1 EOF`}, |
| 149 | {"1.", `1.000000e+00 EOF`}, |
| 150 | {".1", `1.000000e-01 EOF`}, |
| 151 | {".1e1", `1.000000e+00 EOF`}, |
| 152 | {".1e+1", `1.000000e+00 EOF`}, |
| 153 | {".1e-1", `1.000000e-02 EOF`}, |
| 154 | {"1e1", `1.000000e+01 EOF`}, |
| 155 | {"1e+1", `1.000000e+01 EOF`}, |
| 156 | {"1e-1", `1.000000e-01 EOF`}, |
| 157 | {"123", `123 EOF`}, |
| 158 | {"123e45", `1.230000e+47 EOF`}, |
Mohamed Elqdusy | 69e9615 | 2018-01-22 20:00:29 +0100 | [diff] [blame] | 159 | {"999999999999999999999999999999999999999999999999999", `999999999999999999999999999999999999999999999999999 EOF`}, |
| 160 | {"12345678901234567890", `12345678901234567890 EOF`}, |
Alan Donovan | 312d1a5 | 2017-10-02 10:10:28 -0400 | [diff] [blame] | 161 | // hex |
| 162 | {"0xA", `10 EOF`}, |
| 163 | {"0xAAG", `170 G EOF`}, |
Alan Donovan | e3deafe | 2018-10-23 11:05:09 -0400 | [diff] [blame^] | 164 | {"0xG", `foo.star:1:1: invalid hex literal`}, |
Alan Donovan | 312d1a5 | 2017-10-02 10:10:28 -0400 | [diff] [blame] | 165 | {"0XA", `10 EOF`}, |
Alan Donovan | e3deafe | 2018-10-23 11:05:09 -0400 | [diff] [blame^] | 166 | {"0XG", `foo.star:1:1: invalid hex literal`}, |
Alan Donovan | 312d1a5 | 2017-10-02 10:10:28 -0400 | [diff] [blame] | 167 | {"0xA.", `10 . EOF`}, |
| 168 | {"0xA.e1", `10 . e1 EOF`}, |
Mohamed Elqdusy | 69e9615 | 2018-01-22 20:00:29 +0100 | [diff] [blame] | 169 | {"0x12345678deadbeef12345678", `5634002672576678570168178296 EOF`}, |
Mohamed Elqdusy | 3b32df9 | 2018-01-08 17:20:46 +0100 | [diff] [blame] | 170 | // binary |
| 171 | {"0b1010", `10 EOF`}, |
| 172 | {"0B111101", `61 EOF`}, |
Alan Donovan | e3deafe | 2018-10-23 11:05:09 -0400 | [diff] [blame^] | 173 | {"0b3", `foo.star:1:3: invalid binary literal`}, |
Mohamed Elqdusy | 3b32df9 | 2018-01-08 17:20:46 +0100 | [diff] [blame] | 174 | {"0b1010201", `10 201 EOF`}, |
| 175 | {"0b1010.01", `10 1.000000e-02 EOF`}, |
| 176 | {"0b0000", `0 EOF`}, |
Alan Donovan | 312d1a5 | 2017-10-02 10:10:28 -0400 | [diff] [blame] | 177 | // octal |
| 178 | {"0o123", `83 EOF`}, |
| 179 | {"0o12834", `10 834 EOF`}, |
| 180 | {"0o12934", `10 934 EOF`}, |
| 181 | {"0o12934.", `10 9.340000e+02 EOF`}, |
| 182 | {"0o12934.1", `10 9.341000e+02 EOF`}, |
| 183 | {"0o12934e1", `10 9.340000e+03 EOF`}, |
| 184 | {"0o123.", `83 . EOF`}, |
| 185 | {"0o123.1", `83 1.000000e-01 EOF`}, |
| 186 | // TODO(adonovan): reenable later. |
| 187 | // {"0123", `obsolete form of octal literal; use 0o123`}, |
| 188 | {"0123", `83 EOF`}, |
Alan Donovan | e3deafe | 2018-10-23 11:05:09 -0400 | [diff] [blame^] | 189 | {"012834", `foo.star:1:1: invalid int literal`}, |
| 190 | {"012934", `foo.star:1:1: invalid int literal`}, |
| 191 | {"i = 012934", `foo.star:1:5: invalid int literal`}, |
Alan Donovan | 312d1a5 | 2017-10-02 10:10:28 -0400 | [diff] [blame] | 192 | // octal escapes in string literals |
| 193 | {`"\037"`, `"\x1f" EOF`}, |
| 194 | {`"\377"`, `"\xff" EOF`}, |
Ariel Mashraki | caa37b4 | 2017-10-27 19:27:28 +0300 | [diff] [blame] | 195 | {`"\378"`, `"\x1f8" EOF`}, // = '\37' + '8' |
Alan Donovan | e3deafe | 2018-10-23 11:05:09 -0400 | [diff] [blame^] | 196 | {`"\400"`, `foo.star:1:1: invalid escape sequence \400`}, // unlike Python 2 and 3 |
Alan Donovan | 312d1a5 | 2017-10-02 10:10:28 -0400 | [diff] [blame] | 197 | // Backslashes that are not part of escapes are treated literally, |
| 198 | // but this behavior will change; see b/34519173. |
| 199 | {`"\+"`, `"\\+" EOF`}, |
| 200 | {`"\o123"`, `"\\o123" EOF`}, |
| 201 | // floats starting with octal digits |
| 202 | {"012934.", `1.293400e+04 EOF`}, |
| 203 | {"012934.1", `1.293410e+04 EOF`}, |
| 204 | {"012934e1", `1.293400e+05 EOF`}, |
| 205 | {"0123.", `1.230000e+02 EOF`}, |
| 206 | {"0123.1", `1.231000e+02 EOF`}, |
Ariel Mashraki | caa37b4 | 2017-10-27 19:27:28 +0300 | [diff] [blame] | 207 | // issue #16 |
Alan Donovan | e3deafe | 2018-10-23 11:05:09 -0400 | [diff] [blame^] | 208 | {"x ! 0", "foo.star:1:3: unexpected input character '!'"}, |
Alan Donovan | 312d1a5 | 2017-10-02 10:10:28 -0400 | [diff] [blame] | 209 | } { |
| 210 | got, err := scan(test.input) |
| 211 | if err != nil { |
// On failure the error message stands in for the token string.
// err.(Error) is an unchecked type assertion, so an error value
// of any other type would panic here rather than fail the test.
Ariel Mashraki | caa37b4 | 2017-10-27 19:27:28 +0300 | [diff] [blame] | 212 | got = err.(Error).Error() |
Alan Donovan | 312d1a5 | 2017-10-02 10:10:28 -0400 | [diff] [blame] | 213 | } |
| 214 | if test.want != got { |
| 215 | t.Errorf("scan `%s` = [%s], want [%s]", test.input, got, test.want) |
| 216 | } |
| 217 | } |
| 218 | } |
| 219 | |
// dataFile returns the path of filename inside package directory pkgdir,
// rooted at "src/github.com/google" beneath build.Default.GOPATH.
// It is the same as starlarktest.DataFile; the copy is kept here to
// avoid a dependency cycle.
var dataFile = func(pkgdir, filename string) string {
	const googleSrc = "src/github.com/google"
	gopath := build.Default.GOPATH
	return filepath.Join(gopath, googleSrc, pkgdir, filename)
}
| 225 | |
| 226 | func BenchmarkScan(b *testing.B) { |
Alan Donovan | e3deafe | 2018-10-23 11:05:09 -0400 | [diff] [blame^] | 227 | filename := dataFile("starlark/syntax", "testdata/scan.star") |
Alan Donovan | 312d1a5 | 2017-10-02 10:10:28 -0400 | [diff] [blame] | 228 | b.StopTimer() |
| 229 | data, err := ioutil.ReadFile(filename) |
| 230 | if err != nil { |
| 231 | b.Fatal(err) |
| 232 | } |
| 233 | b.StartTimer() |
| 234 | |
| 235 | for i := 0; i < b.N; i++ { |
Laurent Le Brun | 689fc22 | 2018-02-22 19:37:18 +0100 | [diff] [blame] | 236 | sc, err := newScanner(filename, data, false) |
Alan Donovan | 312d1a5 | 2017-10-02 10:10:28 -0400 | [diff] [blame] | 237 | if err != nil { |
| 238 | b.Fatal(err) |
| 239 | } |
| 240 | var val tokenValue |
| 241 | for sc.nextToken(&val) != EOF { |
| 242 | } |
| 243 | } |
| 244 | } |