Alan Donovan | 312d1a5 | 2017-10-02 10:10:28 -0400 | [diff] [blame] | 1 | // Copyright 2017 The Bazel Authors. All rights reserved. |
| 2 | // Use of this source code is governed by a BSD-style |
| 3 | // license that can be found in the LICENSE file. |
| 4 | |
| 5 | package syntax |
| 6 | |
| 7 | import ( |
| 8 | "bytes" |
| 9 | "fmt" |
| 10 | "go/build" |
| 11 | "io/ioutil" |
| 12 | "path/filepath" |
| 13 | "testing" |
| 14 | ) |
| 15 | |
| 16 | func scan(src interface{}) (tokens string, err error) { |
| 17 | sc, err := newScanner("foo.sky", src) |
| 18 | if err != nil { |
| 19 | return "", err |
| 20 | } |
| 21 | |
| 22 | defer sc.recover(&err) |
| 23 | |
| 24 | var buf bytes.Buffer |
| 25 | var val tokenValue |
| 26 | for { |
| 27 | tok := sc.nextToken(&val) |
| 28 | |
| 29 | if buf.Len() > 0 { |
| 30 | buf.WriteByte(' ') |
| 31 | } |
| 32 | switch tok { |
| 33 | case EOF: |
| 34 | buf.WriteString("EOF") |
| 35 | case IDENT: |
| 36 | buf.WriteString(val.raw) |
| 37 | case INT: |
| 38 | fmt.Fprintf(&buf, "%d", val.int) |
| 39 | case FLOAT: |
| 40 | fmt.Fprintf(&buf, "%e", val.float) |
| 41 | case STRING: |
| 42 | fmt.Fprintf(&buf, "%q", val.string) |
| 43 | default: |
| 44 | buf.WriteString(tok.String()) |
| 45 | } |
| 46 | if tok == EOF { |
| 47 | break |
| 48 | } |
| 49 | } |
| 50 | return buf.String(), nil |
| 51 | } |
| 52 | |
| 53 | func TestScanner(t *testing.T) { |
| 54 | for _, test := range []struct { |
| 55 | input, want string |
| 56 | }{ |
| 57 | {``, "EOF"}, |
| 58 | {`123`, "123 EOF"}, |
| 59 | {`x.y`, "x . y EOF"}, |
| 60 | {`chocolate.éclair`, `chocolate . éclair EOF`}, |
| 61 | {`123 "foo" hello x.y`, `123 "foo" hello x . y EOF`}, |
| 62 | {`print(x)`, "print ( x ) EOF"}, |
| 63 | {`print(x); print(y)`, "print ( x ) ; print ( y ) EOF"}, |
Alan Donovan | ae06384 | 2017-10-10 15:46:17 -0400 | [diff] [blame] | 64 | {"\nprint(\n1\n)\n", "print ( 1 ) newline EOF"}, // final \n is at toplevel on non-blank line => token |
Alan Donovan | 312d1a5 | 2017-10-02 10:10:28 -0400 | [diff] [blame] | 65 | {`/ // /= //= ///=`, "/ // /= //= // /= EOF"}, |
| 66 | {`# hello |
| 67 | print(x)`, "print ( x ) EOF"}, |
| 68 | {`# hello |
| 69 | print(1) |
| 70 | cc_binary(name="foo") |
| 71 | def f(x): |
| 72 | return x+1 |
| 73 | print(1) |
| 74 | `, |
| 75 | `print ( 1 ) newline ` + |
| 76 | `cc_binary ( name = "foo" ) newline ` + |
| 77 | `def f ( x ) : newline ` + |
| 78 | `indent return x + 1 newline ` + |
| 79 | `outdent print ( 1 ) newline ` + |
| 80 | `EOF`}, |
| 81 | // EOF should act line an implicit newline. |
| 82 | {`def f(): pass`, |
| 83 | "def f ( ) : pass EOF"}, |
| 84 | {`def f(): |
| 85 | pass`, |
| 86 | "def f ( ) : newline indent pass newline outdent EOF"}, |
| 87 | {`def f(): |
| 88 | pass |
| 89 | # oops`, |
| 90 | "def f ( ) : newline indent pass newline outdent EOF"}, |
| 91 | {`def f(): |
| 92 | pass \ |
| 93 | `, |
| 94 | "def f ( ) : newline indent pass newline outdent EOF"}, |
| 95 | {`def f(): |
| 96 | pass |
| 97 | `, |
| 98 | "def f ( ) : newline indent pass newline outdent EOF"}, |
| 99 | {`pass |
| 100 | |
| 101 | |
| 102 | pass`, "pass newline pass EOF"}, // consecutive newlines are consolidated |
| 103 | {`def f(): |
| 104 | pass |
| 105 | `, "def f ( ) : newline indent pass newline outdent EOF"}, |
| 106 | {`def f(): |
| 107 | pass |
| 108 | ` + "\n", "def f ( ) : newline indent pass newline outdent EOF"}, |
| 109 | {"pass", "pass EOF"}, |
| 110 | {"pass\n", "pass newline EOF"}, |
| 111 | {"pass\n ", "pass newline EOF"}, |
| 112 | {"pass\n \n", "pass newline EOF"}, |
| 113 | {"if x:\n pass\n ", "if x : newline indent pass newline outdent EOF"}, |
| 114 | {`x = 1 + \ |
| 115 | 2`, `x = 1 + 2 EOF`}, |
| 116 | {`x = 'a\nb'`, `x = "a\nb" EOF`}, |
| 117 | {`x = 'a\zb'`, `x = "a\\zb" EOF`}, |
| 118 | {`x = r'a\nb'`, `x = "a\\nb" EOF`}, |
| 119 | {`x = '\''`, `x = "'" EOF`}, |
| 120 | {`x = "\""`, `x = "\"" EOF`}, |
| 121 | {`x = r'\''`, `x = "\\'" EOF`}, |
| 122 | {`x = '''\''''`, `x = "'" EOF`}, |
| 123 | {`x = r'''\''''`, `x = "\\'" EOF`}, |
| 124 | {`x = ''''a'b'c'''`, `x = "'a'b'c" EOF`}, |
| 125 | {"x = '''a\nb'''", `x = "a\nb" EOF`}, |
| 126 | {"x = '''a\rb'''", `x = "a\nb" EOF`}, |
| 127 | {"x = '''a\r\nb'''", `x = "a\nb" EOF`}, |
| 128 | {"x = '''a\n\rb'''", `x = "a\n\nb" EOF`}, |
| 129 | {"x = r'a\\\nb'", `x = "a\\\nb" EOF`}, |
| 130 | {"x = r'a\\\rb'", `x = "a\\\nb" EOF`}, |
| 131 | {"x = r'a\\\r\nb'", `x = "a\\\nb" EOF`}, |
| 132 | {"a\rb", `a newline b EOF`}, |
| 133 | {"a\nb", `a newline b EOF`}, |
| 134 | {"a\r\nb", `a newline b EOF`}, |
| 135 | {"a\n\nb", `a newline b EOF`}, |
| 136 | // numbers |
| 137 | {"0", `0 EOF`}, |
| 138 | {"00", `0 EOF`}, |
| 139 | {"0.", `0.000000e+00 EOF`}, |
| 140 | {"0.e1", `0.000000e+00 EOF`}, |
| 141 | {".0", `0.000000e+00 EOF`}, |
| 142 | {"0.0", `0.000000e+00 EOF`}, |
| 143 | {".e1", `. e1 EOF`}, |
| 144 | {"1", `1 EOF`}, |
| 145 | {"1.", `1.000000e+00 EOF`}, |
| 146 | {".1", `1.000000e-01 EOF`}, |
| 147 | {".1e1", `1.000000e+00 EOF`}, |
| 148 | {".1e+1", `1.000000e+00 EOF`}, |
| 149 | {".1e-1", `1.000000e-02 EOF`}, |
| 150 | {"1e1", `1.000000e+01 EOF`}, |
| 151 | {"1e+1", `1.000000e+01 EOF`}, |
| 152 | {"1e-1", `1.000000e-01 EOF`}, |
| 153 | {"123", `123 EOF`}, |
| 154 | {"123e45", `1.230000e+47 EOF`}, |
| 155 | // hex |
| 156 | {"0xA", `10 EOF`}, |
| 157 | {"0xAAG", `170 G EOF`}, |
Ariel Mashraki | caa37b4 | 2017-10-27 19:27:28 +0300 | [diff] [blame^] | 158 | {"0xG", `foo.sky:1:1: invalid hex literal`}, |
Alan Donovan | 312d1a5 | 2017-10-02 10:10:28 -0400 | [diff] [blame] | 159 | {"0XA", `10 EOF`}, |
Ariel Mashraki | caa37b4 | 2017-10-27 19:27:28 +0300 | [diff] [blame^] | 160 | {"0XG", `foo.sky:1:1: invalid hex literal`}, |
Alan Donovan | 312d1a5 | 2017-10-02 10:10:28 -0400 | [diff] [blame] | 161 | {"0xA.", `10 . EOF`}, |
| 162 | {"0xA.e1", `10 . e1 EOF`}, |
| 163 | // octal |
| 164 | {"0o123", `83 EOF`}, |
| 165 | {"0o12834", `10 834 EOF`}, |
| 166 | {"0o12934", `10 934 EOF`}, |
| 167 | {"0o12934.", `10 9.340000e+02 EOF`}, |
| 168 | {"0o12934.1", `10 9.341000e+02 EOF`}, |
| 169 | {"0o12934e1", `10 9.340000e+03 EOF`}, |
| 170 | {"0o123.", `83 . EOF`}, |
| 171 | {"0o123.1", `83 1.000000e-01 EOF`}, |
| 172 | // TODO(adonovan): reenable later. |
| 173 | // {"0123", `obsolete form of octal literal; use 0o123`}, |
| 174 | {"0123", `83 EOF`}, |
Ariel Mashraki | caa37b4 | 2017-10-27 19:27:28 +0300 | [diff] [blame^] | 175 | {"012834", `foo.sky:1:1: invalid int literal`}, |
| 176 | {"012934", `foo.sky:1:1: invalid int literal`}, |
| 177 | {"i = 012934", `foo.sky:1:5: invalid int literal`}, |
Alan Donovan | 312d1a5 | 2017-10-02 10:10:28 -0400 | [diff] [blame] | 178 | // octal escapes in string literals |
| 179 | {`"\037"`, `"\x1f" EOF`}, |
| 180 | {`"\377"`, `"\xff" EOF`}, |
Ariel Mashraki | caa37b4 | 2017-10-27 19:27:28 +0300 | [diff] [blame^] | 181 | {`"\378"`, `"\x1f8" EOF`}, // = '\37' + '8' |
| 182 | {`"\400"`, `foo.sky:1:1: invalid escape sequence \400`}, // unlike Python 2 and 3 |
Alan Donovan | 312d1a5 | 2017-10-02 10:10:28 -0400 | [diff] [blame] | 183 | // Backslashes that are not part of escapes are treated literally, |
| 184 | // but this behavior will change; see b/34519173. |
| 185 | {`"\+"`, `"\\+" EOF`}, |
| 186 | {`"\o123"`, `"\\o123" EOF`}, |
| 187 | // floats starting with octal digits |
| 188 | {"012934.", `1.293400e+04 EOF`}, |
| 189 | {"012934.1", `1.293410e+04 EOF`}, |
| 190 | {"012934e1", `1.293400e+05 EOF`}, |
| 191 | {"0123.", `1.230000e+02 EOF`}, |
| 192 | {"0123.1", `1.231000e+02 EOF`}, |
Ariel Mashraki | caa37b4 | 2017-10-27 19:27:28 +0300 | [diff] [blame^] | 193 | // issue #16 |
| 194 | {"x ! 0", "foo.sky:1:3: unexpected input character '!'"}, |
Alan Donovan | 312d1a5 | 2017-10-02 10:10:28 -0400 | [diff] [blame] | 195 | } { |
| 196 | got, err := scan(test.input) |
| 197 | if err != nil { |
Ariel Mashraki | caa37b4 | 2017-10-27 19:27:28 +0300 | [diff] [blame^] | 198 | got = err.(Error).Error() |
Alan Donovan | 312d1a5 | 2017-10-02 10:10:28 -0400 | [diff] [blame] | 199 | } |
| 200 | if test.want != got { |
| 201 | t.Errorf("scan `%s` = [%s], want [%s]", test.input, got, test.want) |
| 202 | } |
| 203 | } |
| 204 | } |
| 205 | |
// dataFile returns the path of filename inside the package directory
// pkgdir, located under src/github.com/google of build.Default.GOPATH.
//
// It duplicates skylarktest.DataFile; the copy lives here to avoid a
// dependency cycle.
var dataFile = func(pkgdir, filename string) string {
	elems := []string{
		build.Default.GOPATH,
		"src/github.com/google",
		pkgdir,
		filename,
	}
	return filepath.Join(elems...)
}
| 211 | |
| 212 | func BenchmarkScan(b *testing.B) { |
| 213 | filename := dataFile("skylark/syntax", "testdata/def.bzl") |
| 214 | b.StopTimer() |
| 215 | data, err := ioutil.ReadFile(filename) |
| 216 | if err != nil { |
| 217 | b.Fatal(err) |
| 218 | } |
| 219 | b.StartTimer() |
| 220 | |
| 221 | for i := 0; i < b.N; i++ { |
| 222 | sc, err := newScanner(filename, data) |
| 223 | if err != nil { |
| 224 | b.Fatal(err) |
| 225 | } |
| 226 | var val tokenValue |
| 227 | for sc.nextToken(&val) != EOF { |
| 228 | } |
| 229 | } |
| 230 | } |