Joe Tsai | 27c2a76 | 2018-08-01 16:48:18 -0700 | [diff] [blame^] | 1 | // Copyright 2018 The Go Authors. All rights reserved. |
| 2 | // Use of this source code is governed by a BSD-style |
| 3 | // license that can be found in the LICENSE file. |
| 4 | |
| 5 | package text |
| 6 | |
| 7 | import ( |
| 8 | "fmt" |
| 9 | "math" |
| 10 | "strings" |
| 11 | "testing" |
| 12 | "unicode/utf8" |
| 13 | |
| 14 | "github.com/google/go-cmp/cmp" |
| 15 | "github.com/google/go-cmp/cmp/cmpopts" |
| 16 | "google.golang.org/proto/internal/flags" |
| 17 | "google.golang.org/proto/reflect/protoreflect" |
| 18 | ) |
| 19 | |
// Test is a table-driven test of this package's Unmarshal and Marshal.
//
// For every case that specifies an input, an expected value, or an expected
// error, tt.in is passed to Unmarshal and the result is checked against
// wantVal and wantErr. Independently, each non-empty wantOut* field is
// compared against the output of Marshal(wantVal, ...) called with the
// options that correspond to that field.
| 20 | func Test(t *testing.T) { |
// Shorthands that keep the test table below compact. The space constant is
// spliced into inputs with S to surround tokens with whitespace.
| 21 | const space = " \n\r\t" |
| 22 | var S = fmt.Sprintf |
| 23 | var V = ValueOf |
| 24 | var ID = func(n protoreflect.Name) Value { return V(n) } |
| 25 | type Lst = []Value |
| 26 | type Msg = [][2]Value |
| 27 | |
| 28 | tests := []struct { |
// in is the text passed to Unmarshal.
| 29 | in string |
// wantVal is the expected result of Unmarshal and also the value that is
// passed to Marshal for the wantOut* checks.
| 30 | wantVal Value |
// wantOut is the expected result of Marshal(wantVal, "", [2]byte{0, 0}, false).
| 31 | wantOut string |
// wantOutBracket is the expected result of Marshal(wantVal, "", [2]byte{'<', '>'}, false).
| 32 | wantOutBracket string |
// wantOutASCII is the expected result of Marshal(wantVal, "", [2]byte{0, 0}, true).
| 33 | wantOutASCII string |
// wantOutIndent is the expected result of Marshal(wantVal, "\t", [2]byte{0, 0}, false).
| 34 | wantOutIndent string |
// wantErr, if non-empty, must be a substring of the error from Unmarshal.
| 35 | wantErr string |
| 36 | }{{ |
| 37 | in: "", |
| 38 | wantVal: V(Msg{}), |
| 39 | wantOutIndent: "\n", |
| 40 | }, { |
| 41 | in: S("%s# hello%s", space, space), |
| 42 | wantVal: V(Msg{}), |
| 43 | }, { |
| 44 | in: S("%s# hello\rfoo:bar", space), |
| 45 | wantVal: V(Msg{}), |
| 46 | }, { |
| 47 | // Comments only extend until the newline. |
| 48 | in: S("%s# hello\nfoo:bar", space), |
| 49 | wantVal: V(Msg{{ID("foo"), ID("bar")}}), |
| 50 | wantOut: "foo:bar", |
| 51 | wantOutIndent: "foo: bar\n", |
| 52 | }, { |
| 53 | // NUL is an invalid whitespace since C++ uses C-strings. |
| 54 | in: "\x00", |
| 55 | wantErr: `invalid "\x00" as identifier`, |
| 56 | }, { |
| 57 | in: "foo:0", |
| 58 | wantVal: V(Msg{{ID("foo"), V(uint32(0))}}), |
| 59 | wantOut: "foo:0", |
| 60 | }, { |
| 61 | in: S("%sfoo%s:0", space, space), |
| 62 | wantVal: V(Msg{{ID("foo"), V(uint32(0))}}), |
| 63 | }, { |
| 64 | in: "foo bar:0", |
| 65 | wantErr: `expected ':' after message key`, |
| 66 | }, { |
| 67 | in: "[foo]:0", |
| 68 | wantVal: V(Msg{{V("foo"), V(uint32(0))}}), |
| 69 | wantOut: "[foo]:0", |
| 70 | wantOutIndent: "[foo]: 0\n", |
| 71 | }, { |
| 72 | in: S("%s[%sfoo%s]%s:0", space, space, space, space), |
| 73 | wantVal: V(Msg{{V("foo"), V(uint32(0))}}), |
| 74 | }, { |
| 75 | in: "[proto.package.name]:0", |
| 76 | wantVal: V(Msg{{V("proto.package.name"), V(uint32(0))}}), |
| 77 | wantOut: "[proto.package.name]:0", |
| 78 | wantOutIndent: "[proto.package.name]: 0\n", |
| 79 | }, { |
| 80 | in: S("%s[%sproto.package.name%s]%s:0", space, space, space, space), |
| 81 | wantVal: V(Msg{{V("proto.package.name"), V(uint32(0))}}), |
| 82 | }, { |
| 83 | in: "['sub.domain.com\x2fpath\x2fto\x2fproto.package.name']:0", |
| 84 | wantVal: V(Msg{{V("sub.domain.com/path/to/proto.package.name"), V(uint32(0))}}), |
| 85 | wantOut: "[sub.domain.com/path/to/proto.package.name]:0", |
| 86 | wantOutIndent: "[sub.domain.com/path/to/proto.package.name]: 0\n", |
| 87 | }, { |
| 88 | in: "[\"sub.domain.com\x2fpath\x2fto\x2fproto.package.name\"]:0", |
| 89 | wantVal: V(Msg{{V("sub.domain.com/path/to/proto.package.name"), V(uint32(0))}}), |
| 90 | }, { |
| 91 | in: S("%s[%s'sub.domain.com\x2fpath\x2fto\x2fproto.package.name'%s]%s:0", space, space, space, space), |
| 92 | wantVal: V(Msg{{V("sub.domain.com/path/to/proto.package.name"), V(uint32(0))}}), |
| 93 | }, { |
| 94 | in: S("%s[%s\"sub.domain.com\x2fpath\x2fto\x2fproto.package.name\"%s]%s:0", space, space, space, space), |
| 95 | wantVal: V(Msg{{V("sub.domain.com/path/to/proto.package.name"), V(uint32(0))}}), |
| 96 | }, { |
| 97 | in: `['http://example.com/path/to/proto.package.name']:0`, |
| 98 | wantVal: V(Msg{{V("http://example.com/path/to/proto.package.name"), V(uint32(0))}}), |
| 99 | wantOut: `["http://example.com/path/to/proto.package.name"]:0`, |
| 100 | wantOutIndent: `["http://example.com/path/to/proto.package.name"]: 0` + "\n", |
| 101 | }, { |
| 102 | in: "[proto.package.name:0", |
| 103 | wantErr: `invalid character ':', expected ']' at end of extension name`, |
| 104 | }, { |
| 105 | in: "[proto.package name]:0", |
| 106 | wantErr: `invalid character 'n', expected ']' at end of extension name`, |
| 107 | }, { |
| 108 | in: `["proto.package" "name"]:0`, |
| 109 | wantErr: `invalid character '"', expected ']' at end of extension name`, |
| 110 | }, { |
| 111 | in: `["\z"]`, |
| 112 | wantErr: `invalid escape code "\\z" in string`, |
| 113 | }, { |
| 114 | in: "[$]", |
| 115 | wantErr: `invalid "$" as identifier`, |
| 116 | }, { |
| 117 | // This parses fine, but should result in an error later since no |
| 118 | // type name in proto will ever be just a number. |
| 119 | in: "[20]:0", |
| 120 | wantVal: V(Msg{{V("20"), V(uint32(0))}}), |
| 121 | wantOut: "[20]:0", |
| 122 | }, { |
| 123 | in: "20:0", |
| 124 | wantVal: V(Msg{{V(uint32(20)), V(uint32(0))}}), |
| 125 | wantOut: "20:0", |
| 126 | }, { |
| 127 | in: "0x20:0", |
| 128 | wantVal: V(Msg{{V(uint32(0x20)), V(uint32(0))}}), |
| 129 | wantOut: "32:0", |
| 130 | }, { |
| 131 | in: "020:0", |
| 132 | wantVal: V(Msg{{V(uint32(020)), V(uint32(0))}}), |
| 133 | wantOut: "16:0", |
| 134 | }, { |
| 135 | in: "-20:0", |
| 136 | wantErr: `invalid "-20" as identifier`, |
| 137 | }, { |
| 138 | in: `foo:true bar:"s" baz:{} qux:[] wib:id`, |
| 139 | wantVal: V(Msg{ |
| 140 | {ID("foo"), V(true)}, |
| 141 | {ID("bar"), V("s")}, |
| 142 | {ID("baz"), V(Msg{})}, |
| 143 | {ID("qux"), V(Lst{})}, |
| 144 | {ID("wib"), ID("id")}, |
| 145 | }), |
| 146 | wantOut: `foo:true bar:"s" baz:{} qux:[] wib:id`, |
| 147 | wantOutIndent: "foo: true\nbar: \"s\"\nbaz: {}\nqux: []\nwib: id\n", |
| 148 | }, { |
| 149 | in: S(`%sfoo%s:%strue%s %sbar%s:%s"s"%s %sbaz%s:%s<>%s %squx%s:%s[]%s %swib%s:%sid%s`, |
| 150 | space, space, space, space, space, space, space, space, space, space, space, space, space, space, space, space, space, space, space, space), |
| 151 | wantVal: V(Msg{ |
| 152 | {ID("foo"), V(true)}, |
| 153 | {ID("bar"), V("s")}, |
| 154 | {ID("baz"), V(Msg{})}, |
| 155 | {ID("qux"), V(Lst{})}, |
| 156 | {ID("wib"), ID("id")}, |
| 157 | }), |
| 158 | }, { |
| 159 | in: `foo:true;`, |
| 160 | wantVal: V(Msg{{ID("foo"), V(true)}}), |
| 161 | wantOut: "foo:true", |
| 162 | wantOutIndent: "foo: true\n", |
| 163 | }, { |
| 164 | in: `foo:true,`, |
| 165 | wantVal: V(Msg{{ID("foo"), V(true)}}), |
| 166 | }, { |
| 167 | in: `foo:bar;,`, |
| 168 | wantErr: `invalid "," as identifier`, |
| 169 | }, { |
| 170 | in: `foo:bar,;`, |
| 171 | wantErr: `invalid ";" as identifier`, |
| 172 | }, { |
| 173 | in: `footrue`, |
| 174 | wantErr: `unexpected EOF`, |
| 175 | }, { |
| 176 | in: `foo true`, |
| 177 | wantErr: `expected ':' after message key`, |
| 178 | }, { |
| 179 | in: `foo"s"`, |
| 180 | wantErr: `expected ':' after message key`, |
| 181 | }, { |
| 182 | in: `foo "s"`, |
| 183 | wantErr: `expected ':' after message key`, |
| 184 | }, { |
| 185 | in: `foo{}`, |
| 186 | wantVal: V(Msg{{ID("foo"), V(Msg{})}}), |
| 187 | wantOut: "foo:{}", |
| 188 | wantOutBracket: "foo:<>", |
| 189 | wantOutIndent: "foo: {}\n", |
| 190 | }, { |
| 191 | in: `foo {}`, |
| 192 | wantVal: V(Msg{{ID("foo"), V(Msg{})}}), |
| 193 | }, { |
| 194 | in: `foo<>`, |
| 195 | wantVal: V(Msg{{ID("foo"), V(Msg{})}}), |
| 196 | }, { |
| 197 | in: `foo <>`, |
| 198 | wantVal: V(Msg{{ID("foo"), V(Msg{})}}), |
| 199 | }, { |
| 200 | in: `foo[]`, |
| 201 | wantErr: `expected ':' after message key`, |
| 202 | }, { |
| 203 | in: `foo []`, |
| 204 | wantErr: `expected ':' after message key`, |
| 205 | }, { |
| 206 | in: `foo:truebar:true`, |
| 207 | wantErr: `invalid ":" as identifier`, |
| 208 | }, { |
| 209 | in: `foo:"s"bar:true`, |
| 210 | wantVal: V(Msg{{ID("foo"), V("s")}, {ID("bar"), V(true)}}), |
| 211 | wantOut: `foo:"s" bar:true`, |
| 212 | wantOutIndent: "foo: \"s\"\nbar: true\n", |
| 213 | }, { |
| 214 | in: `foo:0bar:true`, |
| 215 | wantErr: `invalid "0bar" as number or bool`, |
| 216 | }, { |
| 217 | in: `foo:{}bar:true`, |
| 218 | wantVal: V(Msg{{ID("foo"), V(Msg{})}, {ID("bar"), V(true)}}), |
| 219 | wantOut: "foo:{} bar:true", |
| 220 | wantOutBracket: "foo:<> bar:true", |
| 221 | wantOutIndent: "foo: {}\nbar: true\n", |
| 222 | }, { |
| 223 | in: `foo:[]bar:true`, |
| 224 | wantVal: V(Msg{{ID("foo"), V(Lst{})}, {ID("bar"), V(true)}}), |
| 225 | }, { |
| 226 | in: `foo{bar:true}`, |
| 227 | wantVal: V(Msg{{ID("foo"), V(Msg{{ID("bar"), V(true)}})}}), |
| 228 | wantOut: "foo:{bar:true}", |
| 229 | wantOutBracket: "foo:<bar:true>", |
| 230 | wantOutIndent: "foo: {\n\tbar: true\n}\n", |
| 231 | }, { |
| 232 | in: `foo<bar:true>`, |
| 233 | wantVal: V(Msg{{ID("foo"), V(Msg{{ID("bar"), V(true)}})}}), |
| 234 | }, { |
| 235 | in: `foo{bar:true,}`, |
| 236 | wantVal: V(Msg{{ID("foo"), V(Msg{{ID("bar"), V(true)}})}}), |
| 237 | }, { |
| 238 | in: `foo{bar:true;}`, |
| 239 | wantVal: V(Msg{{ID("foo"), V(Msg{{ID("bar"), V(true)}})}}), |
| 240 | }, { |
| 241 | in: `foo{`, |
| 242 | wantErr: `unexpected EOF`, |
| 243 | }, { |
| 244 | in: `foo{ `, |
| 245 | wantErr: `unexpected EOF`, |
| 246 | }, { |
| 247 | in: `foo{[`, |
| 248 | wantErr: `unexpected EOF`, |
| 249 | }, { |
| 250 | in: `foo{[ `, |
| 251 | wantErr: `unexpected EOF`, |
| 252 | }, { |
| 253 | in: `foo{bar:true,;}`, |
| 254 | wantErr: `invalid ";" as identifier`, |
| 255 | }, { |
| 256 | in: `foo{bar:true;,}`, |
| 257 | wantErr: `invalid "," as identifier`, |
| 258 | }, { |
| 259 | in: `foo<bar:{}>`, |
| 260 | wantVal: V(Msg{{ID("foo"), V(Msg{{ID("bar"), V(Msg{})}})}}), |
| 261 | wantOut: "foo:{bar:{}}", |
| 262 | wantOutBracket: "foo:<bar:<>>", |
| 263 | wantOutIndent: "foo: {\n\tbar: {}\n}\n", |
| 264 | }, { |
| 265 | in: `foo<bar:{>`, |
| 266 | wantErr: `invalid character '>', expected '}' at end of message`, |
| 267 | }, { |
| 268 | in: `foo<bar:{}`, |
| 269 | wantErr: `unexpected EOF`, |
| 270 | }, { |
| 271 | in: `arr:[]`, |
| 272 | wantVal: V(Msg{{ID("arr"), V(Lst{})}}), |
| 273 | wantOut: "arr:[]", |
| 274 | wantOutBracket: "arr:[]", |
| 275 | wantOutIndent: "arr: []\n", |
| 276 | }, { |
| 277 | in: `arr:[,]`, |
| 278 | wantErr: `invalid "," as number or bool`, |
| 279 | }, { |
| 280 | in: `arr:[0 0]`, |
| 281 | wantErr: `invalid character '0', expected ']' at end of list`, |
| 282 | }, { |
| 283 | in: `arr:["foo" "bar"]`, |
| 284 | wantVal: V(Msg{{ID("arr"), V(Lst{V("foobar")})}}), |
| 285 | wantOut: `arr:["foobar"]`, |
| 286 | wantOutBracket: `arr:["foobar"]`, |
| 287 | wantOutIndent: "arr: [\n\t\"foobar\"\n]\n", |
| 288 | }, { |
| 289 | in: `arr:[0,]`, |
| 290 | wantErr: `invalid "]" as number or bool`, |
| 291 | }, { |
| 292 | in: `arr:[true,0,"",id,[],{}]`, |
| 293 | wantVal: V(Msg{{ID("arr"), V(Lst{ |
| 294 | V(true), V(uint32(0)), V(""), ID("id"), V(Lst{}), V(Msg{}), |
| 295 | })}}), |
| 296 | wantOut: `arr:[true,0,"",id,[],{}]`, |
| 297 | wantOutBracket: `arr:[true,0,"",id,[],<>]`, |
| 298 | wantOutIndent: "arr: [\n\ttrue,\n\t0,\n\t\"\",\n\tid,\n\t[],\n\t{}\n]\n", |
| 299 | }, { |
| 300 | in: S(`arr:[%strue%s,%s0%s,%s""%s,%sid%s,%s[]%s,%s{}%s]`, |
| 301 | space, space, space, space, space, space, space, space, space, space, space, space), |
| 302 | wantVal: V(Msg{{ID("arr"), V(Lst{ |
| 303 | V(true), V(uint32(0)), V(""), ID("id"), V(Lst{}), V(Msg{}), |
| 304 | })}}), |
| 305 | }, { |
| 306 | in: `arr:[`, |
| 307 | wantErr: `unexpected EOF`, |
| 308 | }, { |
| 309 | in: `{`, |
| 310 | wantErr: `invalid "{" as identifier`, |
| 311 | }, { |
| 312 | in: `<`, |
| 313 | wantErr: `invalid "<" as identifier`, |
| 314 | }, { |
| 315 | in: `[`, |
| 316 | wantErr: "unexpected EOF", |
| 317 | }, { |
| 318 | in: `}`, |
| 319 | wantErr: "1 bytes of unconsumed input", |
| 320 | }, { |
| 321 | in: `>`, |
| 322 | wantErr: "1 bytes of unconsumed input", |
| 323 | }, { |
| 324 | in: `]`, |
| 325 | wantErr: `invalid "]" as identifier`, |
| 326 | }, { |
| 327 | in: `str: "'"`, |
| 328 | wantVal: V(Msg{{ID("str"), V(`'`)}}), |
| 329 | wantOut: `str:"'"`, |
| 330 | }, { |
| 331 | in: `str: '"'`, |
| 332 | wantVal: V(Msg{{ID("str"), V(`"`)}}), |
| 333 | wantOut: `str:"\""`, |
| 334 | }, { |
| 335 | // String that has as few escaped characters as possible. |
| 336 | in: `str: ` + func() string { |
| 337 | var b []byte |
| 338 | for i := 0; i < utf8.RuneSelf; i++ { |
| 339 | switch i { |
| 340 | case 0, '\\', '\n', '\'': // these must be escaped, so ignore them |
| 341 | default: |
| 342 | b = append(b, byte(i)) |
| 343 | } |
| 344 | } |
| 345 | return "'" + string(b) + "'" |
| 346 | }(), |
| 347 | wantVal: V(Msg{{ID("str"), V("\x01\x02\x03\x04\x05\x06\a\b\t\v\f\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !\"#$%&()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[]^_`abcdefghijklmnopqrstuvwxyz{|}~\u007f")}}), |
| 348 | wantOut: `str:"\x01\x02\x03\x04\x05\x06\x07\x08\t\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !\"#$%&()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[]^_` + "`abcdefghijklmnopqrstuvwxyz{|}~\x7f\"", |
| 349 | wantOutASCII: `str:"\x01\x02\x03\x04\x05\x06\x07\x08\t\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !\"#$%&()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[]^_` + "`abcdefghijklmnopqrstuvwxyz{|}~\x7f\"", |
| 350 | }, { |
| 351 | in: "str: '\xde\xad\xbe\xef'", |
| 352 | wantVal: V(Msg{{ID("str"), V("\xde\xad\xbe\xef")}}), |
| 353 | wantOut: "str:\"\u07ad\\xbe\\xef\"", |
| 354 | wantOutASCII: `str:"\u07ad\xbe\xef"`, |
| 355 | wantErr: "invalid UTF-8 detected", |
| 356 | }, { |
| 357 | // Valid UTF-8 wire encoding, but sub-optimal encoding. |
| 358 | in: "str: '\xc0\x80'", |
| 359 | wantVal: V(Msg{{ID("str"), V("\xc0\x80")}}), |
| 360 | wantOut: `str:"\xc0\x80"`, |
| 361 | wantOutASCII: `str:"\xc0\x80"`, |
| 362 | wantErr: "invalid UTF-8 detected", |
| 363 | }, { |
| 364 | // Valid UTF-8 wire encoding, but invalid rune (surrogate pair). |
| 365 | in: "str: '\xed\xa0\x80'", |
| 366 | wantVal: V(Msg{{ID("str"), V("\xed\xa0\x80")}}), |
| 367 | wantOut: `str:"\xed\xa0\x80"`, |
| 368 | wantOutASCII: `str:"\xed\xa0\x80"`, |
| 369 | wantErr: "invalid UTF-8 detected", |
| 370 | }, { |
| 371 | // Valid UTF-8 wire encoding, but invalid rune (above max rune). |
| 372 | in: "str: '\xf7\xbf\xbf\xbf'", |
| 373 | wantVal: V(Msg{{ID("str"), V("\xf7\xbf\xbf\xbf")}}), |
| 374 | wantOut: `str:"\xf7\xbf\xbf\xbf"`, |
| 375 | wantOutASCII: `str:"\xf7\xbf\xbf\xbf"`, |
| 376 | wantErr: "invalid UTF-8 detected", |
| 377 | }, { |
| 378 | // Valid UTF-8 wire encoding of the RuneError rune. |
| 379 | in: "str: '\xef\xbf\xbd'", |
| 380 | wantVal: V(Msg{{ID("str"), V(string(utf8.RuneError))}}), |
| 381 | wantOut: `str:"` + string(utf8.RuneError) + `"`, |
| 382 | wantOutASCII: `str:"\ufffd"`, |
| 383 | }, { |
| 384 | in: "str: 'hello\u1234world'", |
| 385 | wantVal: V(Msg{{ID("str"), V("hello\u1234world")}}), |
| 386 | wantOut: "str:\"hello\u1234world\"", |
| 387 | wantOutASCII: `str:"hello\u1234world"`, |
| 388 | }, { |
| 389 | in: `str: '\"\'\\\?\a\b\n\r\t\v\f\1\12\123\xA\xaB\x12\uAb8f\U0010FFFF'`, |
| 390 | wantVal: V(Msg{{ID("str"), V("\"'\\?\a\b\n\r\t\v\f\x01\nS\n\xab\x12\uab8f\U0010ffff")}}), |
| 391 | wantOut: `str:"\"'\\?\x07\x08\n\r\t\x0b\x0c\x01\nS\n\xab\x12` + "\uab8f\U0010ffff" + `"`, |
| 392 | wantOutASCII: `str:"\"'\\?\x07\x08\n\r\t\x0b\x0c\x01\nS\n\xab\x12\uab8f\U0010ffff"`, |
| 393 | }, { |
| 394 | in: `str: '`, |
| 395 | wantErr: `unexpected EOF`, |
| 396 | }, { |
| 397 | in: `str: '\`, |
| 398 | wantErr: `unexpected EOF`, |
| 399 | }, { |
| 400 | in: `str: '\'`, |
| 401 | wantErr: `unexpected EOF`, |
| 402 | }, { |
| 403 | in: `str: '\8'`, |
| 404 | wantErr: `invalid escape code "\\8" in string`, |
| 405 | }, { |
| 406 | in: `str: '\1x'`, |
| 407 | wantVal: V(Msg{{ID("str"), V("\001x")}}), |
| 408 | wantOut: `str:"\x01x"`, |
| 409 | wantOutASCII: `str:"\x01x"`, |
| 410 | }, { |
| 411 | in: `str: '\12x'`, |
| 412 | wantVal: V(Msg{{ID("str"), V("\012x")}}), |
| 413 | wantOut: `str:"\nx"`, |
| 414 | wantOutASCII: `str:"\nx"`, |
| 415 | }, { |
| 416 | in: `str: '\123x'`, |
| 417 | wantVal: V(Msg{{ID("str"), V("\123x")}}), |
| 418 | wantOut: `str:"Sx"`, |
| 419 | wantOutASCII: `str:"Sx"`, |
| 420 | }, { |
| 421 | in: `str: '\1234x'`, |
| 422 | wantVal: V(Msg{{ID("str"), V("\1234x")}}), |
| 423 | wantOut: `str:"S4x"`, |
| 424 | wantOutASCII: `str:"S4x"`, |
| 425 | }, { |
| 426 | in: `str: '\1'`, |
| 427 | wantVal: V(Msg{{ID("str"), V("\001")}}), |
| 428 | wantOut: `str:"\x01"`, |
| 429 | wantOutASCII: `str:"\x01"`, |
| 430 | }, { |
| 431 | in: `str: '\12'`, |
| 432 | wantVal: V(Msg{{ID("str"), V("\012")}}), |
| 433 | wantOut: `str:"\n"`, |
| 434 | wantOutASCII: `str:"\n"`, |
| 435 | }, { |
| 436 | in: `str: '\123'`, |
| 437 | wantVal: V(Msg{{ID("str"), V("\123")}}), |
| 438 | wantOut: `str:"S"`, |
| 439 | wantOutASCII: `str:"S"`, |
| 440 | }, { |
| 441 | in: `str: '\1234'`, |
| 442 | wantVal: V(Msg{{ID("str"), V("\1234")}}), |
| 443 | wantOut: `str:"S4"`, |
| 444 | wantOutASCII: `str:"S4"`, |
| 445 | }, { |
| 446 | in: `str: '\377'`, |
| 447 | wantVal: V(Msg{{ID("str"), V("\377")}}), |
| 448 | wantOut: `str:"\xff"`, |
| 449 | wantOutASCII: `str:"\xff"`, |
| 450 | }, { |
| 451 | // Overflow octal escape. |
| 452 | in: `str: '\400'`, |
| 453 | wantErr: `invalid octal escape code "\\400" in string`, |
| 454 | }, { |
| 455 | in: `str: '\xfx'`, |
| 456 | wantVal: V(Msg{{ID("str"), V("\x0fx")}}), |
| 457 | wantOut: `str:"\x0fx"`, |
| 458 | wantOutASCII: `str:"\x0fx"`, |
| 459 | }, { |
| 460 | in: `str: '\xffx'`, |
| 461 | wantVal: V(Msg{{ID("str"), V("\xffx")}}), |
| 462 | wantOut: `str:"\xffx"`, |
| 463 | wantOutASCII: `str:"\xffx"`, |
| 464 | }, { |
| 465 | in: `str: '\xfffx'`, |
| 466 | wantVal: V(Msg{{ID("str"), V("\xfffx")}}), |
| 467 | wantOut: `str:"\xfffx"`, |
| 468 | wantOutASCII: `str:"\xfffx"`, |
| 469 | }, { |
| 470 | in: `str: '\xf'`, |
| 471 | wantVal: V(Msg{{ID("str"), V("\x0f")}}), |
| 472 | wantOut: `str:"\x0f"`, |
| 473 | wantOutASCII: `str:"\x0f"`, |
| 474 | }, { |
| 475 | in: `str: '\xff'`, |
| 476 | wantVal: V(Msg{{ID("str"), V("\xff")}}), |
| 477 | wantOut: `str:"\xff"`, |
| 478 | wantOutASCII: `str:"\xff"`, |
| 479 | }, { |
| 480 | in: `str: '\xfff'`, |
| 481 | wantVal: V(Msg{{ID("str"), V("\xfff")}}), |
| 482 | wantOut: `str:"\xfff"`, |
| 483 | wantOutASCII: `str:"\xfff"`, |
| 484 | }, { |
| 485 | in: `str: '\xz'`, |
| 486 | wantErr: `invalid hex escape code "\\x" in string`, |
| 487 | }, { |
| 488 | in: `str: '\uPo'`, |
| 489 | wantErr: `unexpected EOF`, |
| 490 | }, { |
| 491 | in: `str: '\uPoo'`, |
| 492 | wantErr: `invalid Unicode escape code "\\uPoo'" in string`, |
| 493 | }, { |
| 494 | in: `str: '\uPoop'`, |
| 495 | wantErr: `invalid Unicode escape code "\\uPoop" in string`, |
| 496 | }, { |
| 497 | // Unmatched surrogate pair. |
| 498 | in: `str: '\uDEAD'`, |
| 499 | wantErr: `unexpected EOF`, // trying to read the other half |
| 500 | }, { |
| 501 | // Surrogate pair with invalid other half. |
| 502 | in: `str: '\uDEAD\u0000'`, |
| 503 | wantErr: `invalid Unicode escape code "\\u0000" in string`, |
| 504 | }, { |
| 505 | // Properly matched surrogate pair. |
| 506 | in: `str: '\uD800\uDEAD'`, |
| 507 | wantVal: V(Msg{{ID("str"), V("𐊭")}}), |
| 508 | wantOut: `str:"𐊭"`, |
| 509 | wantOutASCII: `str:"\U000102ad"`, |
| 510 | }, { |
| 511 | // Overflow on Unicode rune. |
| 512 | in: `str: '\U00110000'`, |
| 513 | wantErr: `invalid Unicode escape code "\\U00110000" in string`, |
| 514 | }, { |
| 515 | in: `str: '\z'`, |
| 516 | wantErr: `invalid escape code "\\z" in string`, |
| 517 | }, { |
| 518 | // Strings cannot have NUL literal since C-style strings forbid them. |
| 519 | in: "str: '\x00'", |
| 520 | wantErr: `invalid character '\x00' in string`, |
| 521 | }, { |
| 522 | // Strings cannot have newline literal. C++ permits them if an |
| 523 | // option is specified to allow them. In Go, we always forbid them. |
| 524 | in: "str: '\n'", |
| 525 | wantErr: `invalid character '\n' in string`, |
| 526 | }, { |
| 527 | in: "name: \"My name is \"\n\"elsewhere\"", |
| 528 | wantVal: V(Msg{{ID("name"), V("My name is elsewhere")}}), |
| 529 | wantOut: `name:"My name is elsewhere"`, |
| 530 | wantOutASCII: `name:"My name is elsewhere"`, |
| 531 | }, { |
| 532 | in: "name: 'My name is '\n'elsewhere'", |
| 533 | wantVal: V(Msg{{ID("name"), V("My name is elsewhere")}}), |
| 534 | }, { |
| 535 | in: "name: 'My name is '\n\"elsewhere\"", |
| 536 | wantVal: V(Msg{{ID("name"), V("My name is elsewhere")}}), |
| 537 | }, { |
| 538 | in: "name: \"My name is \"\n'elsewhere'", |
| 539 | wantVal: V(Msg{{ID("name"), V("My name is elsewhere")}}), |
| 540 | }, { |
| 541 | in: "name: \"My \"'name '\"is \"\n'elsewhere'", |
| 542 | wantVal: V(Msg{{ID("name"), V("My name is elsewhere")}}), |
| 543 | }, { |
| 544 | in: `crazy:"x'"'\""\''"'z"`, |
| 545 | wantVal: V(Msg{{ID("crazy"), V(`x'""''z`)}}), |
| 546 | }, { |
| 547 | in: `nums: [t,T,true,True,TRUE,f,F,false,False,FALSE]`, |
| 548 | wantVal: V(Msg{{ID("nums"), V(Lst{ |
| 549 | V(true), |
| 550 | ID("T"), |
| 551 | V(true), |
| 552 | V(true), |
| 553 | ID("TRUE"), |
| 554 | V(false), |
| 555 | ID("F"), |
| 556 | V(false), |
| 557 | V(false), |
| 558 | ID("FALSE"), |
| 559 | })}}), |
| 560 | wantOut: "nums:[true,T,true,true,TRUE,false,F,false,false,FALSE]", |
| 561 | wantOutIndent: "nums: [\n\ttrue,\n\tT,\n\ttrue,\n\ttrue,\n\tTRUE,\n\tfalse,\n\tF,\n\tfalse,\n\tfalse,\n\tFALSE\n]\n", |
| 562 | }, { |
| 563 | in: `nums: [nan,inf,-inf,NaN,NAN,Inf,INF]`, |
| 564 | wantVal: V(Msg{{ID("nums"), V(Lst{ |
| 565 | V(math.NaN()), |
| 566 | V(math.Inf(+1)), |
| 567 | V(math.Inf(-1)), |
| 568 | ID("NaN"), |
| 569 | ID("NAN"), |
| 570 | ID("Inf"), |
| 571 | ID("INF"), |
| 572 | })}}), |
| 573 | wantOut: "nums:[nan,inf,-inf,NaN,NAN,Inf,INF]", |
| 574 | wantOutIndent: "nums: [\n\tnan,\n\tinf,\n\t-inf,\n\tNaN,\n\tNAN,\n\tInf,\n\tINF\n]\n", |
| 575 | }, { |
| 576 | // C++ permits this, but we currently reject this. |
| 577 | in: `num: -nan`, |
| 578 | wantErr: `invalid "-nan" as number or bool`, |
| 579 | }, { |
| 580 | in: `nums: [0,-0,-9876543210,9876543210,0x0,0x0123456789abcdef,-0x0123456789abcdef,01234567,-01234567]`, |
| 581 | wantVal: V(Msg{{ID("nums"), V(Lst{ |
| 582 | V(uint32(0)), |
| 583 | V(int32(-0)), |
| 584 | V(int64(-9876543210)), |
| 585 | V(uint64(9876543210)), |
| 586 | V(uint32(0x0)), |
| 587 | V(uint64(0x0123456789abcdef)), |
| 588 | V(int64(-0x0123456789abcdef)), |
| 589 | V(uint64(01234567)), |
| 590 | V(int64(-01234567)), |
| 591 | })}}), |
| 592 | wantOut: "nums:[0,0,-9876543210,9876543210,0,81985529216486895,-81985529216486895,342391,-342391]", |
| 593 | wantOutIndent: "nums: [\n\t0,\n\t0,\n\t-9876543210,\n\t9876543210,\n\t0,\n\t81985529216486895,\n\t-81985529216486895,\n\t342391,\n\t-342391\n]\n", |
| 594 | }, { |
| 595 | in: `nums: [0.,0f,1f,10f,-0f,-1f,-10f,1.0,0.1e-3,1.5e+5,1e10,.0]`, |
| 596 | wantVal: V(Msg{{ID("nums"), V(Lst{ |
| 597 | V(0.0), |
| 598 | V(0.0), |
| 599 | V(1.0), |
| 600 | V(10.0), |
| 601 | V(-0.0), |
| 602 | V(-1.0), |
| 603 | V(-10.0), |
| 604 | V(1.0), |
| 605 | V(0.1e-3), |
| 606 | V(1.5e+5), |
| 607 | V(1.0e+10), |
| 608 | V(0.0), |
| 609 | })}}), |
| 610 | wantOut: "nums:[0,0,1,10,0,-1,-10,1,0.0001,150000,1e+10,0]", |
| 611 | wantOutIndent: "nums: [\n\t0,\n\t0,\n\t1,\n\t10,\n\t0,\n\t-1,\n\t-10,\n\t1,\n\t0.0001,\n\t150000,\n\t1e+10,\n\t0\n]\n", |
| 612 | }, { |
| 613 | in: `nums: [0xbeefbeef,0xbeefbeefbeefbeef]`, |
// The expected value depends on flags.Proto1Legacy: signed integers when it
// is set, unsigned integers otherwise.
| 614 | wantVal: V(Msg{{ID("nums"), func() Value { |
| 615 | if flags.Proto1Legacy { |
| 616 | return V(Lst{V(int32(-1091584273)), V(int64(-4688318750159552785))}) |
| 617 | } else { |
| 618 | return V(Lst{V(uint32(0xbeefbeef)), V(uint64(0xbeefbeefbeefbeef))}) |
| 619 | } |
| 620 | }()}}), |
| 621 | }, { |
| 622 | in: `num: +0`, |
| 623 | wantErr: `invalid "+0" as number or bool`, |
| 624 | }, { |
| 625 | in: `num: 01.1234`, |
| 626 | wantErr: `invalid "01.1234" as number or bool`, |
| 627 | }, { |
| 628 | in: `num: 0x`, |
| 629 | wantErr: `invalid "0x" as number or bool`, |
| 630 | }, { |
| 631 | in: `num: 0xX`, |
| 632 | wantErr: `invalid "0xX" as number or bool`, |
| 633 | }, { |
| 634 | in: `num: 0800`, |
| 635 | wantErr: `invalid "0800" as number or bool`, |
| 636 | }, { |
| 637 | in: `num: true.`, |
| 638 | wantErr: `invalid "true." as number or bool`, |
| 639 | }, { |
| 640 | in: `num: .`, |
| 641 | wantErr: `parsing ".": invalid syntax`, |
| 642 | }, { |
| 643 | in: `num: -.`, |
| 644 | wantErr: `parsing "-.": invalid syntax`, |
| 645 | }, { |
| 646 | in: `num: 1e10000`, |
| 647 | wantErr: `parsing "1e10000": value out of range`, |
| 648 | }, { |
| 649 | in: `num: 99999999999999999999`, |
| 650 | wantErr: `parsing "99999999999999999999": value out of range`, |
| 651 | }, { |
| 652 | in: `num: -99999999999999999999`, |
| 653 | wantErr: `parsing "-99999999999999999999": value out of range`, |
| 654 | }, { |
| 655 | in: "x: -", |
| 656 | wantErr: `syntax error (line 1:5)`, |
| 657 | }, { |
| 658 | in: "x:[\"💩\"x", |
| 659 | wantErr: `syntax error (line 1:7)`, |
| 660 | }, { |
| 661 | in: "x:\n\n[\"🔥🔥🔥\"x", |
| 662 | wantErr: `syntax error (line 3:7)`, |
| 663 | }, { |
| 664 | in: "x:[\"👍🏻👍🏿\"x", |
| 665 | wantErr: `syntax error (line 1:10)`, // multi-rune emojis; could be column:8 |
| 666 | }, { |
| 667 | in: ` |
| 668 | firstName : "John", |
| 669 | lastName : "Smith" , |
| 670 | isAlive : true, |
| 671 | age : 27, |
| 672 | address { # missing colon is okay for messages |
| 673 | streetAddress : "21 2nd Street" , |
| 674 | city : "New York" , |
| 675 | state : "NY" , |
| 676 | postalCode : "10021-3100" ; # trailing semicolon is okay |
| 677 | }, |
| 678 | phoneNumbers : [ { |
| 679 | type : "home" , |
| 680 | number : "212 555-1234" |
| 681 | } , { |
| 682 | type : "office" , |
| 683 | number : "646 555-4567" |
| 684 | } , { |
| 685 | type : "mobile" , |
| 686 | number : "123 456-7890" , # trailing comma is okay |
| 687 | } ], |
| 688 | children : [] , |
| 689 | spouse : null`, |
| 690 | wantVal: V(Msg{ |
| 691 | {ID("firstName"), V("John")}, |
| 692 | {ID("lastName"), V("Smith")}, |
| 693 | {ID("isAlive"), V(true)}, |
| 694 | {ID("age"), V(27.0)}, |
| 695 | {ID("address"), V(Msg{ |
| 696 | {ID("streetAddress"), V("21 2nd Street")}, |
| 697 | {ID("city"), V("New York")}, |
| 698 | {ID("state"), V("NY")}, |
| 699 | {ID("postalCode"), V("10021-3100")}, |
| 700 | })}, |
| 701 | {ID("phoneNumbers"), V([]Value{ |
| 702 | V(Msg{ |
| 703 | {ID("type"), V("home")}, |
| 704 | {ID("number"), V("212 555-1234")}, |
| 705 | }), |
| 706 | V(Msg{ |
| 707 | {ID("type"), V("office")}, |
| 708 | {ID("number"), V("646 555-4567")}, |
| 709 | }), |
| 710 | V(Msg{ |
| 711 | {ID("type"), V("mobile")}, |
| 712 | {ID("number"), V("123 456-7890")}, |
| 713 | }), |
| 714 | })}, |
| 715 | {ID("children"), V([]Value{})}, |
| 716 | {ID("spouse"), V(protoreflect.Name("null"))}, |
| 717 | }), |
| 718 | wantOut: `firstName:"John" lastName:"Smith" isAlive:true age:27 address:{streetAddress:"21 2nd Street" city:"New York" state:"NY" postalCode:"10021-3100"} phoneNumbers:[{type:"home" number:"212 555-1234"},{type:"office" number:"646 555-4567"},{type:"mobile" number:"123 456-7890"}] children:[] spouse:null`, |
| 719 | wantOutBracket: `firstName:"John" lastName:"Smith" isAlive:true age:27 address:<streetAddress:"21 2nd Street" city:"New York" state:"NY" postalCode:"10021-3100"> phoneNumbers:[<type:"home" number:"212 555-1234">,<type:"office" number:"646 555-4567">,<type:"mobile" number:"123 456-7890">] children:[] spouse:null`, |
| 720 | wantOutIndent: `firstName: "John" |
| 721 | lastName: "Smith" |
| 722 | isAlive: true |
| 723 | age: 27 |
| 724 | address: { |
| 725 | streetAddress: "21 2nd Street" |
| 726 | city: "New York" |
| 727 | state: "NY" |
| 728 | postalCode: "10021-3100" |
| 729 | } |
| 730 | phoneNumbers: [ |
| 731 | { |
| 732 | type: "home" |
| 733 | number: "212 555-1234" |
| 734 | }, |
| 735 | { |
| 736 | type: "office" |
| 737 | number: "646 555-4567" |
| 738 | }, |
| 739 | { |
| 740 | type: "mobile" |
| 741 | number: "123 456-7890" |
| 742 | } |
| 743 | ] |
| 744 | children: [] |
| 745 | spouse: null |
| 746 | `, |
| 747 | }} |
| 748 | |
// opts tells cmp.Diff how to compare Value: a List/List or Message/Message
// pair is unwrapped by the Transformer so its elements are compared
// recursively, and every other pair is handed to the scalar Comparer.
| 749 | opts := cmp.Options{ |
| 750 | cmpopts.EquateEmpty(), |
| 751 | |
| 752 | // Transform composites (List and Message). |
| 753 | cmp.FilterValues(func(x, y Value) bool { |
| 754 | return (x.Type() == List && y.Type() == List) || (x.Type() == Message && y.Type() == Message) |
| 755 | }, cmp.Transformer("", func(v Value) interface{} { |
| 756 | if v.Type() == List { |
| 757 | return v.List() |
| 758 | } else { |
| 759 | return v.Message() |
| 760 | } |
| 761 | })), |
| 762 | |
| 763 | // Compare scalars (Bool, Int, Uint, Float, String, Name). |
| 764 | cmp.FilterValues(func(x, y Value) bool { |
| 765 | return !(x.Type() == List && y.Type() == List) && !(x.Type() == Message && y.Type() == Message) |
| 766 | }, cmp.Comparer(func(x, y Value) bool { |
// A composite paired with a non-composite (or with the other kind of
// composite) reaches this comparer and is never equal.
| 767 | if x.Type() == List || x.Type() == Message || y.Type() == List || y.Type() == Message { |
| 768 | return false |
| 769 | } |
| 770 | // Ensure golden value is always in x variable. |
// NOTE(review): presumably only values produced by Unmarshal carry raw
// bytes, which is how the golden side is recognized here — confirm
// against the Value type.
| 771 | if len(x.raw) > 0 { |
| 772 | x, y = y, x |
| 773 | } |
| 774 | switch x.Type() { |
| 775 | case Bool: |
| 776 | want, _ := x.Bool() |
| 777 | got, ok := y.Bool() |
| 778 | return got == want && ok |
| 779 | case Int: |
| 780 | want, _ := x.Int(true) |
// The argument is true only when the golden value lies outside the int32
// range. NOTE(review): presumably the flag selects 64-bit decoding —
// confirm against Value.Int.
| 781 | got, ok := y.Int(want < math.MinInt32 || math.MaxInt32 < want) |
| 782 | return got == want && ok |
| 783 | case Uint: |
| 784 | want, _ := x.Uint(true) |
// As above: true only when the golden value exceeds math.MaxUint32.
| 785 | got, ok := y.Uint(math.MaxUint32 < want) |
| 786 | return got == want && ok |
| 787 | case Float: |
| 788 | want, _ := x.Float(true) |
// As above: true only when the golden magnitude exceeds math.MaxFloat32.
| 789 | got, ok := y.Float(math.MaxFloat32 < math.Abs(want)) |
// NaN != NaN under ==, so NaN on either side is handled explicitly: the
// values match only if both are NaN.
| 790 | if math.IsNaN(got) || math.IsNaN(want) { |
| 791 | return math.IsNaN(got) == math.IsNaN(want) |
| 792 | } |
| 793 | return got == want && ok |
| 794 | case Name: |
| 795 | want, _ := x.Name() |
| 796 | got, ok := y.Name() |
| 797 | return got == want && ok |
| 798 | default: |
| 799 | return x.String() == y.String() |
| 800 | } |
| 801 | })), |
| 802 | } |
| 803 | for _, tt := range tests { |
| 804 | t.Run("", func(t *testing.T) { |
// Exercise Unmarshal unless the case specifies no input, no expected value
// and no expected error.
| 805 | if tt.in != "" || tt.wantVal.Type() != 0 || tt.wantErr != "" { |
| 806 | gotVal, err := Unmarshal([]byte(tt.in)) |
| 807 | if err == nil { |
| 808 | if tt.wantErr != "" { |
| 809 | t.Errorf("Unmarshal(): got nil error, want %v", tt.wantErr) |
| 810 | } |
| 811 | } else { |
| 812 | if tt.wantErr == "" { |
| 813 | t.Errorf("Unmarshal(): got %v, want nil error", err) |
// Errors are matched by substring, so wantErr need not be the full message.
| 814 | } else if !strings.Contains(err.Error(), tt.wantErr) { |
| 815 | t.Errorf("Unmarshal(): got %v, want %v", err, tt.wantErr) |
| 816 | } |
| 817 | } |
// The value is compared even when an error was returned; several cases in
// the table set both wantVal and wantErr.
| 818 | if diff := cmp.Diff(gotVal, tt.wantVal, opts); diff != "" { |
| 819 | t.Errorf("Unmarshal(): output mismatch (-got +want):\n%s", diff) |
| 820 | } |
| 821 | } |
// Each Marshal check below runs only when its expected output is non-empty,
// and always marshals wantVal rather than the value returned by Unmarshal.
| 822 | if tt.wantOut != "" { |
| 823 | gotOut, err := Marshal(tt.wantVal, "", [2]byte{0, 0}, false) |
| 824 | if err != nil { |
| 825 | t.Errorf("Marshal(): got %v, want nil error", err) |
| 826 | } |
| 827 | if string(gotOut) != tt.wantOut { |
| 828 | t.Errorf("Marshal():\ngot: %s\nwant: %s", gotOut, tt.wantOut) |
| 829 | } |
| 830 | } |
| 831 | if tt.wantOutBracket != "" { |
| 832 | gotOut, err := Marshal(tt.wantVal, "", [2]byte{'<', '>'}, false) |
| 833 | if err != nil { |
| 834 | t.Errorf("Marshal(Bracket): got %v, want nil error", err) |
| 835 | } |
| 836 | if string(gotOut) != tt.wantOutBracket { |
| 837 | t.Errorf("Marshal(Bracket):\ngot: %s\nwant: %s", gotOut, tt.wantOutBracket) |
| 838 | } |
| 839 | } |
| 840 | if tt.wantOutASCII != "" { |
| 841 | gotOut, err := Marshal(tt.wantVal, "", [2]byte{0, 0}, true) |
| 842 | if err != nil { |
| 843 | t.Errorf("Marshal(ASCII): got %v, want nil error", err) |
| 844 | } |
| 845 | if string(gotOut) != tt.wantOutASCII { |
| 846 | t.Errorf("Marshal(ASCII):\ngot: %s\nwant: %s", gotOut, tt.wantOutASCII) |
| 847 | } |
| 848 | } |
| 849 | if tt.wantOutIndent != "" { |
| 850 | gotOut, err := Marshal(tt.wantVal, "\t", [2]byte{0, 0}, false) |
| 851 | if err != nil { |
| 852 | t.Errorf("Marshal(Indent): got %v, want nil error", err) |
| 853 | } |
| 854 | if string(gotOut) != tt.wantOutIndent { |
| 855 | t.Errorf("Marshal(Indent):\ngot: %s\nwant: %s", gotOut, tt.wantOutIndent) |
| 856 | } |
| 857 | } |
| 858 | }) |
| 859 | } |
| 860 | } |