Joe Tsai | 879b18d | 2018-08-03 17:22:24 -0700 | [diff] [blame] | 1 | // Copyright 2018 The Go Authors. All rights reserved. |
| 2 | // Use of this source code is governed by a BSD-style |
| 3 | // license that can be found in the LICENSE file. |
| 4 | |
| 5 | package json |
| 6 | |
| 7 | import ( |
| 8 | "bytes" |
Herbie Ong | d3f8f2d | 2019-03-06 00:28:23 -0800 | [diff] [blame^] | 9 | "fmt" |
Joe Tsai | 879b18d | 2018-08-03 17:22:24 -0700 | [diff] [blame] | 10 | "io" |
| 11 | "regexp" |
Herbie Ong | d3f8f2d | 2019-03-06 00:28:23 -0800 | [diff] [blame^] | 12 | "strconv" |
Joe Tsai | 879b18d | 2018-08-03 17:22:24 -0700 | [diff] [blame] | 13 | "unicode/utf8" |
| 14 | |
Joe Tsai | 01ab296 | 2018-09-21 17:44:00 -0700 | [diff] [blame] | 15 | "github.com/golang/protobuf/v2/internal/errors" |
Joe Tsai | 879b18d | 2018-08-03 17:22:24 -0700 | [diff] [blame] | 16 | ) |
| 17 | |
// Decoder is a token-based JSON decoder.
type Decoder struct {
	// lastType is the type of the token most recently accepted by ReadNext.
	// It is the zero Type until the first token has been accepted.
	lastType Type

	// startStack is a stack containing StartObject and StartArray types. The
	// top of stack represents the object or the array the current value is
	// directly located in.
	startStack []Type

	// orig is used in reporting line and column. It holds the complete
	// input given to NewDecoder; the consumed prefix is derived by comparing
	// its length with that of in.
	orig []byte
	// in contains the unconsumed input.
	in []byte
}
| 32 | |
Herbie Ong | d3f8f2d | 2019-03-06 00:28:23 -0800 | [diff] [blame^] | 33 | // NewDecoder returns a Decoder to read the given []byte. |
| 34 | func NewDecoder(b []byte) *Decoder { |
| 35 | return &Decoder{orig: b, in: b} |
| 36 | } |
| 37 | |
| 38 | // ReadNext returns the next JSON value. It will return an error if there is no |
| 39 | // valid JSON value. For String types containing invalid UTF8 characters, a |
| 40 | // non-fatal error is returned and caller can call ReadNext for the next value. |
| 41 | func (d *Decoder) ReadNext() (Value, error) { |
| 42 | var nerr errors.NonFatal |
| 43 | value, n, err := d.parseNext() |
| 44 | if !nerr.Merge(err) { |
Joe Tsai | 879b18d | 2018-08-03 17:22:24 -0700 | [diff] [blame] | 45 | return Value{}, err |
| 46 | } |
Herbie Ong | d3f8f2d | 2019-03-06 00:28:23 -0800 | [diff] [blame^] | 47 | |
| 48 | switch value.typ { |
| 49 | case EOF: |
| 50 | if len(d.startStack) != 0 || |
| 51 | d.lastType&Null|Bool|Number|String|EndObject|EndArray == 0 { |
| 52 | return Value{}, io.ErrUnexpectedEOF |
| 53 | } |
| 54 | |
| 55 | case Null: |
| 56 | if !d.isValueNext() { |
| 57 | return Value{}, d.newSyntaxError("unexpected value null") |
| 58 | } |
| 59 | |
| 60 | case Bool, Number: |
| 61 | if !d.isValueNext() { |
| 62 | return Value{}, d.newSyntaxError("unexpected value %v", value) |
| 63 | } |
| 64 | |
| 65 | case String: |
| 66 | if d.isValueNext() { |
| 67 | break |
| 68 | } |
| 69 | // Check if this is for an object name. |
| 70 | if d.lastType&(StartObject|comma) == 0 { |
| 71 | return Value{}, d.newSyntaxError("unexpected value %q", value) |
| 72 | } |
| 73 | d.in = d.in[n:] |
| 74 | d.consume(0) |
| 75 | if c := d.in[0]; c != ':' { |
| 76 | return Value{}, d.newSyntaxError(`unexpected character %v, missing ":" after object name`, string(c)) |
| 77 | } |
| 78 | n = 1 |
| 79 | value.typ = Name |
| 80 | |
| 81 | case StartObject, StartArray: |
| 82 | if !d.isValueNext() { |
| 83 | return Value{}, d.newSyntaxError("unexpected character %v", value) |
| 84 | } |
| 85 | d.startStack = append(d.startStack, value.typ) |
| 86 | |
| 87 | case EndObject: |
| 88 | if len(d.startStack) == 0 || |
| 89 | d.lastType == comma || |
| 90 | d.startStack[len(d.startStack)-1] != StartObject { |
| 91 | return Value{}, d.newSyntaxError("unexpected character }") |
| 92 | } |
| 93 | d.startStack = d.startStack[:len(d.startStack)-1] |
| 94 | |
| 95 | case EndArray: |
| 96 | if len(d.startStack) == 0 || |
| 97 | d.lastType == comma || |
| 98 | d.startStack[len(d.startStack)-1] != StartArray { |
| 99 | return Value{}, d.newSyntaxError("unexpected character ]") |
| 100 | } |
| 101 | d.startStack = d.startStack[:len(d.startStack)-1] |
| 102 | |
| 103 | case comma: |
| 104 | if len(d.startStack) == 0 || |
| 105 | d.lastType&(Null|Bool|Number|String|EndObject|EndArray) == 0 { |
| 106 | return Value{}, d.newSyntaxError("unexpected character ,") |
| 107 | } |
Joe Tsai | 879b18d | 2018-08-03 17:22:24 -0700 | [diff] [blame] | 108 | } |
Herbie Ong | d3f8f2d | 2019-03-06 00:28:23 -0800 | [diff] [blame^] | 109 | |
| 110 | // Update lastType only after validating value to be in the right |
| 111 | // sequence. |
| 112 | d.lastType = value.typ |
| 113 | d.in = d.in[n:] |
| 114 | |
| 115 | if d.lastType == comma { |
| 116 | return d.ReadNext() |
| 117 | } |
| 118 | return value, nerr.E |
Joe Tsai | 879b18d | 2018-08-03 17:22:24 -0700 | [diff] [blame] | 119 | } |
| 120 | |
var (
	// errRegexp extracts the text quoted in error messages: a run of up to
	// 32 non-delimiter characters at the start of the input or, failing
	// that, any single character other than a newline.
	errRegexp = regexp.MustCompile(`^([-+._a-zA-Z0-9]{1,32}|.)`)
	// literalRegexp matches one of the JSON literals null, true or false at
	// the start of the input.
	literalRegexp = regexp.MustCompile(`^(null|true|false)`)
)
Joe Tsai | 879b18d | 2018-08-03 17:22:24 -0700 | [diff] [blame] | 126 | |
Herbie Ong | d3f8f2d | 2019-03-06 00:28:23 -0800 | [diff] [blame^] | 127 | // parseNext parses for the next JSON value. It returns a Value object for |
| 128 | // different types, except for Name. It also returns the size that was parsed. |
| 129 | // It does not handle whether the next value is in a valid sequence or not, it |
| 130 | // only ensures that the value is a valid one. |
| 131 | func (d *Decoder) parseNext() (value Value, n int, err error) { |
| 132 | // Trim leading spaces. |
| 133 | d.consume(0) |
Joe Tsai | 879b18d | 2018-08-03 17:22:24 -0700 | [diff] [blame] | 134 | |
Herbie Ong | d3f8f2d | 2019-03-06 00:28:23 -0800 | [diff] [blame^] | 135 | in := d.in |
| 136 | if len(in) == 0 { |
| 137 | return d.newValue(EOF, nil, nil), 0, nil |
Joe Tsai | 879b18d | 2018-08-03 17:22:24 -0700 | [diff] [blame] | 138 | } |
Herbie Ong | d3f8f2d | 2019-03-06 00:28:23 -0800 | [diff] [blame^] | 139 | |
| 140 | switch in[0] { |
Joe Tsai | 879b18d | 2018-08-03 17:22:24 -0700 | [diff] [blame] | 141 | case 'n', 't', 'f': |
Herbie Ong | d3f8f2d | 2019-03-06 00:28:23 -0800 | [diff] [blame^] | 142 | n := matchWithDelim(literalRegexp, in) |
| 143 | if n == 0 { |
| 144 | return Value{}, 0, d.newSyntaxError("invalid value %s", errRegexp.Find(in)) |
Joe Tsai | 879b18d | 2018-08-03 17:22:24 -0700 | [diff] [blame] | 145 | } |
Herbie Ong | d3f8f2d | 2019-03-06 00:28:23 -0800 | [diff] [blame^] | 146 | switch in[0] { |
| 147 | case 'n': |
| 148 | return d.newValue(Null, in[:n], nil), n, nil |
| 149 | case 't': |
| 150 | return d.newValue(Bool, in[:n], true), n, nil |
| 151 | case 'f': |
| 152 | return d.newValue(Bool, in[:n], false), n, nil |
| 153 | } |
| 154 | |
Joe Tsai | 879b18d | 2018-08-03 17:22:24 -0700 | [diff] [blame] | 155 | case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': |
Herbie Ong | d3f8f2d | 2019-03-06 00:28:23 -0800 | [diff] [blame^] | 156 | num, n := parseNumber(in) |
| 157 | if num == nil { |
| 158 | return Value{}, 0, d.newSyntaxError("invalid number %s", errRegexp.Find(in)) |
| 159 | } |
| 160 | return d.newValue(Number, in[:n], num), n, nil |
| 161 | |
Joe Tsai | 879b18d | 2018-08-03 17:22:24 -0700 | [diff] [blame] | 162 | case '"': |
Herbie Ong | d3f8f2d | 2019-03-06 00:28:23 -0800 | [diff] [blame^] | 163 | var nerr errors.NonFatal |
| 164 | s, n, err := d.parseString(in) |
| 165 | if !nerr.Merge(err) { |
| 166 | return Value{}, 0, err |
| 167 | } |
| 168 | return d.newValue(String, in[:n], s), n, nerr.E |
| 169 | |
Joe Tsai | 879b18d | 2018-08-03 17:22:24 -0700 | [diff] [blame] | 170 | case '{': |
Herbie Ong | d3f8f2d | 2019-03-06 00:28:23 -0800 | [diff] [blame^] | 171 | return d.newValue(StartObject, in[:1], nil), 1, nil |
| 172 | |
| 173 | case '}': |
| 174 | return d.newValue(EndObject, in[:1], nil), 1, nil |
| 175 | |
| 176 | case '[': |
| 177 | return d.newValue(StartArray, in[:1], nil), 1, nil |
| 178 | |
| 179 | case ']': |
| 180 | return d.newValue(EndArray, in[:1], nil), 1, nil |
| 181 | |
| 182 | case ',': |
| 183 | return d.newValue(comma, in[:1], nil), 1, nil |
Joe Tsai | 879b18d | 2018-08-03 17:22:24 -0700 | [diff] [blame] | 184 | } |
Herbie Ong | d3f8f2d | 2019-03-06 00:28:23 -0800 | [diff] [blame^] | 185 | return Value{}, 0, d.newSyntaxError("invalid value %s", errRegexp.Find(in)) |
Joe Tsai | 879b18d | 2018-08-03 17:22:24 -0700 | [diff] [blame] | 186 | } |
| 187 | |
Herbie Ong | d3f8f2d | 2019-03-06 00:28:23 -0800 | [diff] [blame^] | 188 | // position returns line and column number of parsed bytes. |
| 189 | func (d *Decoder) position() (int, int) { |
| 190 | // Calculate line and column of consumed input. |
| 191 | b := d.orig[:len(d.orig)-len(d.in)] |
| 192 | line := bytes.Count(b, []byte("\n")) + 1 |
| 193 | if i := bytes.LastIndexByte(b, '\n'); i >= 0 { |
| 194 | b = b[i+1:] |
Joe Tsai | 879b18d | 2018-08-03 17:22:24 -0700 | [diff] [blame] | 195 | } |
Herbie Ong | d3f8f2d | 2019-03-06 00:28:23 -0800 | [diff] [blame^] | 196 | column := utf8.RuneCount(b) + 1 // ignore multi-rune characters |
| 197 | return line, column |
Joe Tsai | 879b18d | 2018-08-03 17:22:24 -0700 | [diff] [blame] | 198 | } |
| 199 | |
Herbie Ong | d3f8f2d | 2019-03-06 00:28:23 -0800 | [diff] [blame^] | 200 | // newSyntaxError returns an error with line and column information useful for |
| 201 | // syntax errors. |
| 202 | func (d *Decoder) newSyntaxError(f string, x ...interface{}) error { |
| 203 | e := errors.New(f, x...) |
| 204 | line, column := d.position() |
| 205 | return errors.New("syntax error (line %d:%d): %v", line, column, e) |
Joe Tsai | 879b18d | 2018-08-03 17:22:24 -0700 | [diff] [blame] | 206 | } |
| 207 | |
Joe Tsai | 879b18d | 2018-08-03 17:22:24 -0700 | [diff] [blame] | 208 | // matchWithDelim matches r with the input b and verifies that the match |
| 209 | // terminates with a delimiter of some form (e.g., r"[^-+_.a-zA-Z0-9]"). |
| 210 | // As a special case, EOF is considered a delimiter. |
| 211 | func matchWithDelim(r *regexp.Regexp, b []byte) int { |
| 212 | n := len(r.Find(b)) |
| 213 | if n < len(b) { |
Herbie Ong | d3f8f2d | 2019-03-06 00:28:23 -0800 | [diff] [blame^] | 214 | // Check that the next character is a delimiter. |
| 215 | if isNotDelim(b[n]) { |
Joe Tsai | 879b18d | 2018-08-03 17:22:24 -0700 | [diff] [blame] | 216 | return 0 |
| 217 | } |
| 218 | } |
| 219 | return n |
| 220 | } |
Herbie Ong | d3f8f2d | 2019-03-06 00:28:23 -0800 | [diff] [blame^] | 221 | |
// isNotDelim reports whether c is not a delimiter character, i.e. whether it
// is an ASCII letter, an ASCII digit, or one of '-', '+', '.', '_'.
func isNotDelim(c byte) bool {
	switch {
	case 'a' <= c && c <= 'z', 'A' <= c && c <= 'Z', '0' <= c && c <= '9':
		return true
	case c == '-', c == '+', c == '.', c == '_':
		return true
	}
	return false
}
| 229 | |
| 230 | // consume consumes n bytes of input and any subsequent whitespace. |
| 231 | func (d *Decoder) consume(n int) { |
| 232 | d.in = d.in[n:] |
| 233 | for len(d.in) > 0 { |
| 234 | switch d.in[0] { |
| 235 | case ' ', '\n', '\r', '\t': |
| 236 | d.in = d.in[1:] |
| 237 | default: |
| 238 | return |
| 239 | } |
| 240 | } |
| 241 | } |
| 242 | |
| 243 | // isValueNext returns true if next type should be a JSON value: Null, |
| 244 | // Number, String or Bool. |
| 245 | func (d *Decoder) isValueNext() bool { |
| 246 | if len(d.startStack) == 0 { |
| 247 | return d.lastType == 0 |
| 248 | } |
| 249 | |
| 250 | start := d.startStack[len(d.startStack)-1] |
| 251 | switch start { |
| 252 | case StartObject: |
| 253 | return d.lastType&Name != 0 |
| 254 | case StartArray: |
| 255 | return d.lastType&(StartArray|comma) != 0 |
| 256 | } |
| 257 | panic(fmt.Sprintf( |
| 258 | "unreachable logic in Decoder.isValueNext, lastType: %v, startStack: %v", |
| 259 | d.lastType, start)) |
| 260 | } |
| 261 | |
| 262 | // newValue constructs a Value. |
| 263 | func (d *Decoder) newValue(typ Type, input []byte, value interface{}) Value { |
| 264 | line, column := d.position() |
| 265 | return Value{ |
| 266 | input: input, |
| 267 | line: line, |
| 268 | column: column, |
| 269 | typ: typ, |
| 270 | value: value, |
| 271 | } |
| 272 | } |
| 273 | |
// Value contains a JSON type and value parsed from calling Decoder.ReadNext.
type Value struct {
	// input is the raw bytes of the token as they appear in the input. It is
	// nil for EOF.
	input []byte
	// line and column are the decoder's position at the time the Value was
	// constructed; they are used in error messages and returned by Position.
	line   int
	column int
	// typ is the JSON type of the token.
	typ Type
	// value will be set to the following Go type based on the type field:
	//    Bool   => bool
	//    Number => *numberParts
	//    String => string
	//    Name   => string
	// It will be nil if none of the above.
	value interface{}
}
| 288 | |
| 289 | func (v Value) newError(f string, x ...interface{}) error { |
| 290 | e := errors.New(f, x...) |
| 291 | return errors.New("error (line %d:%d): %v", v.line, v.column, e) |
| 292 | } |
| 293 | |
// Type returns the JSON type of the value, e.g. Bool, Number, String, Name,
// StartObject or EOF.
func (v Value) Type() Type {
	return v.typ
}
| 298 | |
// Position returns the line and column of the value, in that order, as
// recorded when the value was parsed.
func (v Value) Position() (int, int) {
	return v.line, v.column
}
| 303 | |
| 304 | // Bool returns the bool value if token is Bool, else it will return an error. |
| 305 | func (v Value) Bool() (bool, error) { |
| 306 | if v.typ != Bool { |
| 307 | return false, v.newError("%s is not a bool", v.input) |
| 308 | } |
| 309 | return v.value.(bool), nil |
| 310 | } |
| 311 | |
| 312 | // String returns the string value for a JSON string token or the read value in |
| 313 | // string if token is not a string. |
| 314 | func (v Value) String() string { |
| 315 | if v.typ != String { |
| 316 | return string(v.input) |
| 317 | } |
| 318 | return v.value.(string) |
| 319 | } |
| 320 | |
| 321 | // Name returns the object name if token is Name, else it will return an error. |
| 322 | func (v Value) Name() (string, error) { |
| 323 | if v.typ != Name { |
| 324 | return "", v.newError("%s is not an object name", v.input) |
| 325 | } |
| 326 | return v.value.(string), nil |
| 327 | } |
| 328 | |
| 329 | // Float returns the floating-point number if token is Number, else it will |
| 330 | // return an error. |
| 331 | // |
| 332 | // The floating-point precision is specified by the bitSize parameter: 32 for |
| 333 | // float32 or 64 for float64. If bitSize=32, the result still has type float64, |
| 334 | // but it will be convertible to float32 without changing its value. It will |
| 335 | // return an error if the number exceeds the floating point limits for given |
| 336 | // bitSize. |
| 337 | func (v Value) Float(bitSize int) (float64, error) { |
| 338 | if v.typ != Number { |
| 339 | return 0, v.newError("%s is not a number", v.input) |
| 340 | } |
| 341 | f, err := strconv.ParseFloat(string(v.input), bitSize) |
| 342 | if err != nil { |
| 343 | return 0, v.newError("%v", err) |
| 344 | } |
| 345 | return f, nil |
| 346 | } |
| 347 | |
| 348 | // Int returns the signed integer number if token is Number, else it will |
| 349 | // return an error. |
| 350 | // |
| 351 | // The given bitSize specifies the integer type that the result must fit into. |
| 352 | // It returns an error if the number is not an integer value or if the result |
| 353 | // exceeds the limits for given bitSize. |
| 354 | func (v Value) Int(bitSize int) (int64, error) { |
| 355 | s, err := v.getIntStr() |
| 356 | if err != nil { |
| 357 | return 0, err |
| 358 | } |
| 359 | n, err := strconv.ParseInt(s, 10, bitSize) |
| 360 | if err != nil { |
| 361 | return 0, v.newError("%v", err) |
| 362 | } |
| 363 | return n, nil |
| 364 | } |
| 365 | |
| 366 | // Uint returns the signed integer number if token is Number, else it will |
| 367 | // return an error. |
| 368 | // |
| 369 | // The given bitSize specifies the unsigned integer type that the result must |
| 370 | // fit into. It returns an error if the number is not an unsigned integer value |
| 371 | // or if the result exceeds the limits for given bitSize. |
| 372 | func (v Value) Uint(bitSize int) (uint64, error) { |
| 373 | s, err := v.getIntStr() |
| 374 | if err != nil { |
| 375 | return 0, err |
| 376 | } |
| 377 | n, err := strconv.ParseUint(s, 10, bitSize) |
| 378 | if err != nil { |
| 379 | return 0, v.newError("%v", err) |
| 380 | } |
| 381 | return n, nil |
| 382 | } |
| 383 | |
| 384 | func (v Value) getIntStr() (string, error) { |
| 385 | if v.typ != Number { |
| 386 | return "", v.newError("%s is not a number", v.input) |
| 387 | } |
| 388 | pnum := v.value.(*numberParts) |
| 389 | num, ok := normalizeToIntString(pnum) |
| 390 | if !ok { |
| 391 | return "", v.newError("cannot convert %s to integer", v.input) |
| 392 | } |
| 393 | return num, nil |
| 394 | } |