blob: 0ee6c85ad7f7bce1ac3e389f7bad24ab9170a9aa [file] [log] [blame]
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
4
5package json
6
7import (
8 "bytes"
Herbie Ongd3f8f2d2019-03-06 00:28:23 -08009 "fmt"
Joe Tsai879b18d2018-08-03 17:22:24 -070010 "io"
11 "regexp"
Herbie Ongd3f8f2d2019-03-06 00:28:23 -080012 "strconv"
Joe Tsai879b18d2018-08-03 17:22:24 -070013 "unicode/utf8"
14
Joe Tsai01ab2962018-09-21 17:44:00 -070015 "github.com/golang/protobuf/v2/internal/errors"
Joe Tsai879b18d2018-08-03 17:22:24 -070016)
17
// call identifies which Decoder method ran most recently.
type call uint8

// readCall is the zero value, so a Decoder whose lastCall was never set
// reports readCall.
const (
	readCall call = 0
	peekCall call = 1
)
25
Herbie Ongd3f8f2d2019-03-06 00:28:23 -080026// Decoder is a token-based JSON decoder.
27type Decoder struct {
Herbie Ong8ac9dd22019-03-27 12:20:50 -070028 // lastCall is last method called, either readCall or peekCall.
29 // Initial value is readCall.
Herbie Ongc96a79d2019-03-08 10:49:17 -080030 lastCall call
31
32 // value contains the last read value.
33 value Value
34
35 // err contains the last read error.
36 err error
Joe Tsai879b18d2018-08-03 17:22:24 -070037
Herbie Ongd3f8f2d2019-03-06 00:28:23 -080038 // startStack is a stack containing StartObject and StartArray types. The
39 // top of stack represents the object or the array the current value is
40 // directly located in.
41 startStack []Type
42
43 // orig is used in reporting line and column.
44 orig []byte
45 // in contains the unconsumed input.
46 in []byte
Joe Tsai879b18d2018-08-03 17:22:24 -070047}
48
Herbie Ongd3f8f2d2019-03-06 00:28:23 -080049// NewDecoder returns a Decoder to read the given []byte.
50func NewDecoder(b []byte) *Decoder {
51 return &Decoder{orig: b, in: b}
52}
53
Herbie Ongc96a79d2019-03-08 10:49:17 -080054// Peek looks ahead and returns the next JSON type without advancing a read.
55func (d *Decoder) Peek() Type {
56 defer func() { d.lastCall = peekCall }()
57 if d.lastCall == readCall {
58 d.value, d.err = d.Read()
59 }
60 return d.value.typ
61}
62
63// Read returns the next JSON value. It will return an error if there is no
64// valid value. For String types containing invalid UTF8 characters, a
65// non-fatal error is returned and caller can call Read for the next value.
66func (d *Decoder) Read() (Value, error) {
67 defer func() { d.lastCall = readCall }()
68 if d.lastCall == peekCall {
69 return d.value, d.err
70 }
71
Herbie Ongd3f8f2d2019-03-06 00:28:23 -080072 var nerr errors.NonFatal
73 value, n, err := d.parseNext()
74 if !nerr.Merge(err) {
Joe Tsai879b18d2018-08-03 17:22:24 -070075 return Value{}, err
76 }
Herbie Ongd3f8f2d2019-03-06 00:28:23 -080077
78 switch value.typ {
79 case EOF:
80 if len(d.startStack) != 0 ||
Herbie Ongc96a79d2019-03-08 10:49:17 -080081 d.value.typ&Null|Bool|Number|String|EndObject|EndArray == 0 {
Herbie Ongd3f8f2d2019-03-06 00:28:23 -080082 return Value{}, io.ErrUnexpectedEOF
83 }
84
85 case Null:
86 if !d.isValueNext() {
87 return Value{}, d.newSyntaxError("unexpected value null")
88 }
89
90 case Bool, Number:
91 if !d.isValueNext() {
Herbie Ong8ac9dd22019-03-27 12:20:50 -070092 return Value{}, d.newSyntaxError("unexpected value %v", value.Raw())
Herbie Ongd3f8f2d2019-03-06 00:28:23 -080093 }
94
95 case String:
96 if d.isValueNext() {
97 break
98 }
99 // Check if this is for an object name.
Herbie Ongc96a79d2019-03-08 10:49:17 -0800100 if d.value.typ&(StartObject|comma) == 0 {
Herbie Ong8ac9dd22019-03-27 12:20:50 -0700101 return Value{}, d.newSyntaxError("unexpected value %v", value.Raw())
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800102 }
103 d.in = d.in[n:]
104 d.consume(0)
105 if c := d.in[0]; c != ':' {
106 return Value{}, d.newSyntaxError(`unexpected character %v, missing ":" after object name`, string(c))
107 }
108 n = 1
109 value.typ = Name
110
111 case StartObject, StartArray:
112 if !d.isValueNext() {
Herbie Ong8ac9dd22019-03-27 12:20:50 -0700113 return Value{}, d.newSyntaxError("unexpected character %v", value.Raw())
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800114 }
115 d.startStack = append(d.startStack, value.typ)
116
117 case EndObject:
118 if len(d.startStack) == 0 ||
Herbie Ongc96a79d2019-03-08 10:49:17 -0800119 d.value.typ == comma ||
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800120 d.startStack[len(d.startStack)-1] != StartObject {
121 return Value{}, d.newSyntaxError("unexpected character }")
122 }
123 d.startStack = d.startStack[:len(d.startStack)-1]
124
125 case EndArray:
126 if len(d.startStack) == 0 ||
Herbie Ongc96a79d2019-03-08 10:49:17 -0800127 d.value.typ == comma ||
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800128 d.startStack[len(d.startStack)-1] != StartArray {
129 return Value{}, d.newSyntaxError("unexpected character ]")
130 }
131 d.startStack = d.startStack[:len(d.startStack)-1]
132
133 case comma:
134 if len(d.startStack) == 0 ||
Herbie Ongc96a79d2019-03-08 10:49:17 -0800135 d.value.typ&(Null|Bool|Number|String|EndObject|EndArray) == 0 {
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800136 return Value{}, d.newSyntaxError("unexpected character ,")
137 }
Joe Tsai879b18d2018-08-03 17:22:24 -0700138 }
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800139
140 // Update lastType only after validating value to be in the right
141 // sequence.
Herbie Ongc96a79d2019-03-08 10:49:17 -0800142 d.value.typ = value.typ
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800143 d.in = d.in[n:]
144
Herbie Ongc96a79d2019-03-08 10:49:17 -0800145 if d.value.typ == comma {
146 return d.Read()
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800147 }
148 return value, nerr.E
Joe Tsai879b18d2018-08-03 17:22:24 -0700149}
150
// literalRegexp matches one of the JSON literals null, true or false at the
// start of the input.
var literalRegexp = regexp.MustCompile(`^(null|true|false)`)

// errRegexp matches, at the start of the input, either a run of up to 32
// non-delimiter characters or a single character. It is used to pick the text
// quoted in error messages.
var errRegexp = regexp.MustCompile(`^([-+._a-zA-Z0-9]{1,32}|.)`)
Joe Tsai879b18d2018-08-03 17:22:24 -0700156
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800157// parseNext parses for the next JSON value. It returns a Value object for
158// different types, except for Name. It also returns the size that was parsed.
159// It does not handle whether the next value is in a valid sequence or not, it
160// only ensures that the value is a valid one.
161func (d *Decoder) parseNext() (value Value, n int, err error) {
162 // Trim leading spaces.
163 d.consume(0)
Joe Tsai879b18d2018-08-03 17:22:24 -0700164
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800165 in := d.in
166 if len(in) == 0 {
Herbie Ong670d8082019-03-31 19:10:33 -0700167 return d.newValue(nil, EOF), 0, nil
Joe Tsai879b18d2018-08-03 17:22:24 -0700168 }
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800169
170 switch in[0] {
Joe Tsai879b18d2018-08-03 17:22:24 -0700171 case 'n', 't', 'f':
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800172 n := matchWithDelim(literalRegexp, in)
173 if n == 0 {
174 return Value{}, 0, d.newSyntaxError("invalid value %s", errRegexp.Find(in))
Joe Tsai879b18d2018-08-03 17:22:24 -0700175 }
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800176 switch in[0] {
177 case 'n':
Herbie Ong670d8082019-03-31 19:10:33 -0700178 return d.newValue(in[:n], Null), n, nil
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800179 case 't':
Herbie Ong670d8082019-03-31 19:10:33 -0700180 return d.newBoolValue(in[:n], true), n, nil
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800181 case 'f':
Herbie Ong670d8082019-03-31 19:10:33 -0700182 return d.newBoolValue(in[:n], false), n, nil
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800183 }
184
Joe Tsai879b18d2018-08-03 17:22:24 -0700185 case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
Herbie Onga3421952019-03-21 18:12:26 -0700186 n, ok := consumeNumber(in)
187 if !ok {
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800188 return Value{}, 0, d.newSyntaxError("invalid number %s", errRegexp.Find(in))
189 }
Herbie Ong670d8082019-03-31 19:10:33 -0700190 return d.newValue(in[:n], Number), n, nil
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800191
Joe Tsai879b18d2018-08-03 17:22:24 -0700192 case '"':
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800193 var nerr errors.NonFatal
194 s, n, err := d.parseString(in)
195 if !nerr.Merge(err) {
196 return Value{}, 0, err
197 }
Herbie Ong670d8082019-03-31 19:10:33 -0700198 return d.newStringValue(in[:n], s), n, nerr.E
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800199
Joe Tsai879b18d2018-08-03 17:22:24 -0700200 case '{':
Herbie Ong670d8082019-03-31 19:10:33 -0700201 return d.newValue(in[:1], StartObject), 1, nil
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800202
203 case '}':
Herbie Ong670d8082019-03-31 19:10:33 -0700204 return d.newValue(in[:1], EndObject), 1, nil
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800205
206 case '[':
Herbie Ong670d8082019-03-31 19:10:33 -0700207 return d.newValue(in[:1], StartArray), 1, nil
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800208
209 case ']':
Herbie Ong670d8082019-03-31 19:10:33 -0700210 return d.newValue(in[:1], EndArray), 1, nil
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800211
212 case ',':
Herbie Ong670d8082019-03-31 19:10:33 -0700213 return d.newValue(in[:1], comma), 1, nil
Joe Tsai879b18d2018-08-03 17:22:24 -0700214 }
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800215 return Value{}, 0, d.newSyntaxError("invalid value %s", errRegexp.Find(in))
Joe Tsai879b18d2018-08-03 17:22:24 -0700216}
217
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800218// position returns line and column number of parsed bytes.
219func (d *Decoder) position() (int, int) {
220 // Calculate line and column of consumed input.
221 b := d.orig[:len(d.orig)-len(d.in)]
222 line := bytes.Count(b, []byte("\n")) + 1
223 if i := bytes.LastIndexByte(b, '\n'); i >= 0 {
224 b = b[i+1:]
Joe Tsai879b18d2018-08-03 17:22:24 -0700225 }
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800226 column := utf8.RuneCount(b) + 1 // ignore multi-rune characters
227 return line, column
Joe Tsai879b18d2018-08-03 17:22:24 -0700228}
229
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800230// newSyntaxError returns an error with line and column information useful for
231// syntax errors.
232func (d *Decoder) newSyntaxError(f string, x ...interface{}) error {
233 e := errors.New(f, x...)
234 line, column := d.position()
235 return errors.New("syntax error (line %d:%d): %v", line, column, e)
Joe Tsai879b18d2018-08-03 17:22:24 -0700236}
237
Joe Tsai879b18d2018-08-03 17:22:24 -0700238// matchWithDelim matches r with the input b and verifies that the match
239// terminates with a delimiter of some form (e.g., r"[^-+_.a-zA-Z0-9]").
240// As a special case, EOF is considered a delimiter.
241func matchWithDelim(r *regexp.Regexp, b []byte) int {
242 n := len(r.Find(b))
243 if n < len(b) {
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800244 // Check that the next character is a delimiter.
245 if isNotDelim(b[n]) {
Joe Tsai879b18d2018-08-03 17:22:24 -0700246 return 0
247 }
248 }
249 return n
250}
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800251
// isNotDelim reports whether c is not a delimiter character, that is, whether
// c is one of '-', '+', '.', '_' or an ASCII letter or digit.
func isNotDelim(c byte) bool {
	switch {
	case c == '-', c == '+', c == '.', c == '_':
		return true
	case 'a' <= c && c <= 'z', 'A' <= c && c <= 'Z', '0' <= c && c <= '9':
		return true
	}
	return false
}
259
260// consume consumes n bytes of input and any subsequent whitespace.
261func (d *Decoder) consume(n int) {
262 d.in = d.in[n:]
263 for len(d.in) > 0 {
264 switch d.in[0] {
265 case ' ', '\n', '\r', '\t':
266 d.in = d.in[1:]
267 default:
268 return
269 }
270 }
271}
272
273// isValueNext returns true if next type should be a JSON value: Null,
274// Number, String or Bool.
275func (d *Decoder) isValueNext() bool {
276 if len(d.startStack) == 0 {
Herbie Ongc96a79d2019-03-08 10:49:17 -0800277 return d.value.typ == 0
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800278 }
279
280 start := d.startStack[len(d.startStack)-1]
281 switch start {
282 case StartObject:
Herbie Ongc96a79d2019-03-08 10:49:17 -0800283 return d.value.typ&Name != 0
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800284 case StartArray:
Herbie Ongc96a79d2019-03-08 10:49:17 -0800285 return d.value.typ&(StartArray|comma) != 0
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800286 }
287 panic(fmt.Sprintf(
288 "unreachable logic in Decoder.isValueNext, lastType: %v, startStack: %v",
Herbie Ongc96a79d2019-03-08 10:49:17 -0800289 d.value.typ, start))
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800290}
291
Herbie Ong670d8082019-03-31 19:10:33 -0700292// newValue constructs a Value for given Type.
293func (d *Decoder) newValue(input []byte, typ Type) Value {
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800294 line, column := d.position()
295 return Value{
296 input: input,
297 line: line,
298 column: column,
299 typ: typ,
Herbie Ong670d8082019-03-31 19:10:33 -0700300 }
301}
302
303// newBoolValue constructs a Value for a JSON boolean.
304func (d *Decoder) newBoolValue(input []byte, b bool) Value {
305 line, column := d.position()
306 return Value{
307 input: input,
308 line: line,
309 column: column,
310 typ: Bool,
311 boo: b,
312 }
313}
314
315// newStringValue constructs a Value for a JSON string.
316func (d *Decoder) newStringValue(input []byte, s string) Value {
317 line, column := d.position()
318 return Value{
319 input: input,
320 line: line,
321 column: column,
322 typ: String,
323 str: s,
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800324 }
325}
326
Herbie Ong8ac9dd22019-03-27 12:20:50 -0700327// Clone returns a copy of the Decoder for use in reading ahead the next JSON
328// object, array or other values without affecting current Decoder.
329func (d *Decoder) Clone() *Decoder {
330 ret := *d
331 ret.startStack = append([]Type(nil), ret.startStack...)
332 return &ret
333}
334
Herbie Ongc96a79d2019-03-08 10:49:17 -0800335// Value contains a JSON type and value parsed from calling Decoder.Read.
Herbie Ong670d8082019-03-31 19:10:33 -0700336// For JSON boolean and string, it holds the converted value in boo and str
337// fields respectively. For JSON number, input field holds a valid number which
338// is converted only in Int or Float. Other JSON types do not require any
339// additional data.
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800340type Value struct {
341 input []byte
342 line int
343 column int
344 typ Type
Herbie Ong670d8082019-03-31 19:10:33 -0700345 boo bool
346 str string
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800347}
348
349func (v Value) newError(f string, x ...interface{}) error {
350 e := errors.New(f, x...)
351 return errors.New("error (line %d:%d): %v", v.line, v.column, e)
352}
353
354// Type returns the JSON type.
355func (v Value) Type() Type {
356 return v.typ
357}
358
359// Position returns the line and column of the value.
360func (v Value) Position() (int, int) {
361 return v.line, v.column
362}
363
364// Bool returns the bool value if token is Bool, else it will return an error.
365func (v Value) Bool() (bool, error) {
366 if v.typ != Bool {
367 return false, v.newError("%s is not a bool", v.input)
368 }
Herbie Ong670d8082019-03-31 19:10:33 -0700369 return v.boo, nil
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800370}
371
372// String returns the string value for a JSON string token or the read value in
373// string if token is not a string.
374func (v Value) String() string {
375 if v.typ != String {
376 return string(v.input)
377 }
Herbie Ong670d8082019-03-31 19:10:33 -0700378 return v.str
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800379}
380
381// Name returns the object name if token is Name, else it will return an error.
382func (v Value) Name() (string, error) {
383 if v.typ != Name {
384 return "", v.newError("%s is not an object name", v.input)
385 }
Herbie Ong670d8082019-03-31 19:10:33 -0700386 return v.str, nil
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800387}
388
Herbie Ong8ac9dd22019-03-27 12:20:50 -0700389// Raw returns the read value in string.
390func (v Value) Raw() string {
391 return string(v.input)
392}
393
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800394// Float returns the floating-point number if token is Number, else it will
395// return an error.
396//
397// The floating-point precision is specified by the bitSize parameter: 32 for
398// float32 or 64 for float64. If bitSize=32, the result still has type float64,
399// but it will be convertible to float32 without changing its value. It will
400// return an error if the number exceeds the floating point limits for given
401// bitSize.
402func (v Value) Float(bitSize int) (float64, error) {
403 if v.typ != Number {
404 return 0, v.newError("%s is not a number", v.input)
405 }
406 f, err := strconv.ParseFloat(string(v.input), bitSize)
407 if err != nil {
408 return 0, v.newError("%v", err)
409 }
410 return f, nil
411}
412
413// Int returns the signed integer number if token is Number, else it will
414// return an error.
415//
416// The given bitSize specifies the integer type that the result must fit into.
417// It returns an error if the number is not an integer value or if the result
418// exceeds the limits for given bitSize.
419func (v Value) Int(bitSize int) (int64, error) {
420 s, err := v.getIntStr()
421 if err != nil {
422 return 0, err
423 }
424 n, err := strconv.ParseInt(s, 10, bitSize)
425 if err != nil {
426 return 0, v.newError("%v", err)
427 }
428 return n, nil
429}
430
431// Uint returns the signed integer number if token is Number, else it will
432// return an error.
433//
434// The given bitSize specifies the unsigned integer type that the result must
435// fit into. It returns an error if the number is not an unsigned integer value
436// or if the result exceeds the limits for given bitSize.
437func (v Value) Uint(bitSize int) (uint64, error) {
438 s, err := v.getIntStr()
439 if err != nil {
440 return 0, err
441 }
442 n, err := strconv.ParseUint(s, 10, bitSize)
443 if err != nil {
444 return 0, v.newError("%v", err)
445 }
446 return n, nil
447}
448
449func (v Value) getIntStr() (string, error) {
450 if v.typ != Number {
451 return "", v.newError("%s is not a number", v.input)
452 }
Herbie Onga3421952019-03-21 18:12:26 -0700453 parts, ok := parseNumber(v.input)
454 if !ok {
455 return "", v.newError("%s is not a number", v.input)
456 }
457 num, ok := normalizeToIntString(parts)
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800458 if !ok {
459 return "", v.newError("cannot convert %s to integer", v.input)
460 }
461 return num, nil
462}