blob: a9262b8bcfa3998a19cdbd23f33b08df6248994a [file] [log] [blame]
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
4
5package json
6
7import (
8 "bytes"
Herbie Ongd3f8f2d2019-03-06 00:28:23 -08009 "fmt"
Joe Tsai879b18d2018-08-03 17:22:24 -070010 "io"
11 "regexp"
Herbie Ongd3f8f2d2019-03-06 00:28:23 -080012 "strconv"
Joe Tsai879b18d2018-08-03 17:22:24 -070013 "unicode/utf8"
14
Joe Tsai01ab2962018-09-21 17:44:00 -070015 "github.com/golang/protobuf/v2/internal/errors"
Joe Tsai879b18d2018-08-03 17:22:24 -070016)
17
// call specifies which Decoder method was invoked.
type call uint8

const (
	// readCall marks that Decoder.Read was the most recent call. It is the
	// zero value of call, so a Decoder whose lastCall was never set is
	// treated as if Read had been called last.
	readCall call = iota
	// peekCall marks that Decoder.Peek was the most recent call.
	peekCall
)
25
Herbie Ongd3f8f2d2019-03-06 00:28:23 -080026// Decoder is a token-based JSON decoder.
27type Decoder struct {
Herbie Ongc96a79d2019-03-08 10:49:17 -080028 // lastCall is last method called, eiterh readCall or peekCall.
29 lastCall call
30
31 // value contains the last read value.
32 value Value
33
34 // err contains the last read error.
35 err error
Joe Tsai879b18d2018-08-03 17:22:24 -070036
Herbie Ongd3f8f2d2019-03-06 00:28:23 -080037 // startStack is a stack containing StartObject and StartArray types. The
38 // top of stack represents the object or the array the current value is
39 // directly located in.
40 startStack []Type
41
42 // orig is used in reporting line and column.
43 orig []byte
44 // in contains the unconsumed input.
45 in []byte
Joe Tsai879b18d2018-08-03 17:22:24 -070046}
47
Herbie Ongd3f8f2d2019-03-06 00:28:23 -080048// NewDecoder returns a Decoder to read the given []byte.
49func NewDecoder(b []byte) *Decoder {
50 return &Decoder{orig: b, in: b}
51}
52
Herbie Ongc96a79d2019-03-08 10:49:17 -080053// Peek looks ahead and returns the next JSON type without advancing a read.
54func (d *Decoder) Peek() Type {
55 defer func() { d.lastCall = peekCall }()
56 if d.lastCall == readCall {
57 d.value, d.err = d.Read()
58 }
59 return d.value.typ
60}
61
62// Read returns the next JSON value. It will return an error if there is no
63// valid value. For String types containing invalid UTF8 characters, a
64// non-fatal error is returned and caller can call Read for the next value.
65func (d *Decoder) Read() (Value, error) {
66 defer func() { d.lastCall = readCall }()
67 if d.lastCall == peekCall {
68 return d.value, d.err
69 }
70
Herbie Ongd3f8f2d2019-03-06 00:28:23 -080071 var nerr errors.NonFatal
72 value, n, err := d.parseNext()
73 if !nerr.Merge(err) {
Joe Tsai879b18d2018-08-03 17:22:24 -070074 return Value{}, err
75 }
Herbie Ongd3f8f2d2019-03-06 00:28:23 -080076
77 switch value.typ {
78 case EOF:
79 if len(d.startStack) != 0 ||
Herbie Ongc96a79d2019-03-08 10:49:17 -080080 d.value.typ&Null|Bool|Number|String|EndObject|EndArray == 0 {
Herbie Ongd3f8f2d2019-03-06 00:28:23 -080081 return Value{}, io.ErrUnexpectedEOF
82 }
83
84 case Null:
85 if !d.isValueNext() {
86 return Value{}, d.newSyntaxError("unexpected value null")
87 }
88
89 case Bool, Number:
90 if !d.isValueNext() {
91 return Value{}, d.newSyntaxError("unexpected value %v", value)
92 }
93
94 case String:
95 if d.isValueNext() {
96 break
97 }
98 // Check if this is for an object name.
Herbie Ongc96a79d2019-03-08 10:49:17 -080099 if d.value.typ&(StartObject|comma) == 0 {
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800100 return Value{}, d.newSyntaxError("unexpected value %q", value)
101 }
102 d.in = d.in[n:]
103 d.consume(0)
104 if c := d.in[0]; c != ':' {
105 return Value{}, d.newSyntaxError(`unexpected character %v, missing ":" after object name`, string(c))
106 }
107 n = 1
108 value.typ = Name
109
110 case StartObject, StartArray:
111 if !d.isValueNext() {
112 return Value{}, d.newSyntaxError("unexpected character %v", value)
113 }
114 d.startStack = append(d.startStack, value.typ)
115
116 case EndObject:
117 if len(d.startStack) == 0 ||
Herbie Ongc96a79d2019-03-08 10:49:17 -0800118 d.value.typ == comma ||
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800119 d.startStack[len(d.startStack)-1] != StartObject {
120 return Value{}, d.newSyntaxError("unexpected character }")
121 }
122 d.startStack = d.startStack[:len(d.startStack)-1]
123
124 case EndArray:
125 if len(d.startStack) == 0 ||
Herbie Ongc96a79d2019-03-08 10:49:17 -0800126 d.value.typ == comma ||
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800127 d.startStack[len(d.startStack)-1] != StartArray {
128 return Value{}, d.newSyntaxError("unexpected character ]")
129 }
130 d.startStack = d.startStack[:len(d.startStack)-1]
131
132 case comma:
133 if len(d.startStack) == 0 ||
Herbie Ongc96a79d2019-03-08 10:49:17 -0800134 d.value.typ&(Null|Bool|Number|String|EndObject|EndArray) == 0 {
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800135 return Value{}, d.newSyntaxError("unexpected character ,")
136 }
Joe Tsai879b18d2018-08-03 17:22:24 -0700137 }
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800138
139 // Update lastType only after validating value to be in the right
140 // sequence.
Herbie Ongc96a79d2019-03-08 10:49:17 -0800141 d.value.typ = value.typ
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800142 d.in = d.in[n:]
143
Herbie Ongc96a79d2019-03-08 10:49:17 -0800144 if d.value.typ == comma {
145 return d.Read()
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800146 }
147 return value, nerr.E
Joe Tsai879b18d2018-08-03 17:22:24 -0700148}
149
var (
	// literalRegexp matches one of the JSON literals null, true or false at
	// the start of the input.
	literalRegexp = regexp.MustCompile(`^(null|true|false)`)
	// Any sequence that looks like a non-delimiter (for error reporting).
	// It matches up to 32 such characters, or otherwise a single character.
	errRegexp = regexp.MustCompile(`^([-+._a-zA-Z0-9]{1,32}|.)`)
)
Joe Tsai879b18d2018-08-03 17:22:24 -0700155
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800156// parseNext parses for the next JSON value. It returns a Value object for
157// different types, except for Name. It also returns the size that was parsed.
158// It does not handle whether the next value is in a valid sequence or not, it
159// only ensures that the value is a valid one.
160func (d *Decoder) parseNext() (value Value, n int, err error) {
161 // Trim leading spaces.
162 d.consume(0)
Joe Tsai879b18d2018-08-03 17:22:24 -0700163
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800164 in := d.in
165 if len(in) == 0 {
Herbie Ong670d8082019-03-31 19:10:33 -0700166 return d.newValue(nil, EOF), 0, nil
Joe Tsai879b18d2018-08-03 17:22:24 -0700167 }
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800168
169 switch in[0] {
Joe Tsai879b18d2018-08-03 17:22:24 -0700170 case 'n', 't', 'f':
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800171 n := matchWithDelim(literalRegexp, in)
172 if n == 0 {
173 return Value{}, 0, d.newSyntaxError("invalid value %s", errRegexp.Find(in))
Joe Tsai879b18d2018-08-03 17:22:24 -0700174 }
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800175 switch in[0] {
176 case 'n':
Herbie Ong670d8082019-03-31 19:10:33 -0700177 return d.newValue(in[:n], Null), n, nil
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800178 case 't':
Herbie Ong670d8082019-03-31 19:10:33 -0700179 return d.newBoolValue(in[:n], true), n, nil
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800180 case 'f':
Herbie Ong670d8082019-03-31 19:10:33 -0700181 return d.newBoolValue(in[:n], false), n, nil
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800182 }
183
Joe Tsai879b18d2018-08-03 17:22:24 -0700184 case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
Herbie Onga3421952019-03-21 18:12:26 -0700185 n, ok := consumeNumber(in)
186 if !ok {
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800187 return Value{}, 0, d.newSyntaxError("invalid number %s", errRegexp.Find(in))
188 }
Herbie Ong670d8082019-03-31 19:10:33 -0700189 return d.newValue(in[:n], Number), n, nil
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800190
Joe Tsai879b18d2018-08-03 17:22:24 -0700191 case '"':
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800192 var nerr errors.NonFatal
193 s, n, err := d.parseString(in)
194 if !nerr.Merge(err) {
195 return Value{}, 0, err
196 }
Herbie Ong670d8082019-03-31 19:10:33 -0700197 return d.newStringValue(in[:n], s), n, nerr.E
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800198
Joe Tsai879b18d2018-08-03 17:22:24 -0700199 case '{':
Herbie Ong670d8082019-03-31 19:10:33 -0700200 return d.newValue(in[:1], StartObject), 1, nil
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800201
202 case '}':
Herbie Ong670d8082019-03-31 19:10:33 -0700203 return d.newValue(in[:1], EndObject), 1, nil
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800204
205 case '[':
Herbie Ong670d8082019-03-31 19:10:33 -0700206 return d.newValue(in[:1], StartArray), 1, nil
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800207
208 case ']':
Herbie Ong670d8082019-03-31 19:10:33 -0700209 return d.newValue(in[:1], EndArray), 1, nil
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800210
211 case ',':
Herbie Ong670d8082019-03-31 19:10:33 -0700212 return d.newValue(in[:1], comma), 1, nil
Joe Tsai879b18d2018-08-03 17:22:24 -0700213 }
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800214 return Value{}, 0, d.newSyntaxError("invalid value %s", errRegexp.Find(in))
Joe Tsai879b18d2018-08-03 17:22:24 -0700215}
216
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800217// position returns line and column number of parsed bytes.
218func (d *Decoder) position() (int, int) {
219 // Calculate line and column of consumed input.
220 b := d.orig[:len(d.orig)-len(d.in)]
221 line := bytes.Count(b, []byte("\n")) + 1
222 if i := bytes.LastIndexByte(b, '\n'); i >= 0 {
223 b = b[i+1:]
Joe Tsai879b18d2018-08-03 17:22:24 -0700224 }
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800225 column := utf8.RuneCount(b) + 1 // ignore multi-rune characters
226 return line, column
Joe Tsai879b18d2018-08-03 17:22:24 -0700227}
228
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800229// newSyntaxError returns an error with line and column information useful for
230// syntax errors.
231func (d *Decoder) newSyntaxError(f string, x ...interface{}) error {
232 e := errors.New(f, x...)
233 line, column := d.position()
234 return errors.New("syntax error (line %d:%d): %v", line, column, e)
Joe Tsai879b18d2018-08-03 17:22:24 -0700235}
236
Joe Tsai879b18d2018-08-03 17:22:24 -0700237// matchWithDelim matches r with the input b and verifies that the match
238// terminates with a delimiter of some form (e.g., r"[^-+_.a-zA-Z0-9]").
239// As a special case, EOF is considered a delimiter.
240func matchWithDelim(r *regexp.Regexp, b []byte) int {
241 n := len(r.Find(b))
242 if n < len(b) {
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800243 // Check that the next character is a delimiter.
244 if isNotDelim(b[n]) {
Joe Tsai879b18d2018-08-03 17:22:24 -0700245 return 0
246 }
247 }
248 return n
249}
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800250
// isNotDelim returns true if the given byte is not a delimiter character,
// i.e. it is one of '-', '+', '.', '_' or an ASCII letter or digit.
func isNotDelim(c byte) bool {
	return (c == '-' || c == '+' || c == '.' || c == '_' ||
		('a' <= c && c <= 'z') ||
		('A' <= c && c <= 'Z') ||
		('0' <= c && c <= '9'))
}
258
259// consume consumes n bytes of input and any subsequent whitespace.
260func (d *Decoder) consume(n int) {
261 d.in = d.in[n:]
262 for len(d.in) > 0 {
263 switch d.in[0] {
264 case ' ', '\n', '\r', '\t':
265 d.in = d.in[1:]
266 default:
267 return
268 }
269 }
270}
271
272// isValueNext returns true if next type should be a JSON value: Null,
273// Number, String or Bool.
274func (d *Decoder) isValueNext() bool {
275 if len(d.startStack) == 0 {
Herbie Ongc96a79d2019-03-08 10:49:17 -0800276 return d.value.typ == 0
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800277 }
278
279 start := d.startStack[len(d.startStack)-1]
280 switch start {
281 case StartObject:
Herbie Ongc96a79d2019-03-08 10:49:17 -0800282 return d.value.typ&Name != 0
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800283 case StartArray:
Herbie Ongc96a79d2019-03-08 10:49:17 -0800284 return d.value.typ&(StartArray|comma) != 0
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800285 }
286 panic(fmt.Sprintf(
287 "unreachable logic in Decoder.isValueNext, lastType: %v, startStack: %v",
Herbie Ongc96a79d2019-03-08 10:49:17 -0800288 d.value.typ, start))
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800289}
290
Herbie Ong670d8082019-03-31 19:10:33 -0700291// newValue constructs a Value for given Type.
292func (d *Decoder) newValue(input []byte, typ Type) Value {
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800293 line, column := d.position()
294 return Value{
295 input: input,
296 line: line,
297 column: column,
298 typ: typ,
Herbie Ong670d8082019-03-31 19:10:33 -0700299 }
300}
301
302// newBoolValue constructs a Value for a JSON boolean.
303func (d *Decoder) newBoolValue(input []byte, b bool) Value {
304 line, column := d.position()
305 return Value{
306 input: input,
307 line: line,
308 column: column,
309 typ: Bool,
310 boo: b,
311 }
312}
313
314// newStringValue constructs a Value for a JSON string.
315func (d *Decoder) newStringValue(input []byte, s string) Value {
316 line, column := d.position()
317 return Value{
318 input: input,
319 line: line,
320 column: column,
321 typ: String,
322 str: s,
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800323 }
324}
325
Herbie Ongc96a79d2019-03-08 10:49:17 -0800326// Value contains a JSON type and value parsed from calling Decoder.Read.
Herbie Ong670d8082019-03-31 19:10:33 -0700327// For JSON boolean and string, it holds the converted value in boo and str
328// fields respectively. For JSON number, input field holds a valid number which
329// is converted only in Int or Float. Other JSON types do not require any
330// additional data.
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800331type Value struct {
332 input []byte
333 line int
334 column int
335 typ Type
Herbie Ong670d8082019-03-31 19:10:33 -0700336 boo bool
337 str string
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800338}
339
340func (v Value) newError(f string, x ...interface{}) error {
341 e := errors.New(f, x...)
342 return errors.New("error (line %d:%d): %v", v.line, v.column, e)
343}
344
345// Type returns the JSON type.
346func (v Value) Type() Type {
347 return v.typ
348}
349
350// Position returns the line and column of the value.
351func (v Value) Position() (int, int) {
352 return v.line, v.column
353}
354
355// Bool returns the bool value if token is Bool, else it will return an error.
356func (v Value) Bool() (bool, error) {
357 if v.typ != Bool {
358 return false, v.newError("%s is not a bool", v.input)
359 }
Herbie Ong670d8082019-03-31 19:10:33 -0700360 return v.boo, nil
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800361}
362
363// String returns the string value for a JSON string token or the read value in
364// string if token is not a string.
365func (v Value) String() string {
366 if v.typ != String {
367 return string(v.input)
368 }
Herbie Ong670d8082019-03-31 19:10:33 -0700369 return v.str
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800370}
371
372// Name returns the object name if token is Name, else it will return an error.
373func (v Value) Name() (string, error) {
374 if v.typ != Name {
375 return "", v.newError("%s is not an object name", v.input)
376 }
Herbie Ong670d8082019-03-31 19:10:33 -0700377 return v.str, nil
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800378}
379
380// Float returns the floating-point number if token is Number, else it will
381// return an error.
382//
383// The floating-point precision is specified by the bitSize parameter: 32 for
384// float32 or 64 for float64. If bitSize=32, the result still has type float64,
385// but it will be convertible to float32 without changing its value. It will
386// return an error if the number exceeds the floating point limits for given
387// bitSize.
388func (v Value) Float(bitSize int) (float64, error) {
389 if v.typ != Number {
390 return 0, v.newError("%s is not a number", v.input)
391 }
392 f, err := strconv.ParseFloat(string(v.input), bitSize)
393 if err != nil {
394 return 0, v.newError("%v", err)
395 }
396 return f, nil
397}
398
399// Int returns the signed integer number if token is Number, else it will
400// return an error.
401//
402// The given bitSize specifies the integer type that the result must fit into.
403// It returns an error if the number is not an integer value or if the result
404// exceeds the limits for given bitSize.
405func (v Value) Int(bitSize int) (int64, error) {
406 s, err := v.getIntStr()
407 if err != nil {
408 return 0, err
409 }
410 n, err := strconv.ParseInt(s, 10, bitSize)
411 if err != nil {
412 return 0, v.newError("%v", err)
413 }
414 return n, nil
415}
416
417// Uint returns the signed integer number if token is Number, else it will
418// return an error.
419//
420// The given bitSize specifies the unsigned integer type that the result must
421// fit into. It returns an error if the number is not an unsigned integer value
422// or if the result exceeds the limits for given bitSize.
423func (v Value) Uint(bitSize int) (uint64, error) {
424 s, err := v.getIntStr()
425 if err != nil {
426 return 0, err
427 }
428 n, err := strconv.ParseUint(s, 10, bitSize)
429 if err != nil {
430 return 0, v.newError("%v", err)
431 }
432 return n, nil
433}
434
435func (v Value) getIntStr() (string, error) {
436 if v.typ != Number {
437 return "", v.newError("%s is not a number", v.input)
438 }
Herbie Onga3421952019-03-21 18:12:26 -0700439 parts, ok := parseNumber(v.input)
440 if !ok {
441 return "", v.newError("%s is not a number", v.input)
442 }
443 num, ok := normalizeToIntString(parts)
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800444 if !ok {
445 return "", v.newError("cannot convert %s to integer", v.input)
446 }
447 return num, nil
448}