blob: 452e8736b92d1126242ef3bbe137ea5ebeabe535 [file] [log] [blame]
Joe Tsai879b18d2018-08-03 17:22:24 -07001// Copyright 2018 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package json
6
7import (
8 "bytes"
Herbie Ongd3f8f2d2019-03-06 00:28:23 -08009 "fmt"
Joe Tsai879b18d2018-08-03 17:22:24 -070010 "io"
11 "regexp"
Herbie Ongd3f8f2d2019-03-06 00:28:23 -080012 "strconv"
Joe Tsai879b18d2018-08-03 17:22:24 -070013 "unicode/utf8"
14
Joe Tsai01ab2962018-09-21 17:44:00 -070015 "github.com/golang/protobuf/v2/internal/errors"
Joe Tsai879b18d2018-08-03 17:22:24 -070016)
17
// call specifies which Decoder method was invoked.
type call uint8

const (
	// readCall marks that Decoder.Read was the last method called. Being the
	// zero value of call, it is also the state of a Decoder whose lastCall
	// field has never been set.
	readCall call = iota
	// peekCall marks that Decoder.Peek was the last method called.
	peekCall
)
25
// Decoder is a token-based JSON decoder.
type Decoder struct {
	// lastCall is the last method called, either readCall or peekCall.
	lastCall call

	// value contains the last read value. Peek stores the complete
	// looked-ahead value here so that the following Read can return it; a
	// direct Read only updates its typ field.
	value Value

	// err contains the error from the read performed by Peek; the following
	// Read returns it together with value.
	err error

	// startStack is a stack containing StartObject and StartArray types. The
	// top of stack represents the object or the array the current value is
	// directly located in.
	startStack []Type

	// orig is used in reporting line and column.
	orig []byte
	// in contains the unconsumed input.
	in []byte
}
47
Herbie Ongd3f8f2d2019-03-06 00:28:23 -080048// NewDecoder returns a Decoder to read the given []byte.
49func NewDecoder(b []byte) *Decoder {
50 return &Decoder{orig: b, in: b}
51}
52
Herbie Ongc96a79d2019-03-08 10:49:17 -080053// Peek looks ahead and returns the next JSON type without advancing a read.
54func (d *Decoder) Peek() Type {
55 defer func() { d.lastCall = peekCall }()
56 if d.lastCall == readCall {
57 d.value, d.err = d.Read()
58 }
59 return d.value.typ
60}
61
62// Read returns the next JSON value. It will return an error if there is no
63// valid value. For String types containing invalid UTF8 characters, a
64// non-fatal error is returned and caller can call Read for the next value.
65func (d *Decoder) Read() (Value, error) {
66 defer func() { d.lastCall = readCall }()
67 if d.lastCall == peekCall {
68 return d.value, d.err
69 }
70
Herbie Ongd3f8f2d2019-03-06 00:28:23 -080071 var nerr errors.NonFatal
72 value, n, err := d.parseNext()
73 if !nerr.Merge(err) {
Joe Tsai879b18d2018-08-03 17:22:24 -070074 return Value{}, err
75 }
Herbie Ongd3f8f2d2019-03-06 00:28:23 -080076
77 switch value.typ {
78 case EOF:
79 if len(d.startStack) != 0 ||
Herbie Ongc96a79d2019-03-08 10:49:17 -080080 d.value.typ&Null|Bool|Number|String|EndObject|EndArray == 0 {
Herbie Ongd3f8f2d2019-03-06 00:28:23 -080081 return Value{}, io.ErrUnexpectedEOF
82 }
83
84 case Null:
85 if !d.isValueNext() {
86 return Value{}, d.newSyntaxError("unexpected value null")
87 }
88
89 case Bool, Number:
90 if !d.isValueNext() {
91 return Value{}, d.newSyntaxError("unexpected value %v", value)
92 }
93
94 case String:
95 if d.isValueNext() {
96 break
97 }
98 // Check if this is for an object name.
Herbie Ongc96a79d2019-03-08 10:49:17 -080099 if d.value.typ&(StartObject|comma) == 0 {
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800100 return Value{}, d.newSyntaxError("unexpected value %q", value)
101 }
102 d.in = d.in[n:]
103 d.consume(0)
104 if c := d.in[0]; c != ':' {
105 return Value{}, d.newSyntaxError(`unexpected character %v, missing ":" after object name`, string(c))
106 }
107 n = 1
108 value.typ = Name
109
110 case StartObject, StartArray:
111 if !d.isValueNext() {
112 return Value{}, d.newSyntaxError("unexpected character %v", value)
113 }
114 d.startStack = append(d.startStack, value.typ)
115
116 case EndObject:
117 if len(d.startStack) == 0 ||
Herbie Ongc96a79d2019-03-08 10:49:17 -0800118 d.value.typ == comma ||
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800119 d.startStack[len(d.startStack)-1] != StartObject {
120 return Value{}, d.newSyntaxError("unexpected character }")
121 }
122 d.startStack = d.startStack[:len(d.startStack)-1]
123
124 case EndArray:
125 if len(d.startStack) == 0 ||
Herbie Ongc96a79d2019-03-08 10:49:17 -0800126 d.value.typ == comma ||
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800127 d.startStack[len(d.startStack)-1] != StartArray {
128 return Value{}, d.newSyntaxError("unexpected character ]")
129 }
130 d.startStack = d.startStack[:len(d.startStack)-1]
131
132 case comma:
133 if len(d.startStack) == 0 ||
Herbie Ongc96a79d2019-03-08 10:49:17 -0800134 d.value.typ&(Null|Bool|Number|String|EndObject|EndArray) == 0 {
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800135 return Value{}, d.newSyntaxError("unexpected character ,")
136 }
Joe Tsai879b18d2018-08-03 17:22:24 -0700137 }
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800138
139 // Update lastType only after validating value to be in the right
140 // sequence.
Herbie Ongc96a79d2019-03-08 10:49:17 -0800141 d.value.typ = value.typ
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800142 d.in = d.in[n:]
143
Herbie Ongc96a79d2019-03-08 10:49:17 -0800144 if d.value.typ == comma {
145 return d.Read()
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800146 }
147 return value, nerr.E
Joe Tsai879b18d2018-08-03 17:22:24 -0700148}
149
var (
	// literalRegexp matches one of the literals null, true or false at the
	// start of the input.
	literalRegexp = regexp.MustCompile(`^(null|true|false)`)
	// errRegexp matches any sequence that looks like a non-delimiter (at most
	// 32 characters of it), or else a single character. It is used to pick
	// out the offending text for error reporting.
	errRegexp = regexp.MustCompile(`^([-+._a-zA-Z0-9]{1,32}|.)`)
)
Joe Tsai879b18d2018-08-03 17:22:24 -0700155
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800156// parseNext parses for the next JSON value. It returns a Value object for
157// different types, except for Name. It also returns the size that was parsed.
158// It does not handle whether the next value is in a valid sequence or not, it
159// only ensures that the value is a valid one.
160func (d *Decoder) parseNext() (value Value, n int, err error) {
161 // Trim leading spaces.
162 d.consume(0)
Joe Tsai879b18d2018-08-03 17:22:24 -0700163
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800164 in := d.in
165 if len(in) == 0 {
166 return d.newValue(EOF, nil, nil), 0, nil
Joe Tsai879b18d2018-08-03 17:22:24 -0700167 }
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800168
169 switch in[0] {
Joe Tsai879b18d2018-08-03 17:22:24 -0700170 case 'n', 't', 'f':
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800171 n := matchWithDelim(literalRegexp, in)
172 if n == 0 {
173 return Value{}, 0, d.newSyntaxError("invalid value %s", errRegexp.Find(in))
Joe Tsai879b18d2018-08-03 17:22:24 -0700174 }
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800175 switch in[0] {
176 case 'n':
177 return d.newValue(Null, in[:n], nil), n, nil
178 case 't':
179 return d.newValue(Bool, in[:n], true), n, nil
180 case 'f':
181 return d.newValue(Bool, in[:n], false), n, nil
182 }
183
Joe Tsai879b18d2018-08-03 17:22:24 -0700184 case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800185 num, n := parseNumber(in)
186 if num == nil {
187 return Value{}, 0, d.newSyntaxError("invalid number %s", errRegexp.Find(in))
188 }
189 return d.newValue(Number, in[:n], num), n, nil
190
Joe Tsai879b18d2018-08-03 17:22:24 -0700191 case '"':
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800192 var nerr errors.NonFatal
193 s, n, err := d.parseString(in)
194 if !nerr.Merge(err) {
195 return Value{}, 0, err
196 }
197 return d.newValue(String, in[:n], s), n, nerr.E
198
Joe Tsai879b18d2018-08-03 17:22:24 -0700199 case '{':
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800200 return d.newValue(StartObject, in[:1], nil), 1, nil
201
202 case '}':
203 return d.newValue(EndObject, in[:1], nil), 1, nil
204
205 case '[':
206 return d.newValue(StartArray, in[:1], nil), 1, nil
207
208 case ']':
209 return d.newValue(EndArray, in[:1], nil), 1, nil
210
211 case ',':
212 return d.newValue(comma, in[:1], nil), 1, nil
Joe Tsai879b18d2018-08-03 17:22:24 -0700213 }
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800214 return Value{}, 0, d.newSyntaxError("invalid value %s", errRegexp.Find(in))
Joe Tsai879b18d2018-08-03 17:22:24 -0700215}
216
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800217// position returns line and column number of parsed bytes.
218func (d *Decoder) position() (int, int) {
219 // Calculate line and column of consumed input.
220 b := d.orig[:len(d.orig)-len(d.in)]
221 line := bytes.Count(b, []byte("\n")) + 1
222 if i := bytes.LastIndexByte(b, '\n'); i >= 0 {
223 b = b[i+1:]
Joe Tsai879b18d2018-08-03 17:22:24 -0700224 }
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800225 column := utf8.RuneCount(b) + 1 // ignore multi-rune characters
226 return line, column
Joe Tsai879b18d2018-08-03 17:22:24 -0700227}
228
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800229// newSyntaxError returns an error with line and column information useful for
230// syntax errors.
231func (d *Decoder) newSyntaxError(f string, x ...interface{}) error {
232 e := errors.New(f, x...)
233 line, column := d.position()
234 return errors.New("syntax error (line %d:%d): %v", line, column, e)
Joe Tsai879b18d2018-08-03 17:22:24 -0700235}
236
Joe Tsai879b18d2018-08-03 17:22:24 -0700237// matchWithDelim matches r with the input b and verifies that the match
238// terminates with a delimiter of some form (e.g., r"[^-+_.a-zA-Z0-9]").
239// As a special case, EOF is considered a delimiter.
240func matchWithDelim(r *regexp.Regexp, b []byte) int {
241 n := len(r.Find(b))
242 if n < len(b) {
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800243 // Check that the next character is a delimiter.
244 if isNotDelim(b[n]) {
Joe Tsai879b18d2018-08-03 17:22:24 -0700245 return 0
246 }
247 }
248 return n
249}
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800250
// isNotDelim reports whether c is not a delimiter character, i.e. whether it
// is an ASCII letter, an ASCII digit, or one of '-', '+', '.', '_'.
func isNotDelim(c byte) bool {
	switch c {
	case '-', '+', '.', '_':
		return true
	}
	switch {
	case 'a' <= c && c <= 'z':
		return true
	case 'A' <= c && c <= 'Z':
		return true
	case '0' <= c && c <= '9':
		return true
	}
	return false
}
258
259// consume consumes n bytes of input and any subsequent whitespace.
260func (d *Decoder) consume(n int) {
261 d.in = d.in[n:]
262 for len(d.in) > 0 {
263 switch d.in[0] {
264 case ' ', '\n', '\r', '\t':
265 d.in = d.in[1:]
266 default:
267 return
268 }
269 }
270}
271
272// isValueNext returns true if next type should be a JSON value: Null,
273// Number, String or Bool.
274func (d *Decoder) isValueNext() bool {
275 if len(d.startStack) == 0 {
Herbie Ongc96a79d2019-03-08 10:49:17 -0800276 return d.value.typ == 0
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800277 }
278
279 start := d.startStack[len(d.startStack)-1]
280 switch start {
281 case StartObject:
Herbie Ongc96a79d2019-03-08 10:49:17 -0800282 return d.value.typ&Name != 0
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800283 case StartArray:
Herbie Ongc96a79d2019-03-08 10:49:17 -0800284 return d.value.typ&(StartArray|comma) != 0
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800285 }
286 panic(fmt.Sprintf(
287 "unreachable logic in Decoder.isValueNext, lastType: %v, startStack: %v",
Herbie Ongc96a79d2019-03-08 10:49:17 -0800288 d.value.typ, start))
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800289}
290
291// newValue constructs a Value.
292func (d *Decoder) newValue(typ Type, input []byte, value interface{}) Value {
293 line, column := d.position()
294 return Value{
295 input: input,
296 line: line,
297 column: column,
298 typ: typ,
299 value: value,
300 }
301}
302
// Value contains a JSON type and value parsed from calling Decoder.Read.
type Value struct {
	// input holds the raw input bytes that this value was parsed from.
	input []byte
	// line and column are the decoder position recorded when the value was
	// constructed; they are used for error reporting.
	line   int
	column int
	// typ is the JSON type of this value.
	typ Type
	// value will be set to the following Go type based on the type field:
	//    Bool   => bool
	//    Number => *numberParts
	//    String => string
	//    Name   => string
	// It will be nil if none of the above.
	value interface{}
}
317
318func (v Value) newError(f string, x ...interface{}) error {
319 e := errors.New(f, x...)
320 return errors.New("error (line %d:%d): %v", v.line, v.column, e)
321}
322
// Type returns the JSON type of the value.
func (v Value) Type() Type {
	return v.typ
}
327
// Position returns the line and column of the value, in that order.
func (v Value) Position() (int, int) {
	return v.line, v.column
}
332
333// Bool returns the bool value if token is Bool, else it will return an error.
334func (v Value) Bool() (bool, error) {
335 if v.typ != Bool {
336 return false, v.newError("%s is not a bool", v.input)
337 }
338 return v.value.(bool), nil
339}
340
341// String returns the string value for a JSON string token or the read value in
342// string if token is not a string.
343func (v Value) String() string {
344 if v.typ != String {
345 return string(v.input)
346 }
347 return v.value.(string)
348}
349
350// Name returns the object name if token is Name, else it will return an error.
351func (v Value) Name() (string, error) {
352 if v.typ != Name {
353 return "", v.newError("%s is not an object name", v.input)
354 }
355 return v.value.(string), nil
356}
357
358// Float returns the floating-point number if token is Number, else it will
359// return an error.
360//
361// The floating-point precision is specified by the bitSize parameter: 32 for
362// float32 or 64 for float64. If bitSize=32, the result still has type float64,
363// but it will be convertible to float32 without changing its value. It will
364// return an error if the number exceeds the floating point limits for given
365// bitSize.
366func (v Value) Float(bitSize int) (float64, error) {
367 if v.typ != Number {
368 return 0, v.newError("%s is not a number", v.input)
369 }
370 f, err := strconv.ParseFloat(string(v.input), bitSize)
371 if err != nil {
372 return 0, v.newError("%v", err)
373 }
374 return f, nil
375}
376
377// Int returns the signed integer number if token is Number, else it will
378// return an error.
379//
380// The given bitSize specifies the integer type that the result must fit into.
381// It returns an error if the number is not an integer value or if the result
382// exceeds the limits for given bitSize.
383func (v Value) Int(bitSize int) (int64, error) {
384 s, err := v.getIntStr()
385 if err != nil {
386 return 0, err
387 }
388 n, err := strconv.ParseInt(s, 10, bitSize)
389 if err != nil {
390 return 0, v.newError("%v", err)
391 }
392 return n, nil
393}
394
395// Uint returns the signed integer number if token is Number, else it will
396// return an error.
397//
398// The given bitSize specifies the unsigned integer type that the result must
399// fit into. It returns an error if the number is not an unsigned integer value
400// or if the result exceeds the limits for given bitSize.
401func (v Value) Uint(bitSize int) (uint64, error) {
402 s, err := v.getIntStr()
403 if err != nil {
404 return 0, err
405 }
406 n, err := strconv.ParseUint(s, 10, bitSize)
407 if err != nil {
408 return 0, v.newError("%v", err)
409 }
410 return n, nil
411}
412
413func (v Value) getIntStr() (string, error) {
414 if v.typ != Number {
415 return "", v.newError("%s is not a number", v.input)
416 }
417 pnum := v.value.(*numberParts)
418 num, ok := normalizeToIntString(pnum)
419 if !ok {
420 return "", v.newError("cannot convert %s to integer", v.input)
421 }
422 return num, nil
423}