blob: 543abbca48bda701891fa4b8538a611f78e35180 [file] [log] [blame]
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
4
5package json
6
7import (
8 "bytes"
Herbie Ongd3f8f2d2019-03-06 00:28:23 -08009 "fmt"
Joe Tsai879b18d2018-08-03 17:22:24 -070010 "io"
11 "regexp"
Herbie Ongd3f8f2d2019-03-06 00:28:23 -080012 "strconv"
Joe Tsai879b18d2018-08-03 17:22:24 -070013 "unicode/utf8"
14
Joe Tsai01ab2962018-09-21 17:44:00 -070015 "github.com/golang/protobuf/v2/internal/errors"
Joe Tsai879b18d2018-08-03 17:22:24 -070016)
17
Herbie Ongd3f8f2d2019-03-06 00:28:23 -080018// Decoder is a token-based JSON decoder.
19type Decoder struct {
20 lastType Type
Joe Tsai879b18d2018-08-03 17:22:24 -070021
Herbie Ongd3f8f2d2019-03-06 00:28:23 -080022 // startStack is a stack containing StartObject and StartArray types. The
23 // top of stack represents the object or the array the current value is
24 // directly located in.
25 startStack []Type
26
27 // orig is used in reporting line and column.
28 orig []byte
29 // in contains the unconsumed input.
30 in []byte
Joe Tsai879b18d2018-08-03 17:22:24 -070031}
32
Herbie Ongd3f8f2d2019-03-06 00:28:23 -080033// NewDecoder returns a Decoder to read the given []byte.
34func NewDecoder(b []byte) *Decoder {
35 return &Decoder{orig: b, in: b}
36}
37
38// ReadNext returns the next JSON value. It will return an error if there is no
39// valid JSON value. For String types containing invalid UTF8 characters, a
40// non-fatal error is returned and caller can call ReadNext for the next value.
41func (d *Decoder) ReadNext() (Value, error) {
42 var nerr errors.NonFatal
43 value, n, err := d.parseNext()
44 if !nerr.Merge(err) {
Joe Tsai879b18d2018-08-03 17:22:24 -070045 return Value{}, err
46 }
Herbie Ongd3f8f2d2019-03-06 00:28:23 -080047
48 switch value.typ {
49 case EOF:
50 if len(d.startStack) != 0 ||
51 d.lastType&Null|Bool|Number|String|EndObject|EndArray == 0 {
52 return Value{}, io.ErrUnexpectedEOF
53 }
54
55 case Null:
56 if !d.isValueNext() {
57 return Value{}, d.newSyntaxError("unexpected value null")
58 }
59
60 case Bool, Number:
61 if !d.isValueNext() {
62 return Value{}, d.newSyntaxError("unexpected value %v", value)
63 }
64
65 case String:
66 if d.isValueNext() {
67 break
68 }
69 // Check if this is for an object name.
70 if d.lastType&(StartObject|comma) == 0 {
71 return Value{}, d.newSyntaxError("unexpected value %q", value)
72 }
73 d.in = d.in[n:]
74 d.consume(0)
75 if c := d.in[0]; c != ':' {
76 return Value{}, d.newSyntaxError(`unexpected character %v, missing ":" after object name`, string(c))
77 }
78 n = 1
79 value.typ = Name
80
81 case StartObject, StartArray:
82 if !d.isValueNext() {
83 return Value{}, d.newSyntaxError("unexpected character %v", value)
84 }
85 d.startStack = append(d.startStack, value.typ)
86
87 case EndObject:
88 if len(d.startStack) == 0 ||
89 d.lastType == comma ||
90 d.startStack[len(d.startStack)-1] != StartObject {
91 return Value{}, d.newSyntaxError("unexpected character }")
92 }
93 d.startStack = d.startStack[:len(d.startStack)-1]
94
95 case EndArray:
96 if len(d.startStack) == 0 ||
97 d.lastType == comma ||
98 d.startStack[len(d.startStack)-1] != StartArray {
99 return Value{}, d.newSyntaxError("unexpected character ]")
100 }
101 d.startStack = d.startStack[:len(d.startStack)-1]
102
103 case comma:
104 if len(d.startStack) == 0 ||
105 d.lastType&(Null|Bool|Number|String|EndObject|EndArray) == 0 {
106 return Value{}, d.newSyntaxError("unexpected character ,")
107 }
Joe Tsai879b18d2018-08-03 17:22:24 -0700108 }
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800109
110 // Update lastType only after validating value to be in the right
111 // sequence.
112 d.lastType = value.typ
113 d.in = d.in[n:]
114
115 if d.lastType == comma {
116 return d.ReadNext()
117 }
118 return value, nerr.E
Joe Tsai879b18d2018-08-03 17:22:24 -0700119}
120
// literalRegexp matches one of the JSON literals at the start of the input.
var literalRegexp = regexp.MustCompile(`^(null|true|false)`)

// errRegexp matches, at the start of the input, either a run of up to 32
// non-delimiter characters or a single character. It is used only to pick
// the text quoted in error messages.
var errRegexp = regexp.MustCompile(`^([-+._a-zA-Z0-9]{1,32}|.)`)
Joe Tsai879b18d2018-08-03 17:22:24 -0700126
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800127// parseNext parses for the next JSON value. It returns a Value object for
128// different types, except for Name. It also returns the size that was parsed.
129// It does not handle whether the next value is in a valid sequence or not, it
130// only ensures that the value is a valid one.
131func (d *Decoder) parseNext() (value Value, n int, err error) {
132 // Trim leading spaces.
133 d.consume(0)
Joe Tsai879b18d2018-08-03 17:22:24 -0700134
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800135 in := d.in
136 if len(in) == 0 {
137 return d.newValue(EOF, nil, nil), 0, nil
Joe Tsai879b18d2018-08-03 17:22:24 -0700138 }
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800139
140 switch in[0] {
Joe Tsai879b18d2018-08-03 17:22:24 -0700141 case 'n', 't', 'f':
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800142 n := matchWithDelim(literalRegexp, in)
143 if n == 0 {
144 return Value{}, 0, d.newSyntaxError("invalid value %s", errRegexp.Find(in))
Joe Tsai879b18d2018-08-03 17:22:24 -0700145 }
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800146 switch in[0] {
147 case 'n':
148 return d.newValue(Null, in[:n], nil), n, nil
149 case 't':
150 return d.newValue(Bool, in[:n], true), n, nil
151 case 'f':
152 return d.newValue(Bool, in[:n], false), n, nil
153 }
154
Joe Tsai879b18d2018-08-03 17:22:24 -0700155 case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800156 num, n := parseNumber(in)
157 if num == nil {
158 return Value{}, 0, d.newSyntaxError("invalid number %s", errRegexp.Find(in))
159 }
160 return d.newValue(Number, in[:n], num), n, nil
161
Joe Tsai879b18d2018-08-03 17:22:24 -0700162 case '"':
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800163 var nerr errors.NonFatal
164 s, n, err := d.parseString(in)
165 if !nerr.Merge(err) {
166 return Value{}, 0, err
167 }
168 return d.newValue(String, in[:n], s), n, nerr.E
169
Joe Tsai879b18d2018-08-03 17:22:24 -0700170 case '{':
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800171 return d.newValue(StartObject, in[:1], nil), 1, nil
172
173 case '}':
174 return d.newValue(EndObject, in[:1], nil), 1, nil
175
176 case '[':
177 return d.newValue(StartArray, in[:1], nil), 1, nil
178
179 case ']':
180 return d.newValue(EndArray, in[:1], nil), 1, nil
181
182 case ',':
183 return d.newValue(comma, in[:1], nil), 1, nil
Joe Tsai879b18d2018-08-03 17:22:24 -0700184 }
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800185 return Value{}, 0, d.newSyntaxError("invalid value %s", errRegexp.Find(in))
Joe Tsai879b18d2018-08-03 17:22:24 -0700186}
187
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800188// position returns line and column number of parsed bytes.
189func (d *Decoder) position() (int, int) {
190 // Calculate line and column of consumed input.
191 b := d.orig[:len(d.orig)-len(d.in)]
192 line := bytes.Count(b, []byte("\n")) + 1
193 if i := bytes.LastIndexByte(b, '\n'); i >= 0 {
194 b = b[i+1:]
Joe Tsai879b18d2018-08-03 17:22:24 -0700195 }
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800196 column := utf8.RuneCount(b) + 1 // ignore multi-rune characters
197 return line, column
Joe Tsai879b18d2018-08-03 17:22:24 -0700198}
199
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800200// newSyntaxError returns an error with line and column information useful for
201// syntax errors.
202func (d *Decoder) newSyntaxError(f string, x ...interface{}) error {
203 e := errors.New(f, x...)
204 line, column := d.position()
205 return errors.New("syntax error (line %d:%d): %v", line, column, e)
Joe Tsai879b18d2018-08-03 17:22:24 -0700206}
207
Joe Tsai879b18d2018-08-03 17:22:24 -0700208// matchWithDelim matches r with the input b and verifies that the match
209// terminates with a delimiter of some form (e.g., r"[^-+_.a-zA-Z0-9]").
210// As a special case, EOF is considered a delimiter.
211func matchWithDelim(r *regexp.Regexp, b []byte) int {
212 n := len(r.Find(b))
213 if n < len(b) {
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800214 // Check that the next character is a delimiter.
215 if isNotDelim(b[n]) {
Joe Tsai879b18d2018-08-03 17:22:24 -0700216 return 0
217 }
218 }
219 return n
220}
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800221
// isNotDelim reports whether c is not a delimiter character, that is, whether
// it is an ASCII letter, an ASCII digit, or one of '-', '+', '.', '_'.
func isNotDelim(c byte) bool {
	switch {
	case c >= 'a' && c <= 'z':
		return true
	case c >= 'A' && c <= 'Z':
		return true
	case c >= '0' && c <= '9':
		return true
	}
	switch c {
	case '-', '+', '.', '_':
		return true
	}
	return false
}
229
230// consume consumes n bytes of input and any subsequent whitespace.
231func (d *Decoder) consume(n int) {
232 d.in = d.in[n:]
233 for len(d.in) > 0 {
234 switch d.in[0] {
235 case ' ', '\n', '\r', '\t':
236 d.in = d.in[1:]
237 default:
238 return
239 }
240 }
241}
242
243// isValueNext returns true if next type should be a JSON value: Null,
244// Number, String or Bool.
245func (d *Decoder) isValueNext() bool {
246 if len(d.startStack) == 0 {
247 return d.lastType == 0
248 }
249
250 start := d.startStack[len(d.startStack)-1]
251 switch start {
252 case StartObject:
253 return d.lastType&Name != 0
254 case StartArray:
255 return d.lastType&(StartArray|comma) != 0
256 }
257 panic(fmt.Sprintf(
258 "unreachable logic in Decoder.isValueNext, lastType: %v, startStack: %v",
259 d.lastType, start))
260}
261
262// newValue constructs a Value.
263func (d *Decoder) newValue(typ Type, input []byte, value interface{}) Value {
264 line, column := d.position()
265 return Value{
266 input: input,
267 line: line,
268 column: column,
269 typ: typ,
270 value: value,
271 }
272}
273
274// Value contains a JSON type and value parsed from calling Decoder.ReadNext.
275type Value struct {
276 input []byte
277 line int
278 column int
279 typ Type
280 // value will be set to the following Go type based on the type field:
281 // Bool => bool
282 // Number => *numberParts
283 // String => string
284 // Name => string
285 // It will be nil if none of the above.
286 value interface{}
287}
288
289func (v Value) newError(f string, x ...interface{}) error {
290 e := errors.New(f, x...)
291 return errors.New("error (line %d:%d): %v", v.line, v.column, e)
292}
293
294// Type returns the JSON type.
295func (v Value) Type() Type {
296 return v.typ
297}
298
299// Position returns the line and column of the value.
300func (v Value) Position() (int, int) {
301 return v.line, v.column
302}
303
304// Bool returns the bool value if token is Bool, else it will return an error.
305func (v Value) Bool() (bool, error) {
306 if v.typ != Bool {
307 return false, v.newError("%s is not a bool", v.input)
308 }
309 return v.value.(bool), nil
310}
311
312// String returns the string value for a JSON string token or the read value in
313// string if token is not a string.
314func (v Value) String() string {
315 if v.typ != String {
316 return string(v.input)
317 }
318 return v.value.(string)
319}
320
321// Name returns the object name if token is Name, else it will return an error.
322func (v Value) Name() (string, error) {
323 if v.typ != Name {
324 return "", v.newError("%s is not an object name", v.input)
325 }
326 return v.value.(string), nil
327}
328
329// Float returns the floating-point number if token is Number, else it will
330// return an error.
331//
332// The floating-point precision is specified by the bitSize parameter: 32 for
333// float32 or 64 for float64. If bitSize=32, the result still has type float64,
334// but it will be convertible to float32 without changing its value. It will
335// return an error if the number exceeds the floating point limits for given
336// bitSize.
337func (v Value) Float(bitSize int) (float64, error) {
338 if v.typ != Number {
339 return 0, v.newError("%s is not a number", v.input)
340 }
341 f, err := strconv.ParseFloat(string(v.input), bitSize)
342 if err != nil {
343 return 0, v.newError("%v", err)
344 }
345 return f, nil
346}
347
348// Int returns the signed integer number if token is Number, else it will
349// return an error.
350//
351// The given bitSize specifies the integer type that the result must fit into.
352// It returns an error if the number is not an integer value or if the result
353// exceeds the limits for given bitSize.
354func (v Value) Int(bitSize int) (int64, error) {
355 s, err := v.getIntStr()
356 if err != nil {
357 return 0, err
358 }
359 n, err := strconv.ParseInt(s, 10, bitSize)
360 if err != nil {
361 return 0, v.newError("%v", err)
362 }
363 return n, nil
364}
365
366// Uint returns the signed integer number if token is Number, else it will
367// return an error.
368//
369// The given bitSize specifies the unsigned integer type that the result must
370// fit into. It returns an error if the number is not an unsigned integer value
371// or if the result exceeds the limits for given bitSize.
372func (v Value) Uint(bitSize int) (uint64, error) {
373 s, err := v.getIntStr()
374 if err != nil {
375 return 0, err
376 }
377 n, err := strconv.ParseUint(s, 10, bitSize)
378 if err != nil {
379 return 0, v.newError("%v", err)
380 }
381 return n, nil
382}
383
384func (v Value) getIntStr() (string, error) {
385 if v.typ != Number {
386 return "", v.newError("%s is not a number", v.input)
387 }
388 pnum := v.value.(*numberParts)
389 num, ok := normalizeToIntString(pnum)
390 if !ok {
391 return "", v.newError("cannot convert %s to integer", v.input)
392 }
393 return num, nil
394}