blob: 769619b7f2d3fccfe3a10b40681ec447b79f34b9 [file] [log] [blame]
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
4
5package json
6
7import (
8 "bytes"
9 "io"
10 "regexp"
11 "unicode/utf8"
12
Joe Tsai01ab2962018-09-21 17:44:00 -070013 "github.com/golang/protobuf/v2/internal/errors"
Joe Tsai879b18d2018-08-03 17:22:24 -070014)
15
// syntaxError wraps an error to mark it as a JSON syntax error.
// Unmarshal type-asserts for this wrapper to decide whether the error
// should be annotated with a line and column position.
type syntaxError struct{ error }
17
18func newSyntaxError(f string, x ...interface{}) error {
19 return syntaxError{errors.New(f, x...)}
20}
21
22// Unmarshal parses b as the JSON format.
23// It returns a Value, which represents the input as an AST.
24func Unmarshal(b []byte) (Value, error) {
25 p := decoder{in: b}
26 p.consume(0) // trim leading spaces
27 v, err := p.unmarshalValue()
28 if !p.nerr.Merge(err) {
29 if e, ok := err.(syntaxError); ok {
30 b = b[:len(b)-len(p.in)] // consumed input
31 line := bytes.Count(b, []byte("\n")) + 1
32 if i := bytes.LastIndexByte(b, '\n'); i >= 0 {
33 b = b[i+1:]
34 }
35 column := utf8.RuneCount(b) + 1 // ignore multi-rune characters
36 err = errors.New("syntax error (line %d:%d): %v", line, column, e.error)
37 }
38 return Value{}, err
39 }
40 if len(p.in) > 0 {
41 return Value{}, errors.New("%d bytes of unconsumed input", len(p.in))
42 }
43 return v, p.nerr.E
44}
45
46type decoder struct {
47 nerr errors.NonFatal
48 in []byte
49}
50
// literalRegexp matches one of the JSON literals null, true, or false at
// the start of the input. It does not check what follows the literal;
// unmarshalValue pairs it with matchWithDelim for that.
var literalRegexp = regexp.MustCompile("^(null|true|false)")
52
53func (p *decoder) unmarshalValue() (Value, error) {
54 if len(p.in) == 0 {
55 return Value{}, io.ErrUnexpectedEOF
56 }
57 switch p.in[0] {
58 case 'n', 't', 'f':
59 if n := matchWithDelim(literalRegexp, p.in); n > 0 {
60 var v Value
61 switch p.in[0] {
62 case 'n':
63 v = rawValueOf(nil, p.in[:n:n])
64 case 't':
65 v = rawValueOf(true, p.in[:n:n])
66 case 'f':
67 v = rawValueOf(false, p.in[:n:n])
68 }
69 p.consume(n)
70 return v, nil
71 }
72 return Value{}, newSyntaxError("invalid %q as literal", errRegexp.Find(p.in))
73 case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
74 return p.unmarshalNumber()
75 case '"':
76 return p.unmarshalString()
77 case '[':
78 return p.unmarshalArray()
79 case '{':
80 return p.unmarshalObject()
81 default:
82 return Value{}, newSyntaxError("invalid %q as value", errRegexp.Find(p.in))
83 }
84}
85
86func (p *decoder) unmarshalArray() (Value, error) {
87 b := p.in
88 var elems []Value
89 if err := p.consumeChar('[', "at start of array"); err != nil {
90 return Value{}, err
91 }
92 if len(p.in) > 0 && p.in[0] != ']' {
93 for len(p.in) > 0 {
94 v, err := p.unmarshalValue()
95 if !p.nerr.Merge(err) {
96 return Value{}, err
97 }
98 elems = append(elems, v)
99 if !p.tryConsumeChar(',') {
100 break
101 }
102 }
103 }
104 if err := p.consumeChar(']', "at end of array"); err != nil {
105 return Value{}, err
106 }
107 b = b[:len(b)-len(p.in)]
108 return rawValueOf(elems, b[:len(b):len(b)]), nil
109}
110
111func (p *decoder) unmarshalObject() (Value, error) {
112 b := p.in
113 var items [][2]Value
114 if err := p.consumeChar('{', "at start of object"); err != nil {
115 return Value{}, err
116 }
117 if len(p.in) > 0 && p.in[0] != '}' {
118 for len(p.in) > 0 {
119 k, err := p.unmarshalString()
120 if !p.nerr.Merge(err) {
121 return Value{}, err
122 }
123 if err := p.consumeChar(':', "in object"); err != nil {
124 return Value{}, err
125 }
126 v, err := p.unmarshalValue()
127 if !p.nerr.Merge(err) {
128 return Value{}, err
129 }
130 items = append(items, [2]Value{k, v})
131 if !p.tryConsumeChar(',') {
132 break
133 }
134 }
135 }
136 if err := p.consumeChar('}', "at end of object"); err != nil {
137 return Value{}, err
138 }
139 b = b[:len(b)-len(p.in)]
140 return rawValueOf(items, b[:len(b):len(b)]), nil
141}
142
143func (p *decoder) consumeChar(c byte, msg string) error {
144 if p.tryConsumeChar(c) {
145 return nil
146 }
147 if len(p.in) == 0 {
148 return io.ErrUnexpectedEOF
149 }
150 return newSyntaxError("invalid character %q, expected %q %s", p.in[0], c, msg)
151}
152
153func (p *decoder) tryConsumeChar(c byte) bool {
154 if len(p.in) > 0 && p.in[0] == c {
155 p.consume(1)
156 return true
157 }
158 return false
159}
160
161// consume consumes n bytes of input and any subsequent whitespace.
162func (p *decoder) consume(n int) {
163 p.in = p.in[n:]
164 for len(p.in) > 0 {
165 switch p.in[0] {
166 case ' ', '\n', '\r', '\t':
167 p.in = p.in[1:]
168 default:
169 return
170 }
171 }
172}
173
// errRegexp matches, at the start of the input, any sequence that looks
// like a non-delimiter (for error reporting): up to 32 characters from
// [-+._a-zA-Z0-9], or failing that a single character other than newline.
// It is used to extract the offending token for syntax error messages.
var errRegexp = regexp.MustCompile("^([-+._a-zA-Z0-9]{1,32}|.)")
176
// matchWithDelim matches r with the input b and verifies that the match
// terminates with a delimiter of some form (e.g., r"[^-+_.a-zA-Z0-9]").
// As a special case, EOF is considered a delimiter.
//
// It returns the length of the match, or 0 if r does not match or the match
// is directly followed by a non-delimiter. The match length is used as an
// offset into b, so r is expected to be anchored at the start of the input.
func matchWithDelim(r *regexp.Regexp, b []byte) int {
	n := len(r.Find(b))
	if n < len(b) {
		// Check that the next character is a delimiter.
		c := b[n]
		notDelim := (c == '-' || c == '+' || c == '.' || c == '_' ||
			('a' <= c && c <= 'z') ||
			('A' <= c && c <= 'Z') ||
			('0' <= c && c <= '9'))
		if notDelim {
			return 0
		}
	}
	return n
}