blob: 4e31ee1d10825bb19516290dc05b262cdb405249 [file] [log] [blame]
Joe Tsai27c2a762018-08-01 16:48:18 -07001// Copyright 2018 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package text
6
7import (
8 "bytes"
9 "io"
10 "math"
Joe Tsai27c2a762018-08-01 16:48:18 -070011 "strconv"
Joe Tsai27c2a762018-08-01 16:48:18 -070012
Damien Neile89e6242019-05-13 23:55:40 -070013 "google.golang.org/protobuf/internal/errors"
Joe Tsai27c2a762018-08-01 16:48:18 -070014)
15
Herbie Ong84f09602019-01-17 19:31:47 -080016// marshalNumber encodes v as either a Bool, Int, Uint, Float32, or Float64.
Joe Tsai27c2a762018-08-01 16:48:18 -070017func (p *encoder) marshalNumber(v Value) error {
18 var err error
19 p.out, err = appendNumber(p.out, v)
20 return err
21}
22func appendNumber(out []byte, v Value) ([]byte, error) {
23 if len(v.raw) > 0 {
24 switch v.Type() {
Herbie Ong84f09602019-01-17 19:31:47 -080025 case Bool, Int, Uint, Float32, Float64:
Joe Tsai27c2a762018-08-01 16:48:18 -070026 return append(out, v.raw...), nil
27 }
28 }
29 switch v.Type() {
30 case Bool:
31 if b, _ := v.Bool(); b {
32 return append(out, "true"...), nil
33 } else {
34 return append(out, "false"...), nil
35 }
36 case Int:
37 return strconv.AppendInt(out, int64(v.num), 10), nil
38 case Uint:
39 return strconv.AppendUint(out, uint64(v.num), 10), nil
Herbie Ong84f09602019-01-17 19:31:47 -080040 case Float32:
41 return appendFloat(out, v, 32)
42 case Float64:
43 return appendFloat(out, v, 64)
Joe Tsai27c2a762018-08-01 16:48:18 -070044 default:
45 return nil, errors.New("invalid type %v, expected bool or number", v.Type())
46 }
47}
48
Herbie Ong84f09602019-01-17 19:31:47 -080049func appendFloat(out []byte, v Value, bitSize int) ([]byte, error) {
50 switch n := math.Float64frombits(v.num); {
51 case math.IsNaN(n):
52 return append(out, "nan"...), nil
53 case math.IsInf(n, +1):
54 return append(out, "inf"...), nil
55 case math.IsInf(n, -1):
56 return append(out, "-inf"...), nil
57 default:
58 return strconv.AppendFloat(out, n, 'g', -1, bitSize), nil
59 }
60}
61
// literals maps each exact bool and special float spelling to the Go value
// it stands for. The set of spellings was derived by reverse engineering the
// C++ code in tokenizer.cc and text_format.cc.
//
// NOTE(review): nothing in the visible part of this file reads this map;
// matchLiteral hard-codes the same spellings. Confirm whether it is still
// referenced elsewhere in the package.
var literals = map[string]interface{}{
	// These exact literals are the ones supported in C++.
	// In C++, a 1-bit unsigned integer is also allowed to represent
	// a boolean. This is handled in Value.Bool.
	"t":     true,
	"true":  true,
	"True":  true,
	"f":     false,
	"false": false,
	"False": false,

	// C++ permits "-nan" and the case-insensitive variants of these.
	// However, Go continues to be case-sensitive.
	"nan":  math.NaN(),
	"inf":  math.Inf(+1),
	"-inf": math.Inf(-1),
}
83
Herbie Ong84f09602019-01-17 19:31:47 -080084// unmarshalNumber decodes a Bool, Int, Uint, or Float64 from the input.
Joe Tsai27c2a762018-08-01 16:48:18 -070085func (p *decoder) unmarshalNumber() (Value, error) {
86 v, n, err := consumeNumber(p.in)
87 p.consume(n)
88 return v, err
89}
Herbie Onga3369c52019-04-23 00:24:46 -070090
Joe Tsai27c2a762018-08-01 16:48:18 -070091func consumeNumber(in []byte) (Value, int, error) {
92 if len(in) == 0 {
93 return Value{}, 0, io.ErrUnexpectedEOF
94 }
Herbie Onga3369c52019-04-23 00:24:46 -070095 if v, n := matchLiteral(in); n > 0 {
96 return rawValueOf(v, in[:n]), n, nil
97 }
98
99 num, ok := parseNumber(in)
100 if !ok {
101 return Value{}, 0, newSyntaxError("invalid %q as number or bool", errRegexp.Find(in))
102 }
103
104 if num.typ == numFloat {
105 f, err := strconv.ParseFloat(string(num.value), 64)
106 if err != nil {
107 return Value{}, 0, err
108 }
109 return rawValueOf(f, in[:num.size]), num.size, nil
110 }
111
112 if num.neg {
113 v, err := strconv.ParseInt(string(num.value), 0, 64)
114 if err != nil {
115 return Value{}, 0, err
116 }
117 return rawValueOf(v, num.value), num.size, nil
118 }
119 v, err := strconv.ParseUint(string(num.value), 0, 64)
120 if err != nil {
121 return Value{}, 0, err
122 }
123 return rawValueOf(v, num.value), num.size, nil
124}
125
126func matchLiteral(in []byte) (interface{}, int) {
127 switch in[0] {
128 case 't', 'T':
129 rest := in[1:]
130 if len(rest) == 0 || isDelim(rest[0]) {
131 return true, 1
132 }
133 if n := matchStringWithDelim("rue", rest); n > 0 {
134 return true, 4
135 }
136 case 'f', 'F':
137 rest := in[1:]
138 if len(rest) == 0 || isDelim(rest[0]) {
139 return false, 1
140 }
141 if n := matchStringWithDelim("alse", rest); n > 0 {
142 return false, 5
143 }
144 case 'n':
145 if n := matchStringWithDelim("nan", in); n > 0 {
146 return math.NaN(), 3
147 }
148 case 'i':
149 if n := matchStringWithDelim("inf", in); n > 0 {
150 return math.Inf(1), 3
151 }
152 case '-':
153 if n := matchStringWithDelim("-inf", in); n > 0 {
154 return math.Inf(-1), 4
Joe Tsai27c2a762018-08-01 16:48:18 -0700155 }
156 }
Herbie Onga3369c52019-04-23 00:24:46 -0700157 return nil, 0
158}
159
160func matchStringWithDelim(s string, b []byte) int {
161 if !bytes.HasPrefix(b, []byte(s)) {
162 return 0
163 }
164
165 n := len(s)
166 if n < len(b) && !isDelim(b[n]) {
167 return 0
168 }
169 return n
170}
171
// numType classifies the kind of numeric token found by parseNumber.
// numDec is the zero value; the remaining kinds are distinct single bits so
// that several of them can be tested at once with a mask.
type numType uint8

const (
	numDec   numType = 0
	numHex   numType = 1 << 0
	numOct   numType = 1 << 1
	numFloat numType = 1 << 2
)
180
181// number is the result of parsing out a valid number from parseNumber. It
182// contains data for doing float or integer conversion via the strconv package.
183type number struct {
184 typ numType
185 neg bool
186 // Size of input taken up by the number. This may not be the same as
187 // len(number.value).
188 size int
189 // Bytes for doing strconv.Parse{Float,Int,Uint} conversion.
190 value []byte
191}
192
193// parseNumber constructs a number object from given input. It allows for the
194// following patterns:
195// integer: ^-?([1-9][0-9]*|0[xX][0-9a-fA-F]+|0[0-7]*)
196// float: ^-?((0|[1-9][0-9]*)?([.][0-9]*)?([eE][+-]?[0-9]+)?[fF]?)
197func parseNumber(input []byte) (number, bool) {
198 var size int
199 var neg bool
200 typ := numDec
201
202 s := input
203 if len(s) == 0 {
204 return number{}, false
205 }
206
207 // Optional -
208 if s[0] == '-' {
209 neg = true
210 s = s[1:]
211 size++
212 if len(s) == 0 {
213 return number{}, false
Joe Tsai27c2a762018-08-01 16:48:18 -0700214 }
215 }
Herbie Onga3369c52019-04-23 00:24:46 -0700216
217 // C++ allows for whitespace and comments in between the negative sign and
218 // the rest of the number. This logic currently does not but is consistent
219 // with v1.
220
221 switch {
222 case s[0] == '0':
223 if len(s) > 1 {
224 switch {
225 case s[1] == 'x' || s[1] == 'X':
226 // Parse as hex number.
227 typ = numHex
228 n := 2
229 s = s[2:]
230 for len(s) > 0 && (('0' <= s[0] && s[0] <= '9') ||
231 ('a' <= s[0] && s[0] <= 'f') ||
232 ('A' <= s[0] && s[0] <= 'F')) {
233 s = s[1:]
234 n++
235 }
236 if n == 2 {
237 return number{}, false
238 }
239 size += n
240
241 case '0' <= s[1] && s[1] <= '7':
242 // Parse as octal number.
243 typ = numOct
244 n := 2
245 s = s[2:]
246 for len(s) > 0 && '0' <= s[0] && s[0] <= '7' {
247 s = s[1:]
248 n++
249 }
250 size += n
Joe Tsai27c2a762018-08-01 16:48:18 -0700251 }
Herbie Onga3369c52019-04-23 00:24:46 -0700252
253 if typ&(numHex|numOct) > 0 {
254 if len(s) > 0 && !isDelim(s[0]) {
255 return number{}, false
256 }
257 return number{
258 typ: typ,
259 size: size,
260 neg: neg,
261 value: input[:size],
262 }, true
Joe Tsai27c2a762018-08-01 16:48:18 -0700263 }
Joe Tsai27c2a762018-08-01 16:48:18 -0700264 }
Herbie Onga3369c52019-04-23 00:24:46 -0700265 s = s[1:]
266 size++
267
268 case '1' <= s[0] && s[0] <= '9':
269 n := 1
270 s = s[1:]
271 for len(s) > 0 && '0' <= s[0] && s[0] <= '9' {
272 s = s[1:]
273 n++
274 }
275 size += n
276
277 case s[0] == '.':
278 // Handled below.
279
280 default:
281 return number{}, false
Joe Tsai27c2a762018-08-01 16:48:18 -0700282 }
Herbie Onga3369c52019-04-23 00:24:46 -0700283
284 // . followed by 0 or more digits.
285 if len(s) > 0 && s[0] == '.' {
286 typ = numFloat
287 n := 1
288 s = s[1:]
289 for len(s) > 0 && '0' <= s[0] && s[0] <= '9' {
290 s = s[1:]
291 n++
292 }
293 size += n
294 }
295
296 // e or E followed by an optional - or + and 1 or more digits.
297 if len(s) >= 2 && (s[0] == 'e' || s[0] == 'E') {
298 typ = numFloat
299 s = s[1:]
300 n := 1
301 if s[0] == '+' || s[0] == '-' {
302 s = s[1:]
303 n++
304 if len(s) == 0 {
305 return number{}, false
306 }
307 }
308 for len(s) > 0 && '0' <= s[0] && s[0] <= '9' {
309 s = s[1:]
310 n++
311 }
312 size += n
313 }
314
315 // At this point, input[:size] contains a valid number that can be converted
316 // via strconv.Parse{Float,Int,Uint}.
317 value := input[:size]
318
319 // Optional suffix f or F for floats.
320 if len(s) > 0 && (s[0] == 'f' || s[0] == 'F') {
321 typ = numFloat
322 s = s[1:]
323 size++
324 }
325
326 // Check that next byte is a delimiter or it is at the end.
327 if len(s) > 0 && !isDelim(s[0]) {
328 return number{}, false
329 }
330
331 return number{
332 typ: typ,
333 size: size,
334 neg: neg,
335 value: value,
336 }, true
Joe Tsai27c2a762018-08-01 16:48:18 -0700337}