Joe Tsai | 27c2a76 | 2018-08-01 16:48:18 -0700 | [diff] [blame] | 1 | // Copyright 2018 The Go Authors. All rights reserved. |
| 2 | // Use of this source code is governed by a BSD-style |
| 3 | // license that can be found in the LICENSE file. |
| 4 | |
| 5 | package text |
| 6 | |
| 7 | import ( |
| 8 | "bytes" |
| 9 | "io" |
| 10 | "math" |
Joe Tsai | 27c2a76 | 2018-08-01 16:48:18 -0700 | [diff] [blame] | 11 | "strconv" |
Joe Tsai | 27c2a76 | 2018-08-01 16:48:18 -0700 | [diff] [blame] | 12 | |
Damien Neil | e89e624 | 2019-05-13 23:55:40 -0700 | [diff] [blame] | 13 | "google.golang.org/protobuf/internal/errors" |
Joe Tsai | 27c2a76 | 2018-08-01 16:48:18 -0700 | [diff] [blame] | 14 | ) |
| 15 | |
Herbie Ong | 84f0960 | 2019-01-17 19:31:47 -0800 | [diff] [blame] | 16 | // marshalNumber encodes v as either a Bool, Int, Uint, Float32, or Float64. |
Joe Tsai | 27c2a76 | 2018-08-01 16:48:18 -0700 | [diff] [blame] | 17 | func (p *encoder) marshalNumber(v Value) error { |
| 18 | var err error |
| 19 | p.out, err = appendNumber(p.out, v) |
| 20 | return err |
| 21 | } |
| 22 | func appendNumber(out []byte, v Value) ([]byte, error) { |
| 23 | if len(v.raw) > 0 { |
| 24 | switch v.Type() { |
Herbie Ong | 84f0960 | 2019-01-17 19:31:47 -0800 | [diff] [blame] | 25 | case Bool, Int, Uint, Float32, Float64: |
Joe Tsai | 27c2a76 | 2018-08-01 16:48:18 -0700 | [diff] [blame] | 26 | return append(out, v.raw...), nil |
| 27 | } |
| 28 | } |
| 29 | switch v.Type() { |
| 30 | case Bool: |
| 31 | if b, _ := v.Bool(); b { |
| 32 | return append(out, "true"...), nil |
| 33 | } else { |
| 34 | return append(out, "false"...), nil |
| 35 | } |
| 36 | case Int: |
| 37 | return strconv.AppendInt(out, int64(v.num), 10), nil |
| 38 | case Uint: |
| 39 | return strconv.AppendUint(out, uint64(v.num), 10), nil |
Herbie Ong | 84f0960 | 2019-01-17 19:31:47 -0800 | [diff] [blame] | 40 | case Float32: |
| 41 | return appendFloat(out, v, 32) |
| 42 | case Float64: |
| 43 | return appendFloat(out, v, 64) |
Joe Tsai | 27c2a76 | 2018-08-01 16:48:18 -0700 | [diff] [blame] | 44 | default: |
| 45 | return nil, errors.New("invalid type %v, expected bool or number", v.Type()) |
| 46 | } |
| 47 | } |
| 48 | |
Herbie Ong | 84f0960 | 2019-01-17 19:31:47 -0800 | [diff] [blame] | 49 | func appendFloat(out []byte, v Value, bitSize int) ([]byte, error) { |
| 50 | switch n := math.Float64frombits(v.num); { |
| 51 | case math.IsNaN(n): |
| 52 | return append(out, "nan"...), nil |
| 53 | case math.IsInf(n, +1): |
| 54 | return append(out, "inf"...), nil |
| 55 | case math.IsInf(n, -1): |
| 56 | return append(out, "-inf"...), nil |
| 57 | default: |
| 58 | return strconv.AppendFloat(out, n, 'g', -1, bitSize), nil |
| 59 | } |
| 60 | } |
| 61 | |
// These literals were derived by reverse engineering the C++ code
// in tokenizer.cc and text_format.cc.
var (
	// literals maps each recognized keyword to the Go value it denotes:
	// a bool for the boolean spellings and a float64 for the non-finite
	// floating-point values.
	literals = map[string]interface{}{
		// These exact literals are the ones supported in C++.
		// In C++, a 1-bit unsigned integer is also allowed to represent
		// a boolean. This is handled in Value.Bool.
		"t":     true,
		"true":  true,
		"True":  true,
		"f":     false,
		"false": false,
		"False": false,

		// C++ permits "-nan" and the case-insensitive variants of these.
		// However, Go continues to be case-sensitive.
		"nan":  math.NaN(),
		"inf":  math.Inf(+1),
		"-inf": math.Inf(-1),
	}
)
| 83 | |
Herbie Ong | 84f0960 | 2019-01-17 19:31:47 -0800 | [diff] [blame] | 84 | // unmarshalNumber decodes a Bool, Int, Uint, or Float64 from the input. |
Joe Tsai | 27c2a76 | 2018-08-01 16:48:18 -0700 | [diff] [blame] | 85 | func (p *decoder) unmarshalNumber() (Value, error) { |
| 86 | v, n, err := consumeNumber(p.in) |
| 87 | p.consume(n) |
| 88 | return v, err |
| 89 | } |
Herbie Ong | a3369c5 | 2019-04-23 00:24:46 -0700 | [diff] [blame] | 90 | |
Joe Tsai | 27c2a76 | 2018-08-01 16:48:18 -0700 | [diff] [blame] | 91 | func consumeNumber(in []byte) (Value, int, error) { |
| 92 | if len(in) == 0 { |
| 93 | return Value{}, 0, io.ErrUnexpectedEOF |
| 94 | } |
Herbie Ong | a3369c5 | 2019-04-23 00:24:46 -0700 | [diff] [blame] | 95 | if v, n := matchLiteral(in); n > 0 { |
| 96 | return rawValueOf(v, in[:n]), n, nil |
| 97 | } |
| 98 | |
| 99 | num, ok := parseNumber(in) |
| 100 | if !ok { |
| 101 | return Value{}, 0, newSyntaxError("invalid %q as number or bool", errRegexp.Find(in)) |
| 102 | } |
| 103 | |
| 104 | if num.typ == numFloat { |
| 105 | f, err := strconv.ParseFloat(string(num.value), 64) |
| 106 | if err != nil { |
| 107 | return Value{}, 0, err |
| 108 | } |
| 109 | return rawValueOf(f, in[:num.size]), num.size, nil |
| 110 | } |
| 111 | |
| 112 | if num.neg { |
| 113 | v, err := strconv.ParseInt(string(num.value), 0, 64) |
| 114 | if err != nil { |
| 115 | return Value{}, 0, err |
| 116 | } |
| 117 | return rawValueOf(v, num.value), num.size, nil |
| 118 | } |
| 119 | v, err := strconv.ParseUint(string(num.value), 0, 64) |
| 120 | if err != nil { |
| 121 | return Value{}, 0, err |
| 122 | } |
| 123 | return rawValueOf(v, num.value), num.size, nil |
| 124 | } |
| 125 | |
| 126 | func matchLiteral(in []byte) (interface{}, int) { |
| 127 | switch in[0] { |
| 128 | case 't', 'T': |
| 129 | rest := in[1:] |
| 130 | if len(rest) == 0 || isDelim(rest[0]) { |
| 131 | return true, 1 |
| 132 | } |
| 133 | if n := matchStringWithDelim("rue", rest); n > 0 { |
| 134 | return true, 4 |
| 135 | } |
| 136 | case 'f', 'F': |
| 137 | rest := in[1:] |
| 138 | if len(rest) == 0 || isDelim(rest[0]) { |
| 139 | return false, 1 |
| 140 | } |
| 141 | if n := matchStringWithDelim("alse", rest); n > 0 { |
| 142 | return false, 5 |
| 143 | } |
| 144 | case 'n': |
| 145 | if n := matchStringWithDelim("nan", in); n > 0 { |
| 146 | return math.NaN(), 3 |
| 147 | } |
| 148 | case 'i': |
| 149 | if n := matchStringWithDelim("inf", in); n > 0 { |
| 150 | return math.Inf(1), 3 |
| 151 | } |
| 152 | case '-': |
| 153 | if n := matchStringWithDelim("-inf", in); n > 0 { |
| 154 | return math.Inf(-1), 4 |
Joe Tsai | 27c2a76 | 2018-08-01 16:48:18 -0700 | [diff] [blame] | 155 | } |
| 156 | } |
Herbie Ong | a3369c5 | 2019-04-23 00:24:46 -0700 | [diff] [blame] | 157 | return nil, 0 |
| 158 | } |
| 159 | |
| 160 | func matchStringWithDelim(s string, b []byte) int { |
| 161 | if !bytes.HasPrefix(b, []byte(s)) { |
| 162 | return 0 |
| 163 | } |
| 164 | |
| 165 | n := len(s) |
| 166 | if n < len(b) && !isDelim(b[n]) { |
| 167 | return 0 |
| 168 | } |
| 169 | return n |
| 170 | } |
| 171 | |
// numType identifies the lexical form of a number recognized by parseNumber.
type numType uint8

// numDec is the zero value. Each of the remaining kinds occupies its own bit so
// that several kinds can be tested at once with a mask such as numHex|numOct.
const (
	numDec   numType = 0
	numHex   numType = 1 << 0
	numOct   numType = 1 << 1
	numFloat numType = 1 << 2
)
| 180 | |
// number is the result of parsing out a valid number from parseNumber. It
// contains data for doing float or integer conversion via the strconv package.
type number struct {
	// typ is the kind of number recognized: numDec, numHex, numOct, or
	// numFloat.
	typ numType
	// neg reports whether the number started with a '-' sign.
	neg bool
	// Size of input taken up by the number. This may not be the same as
	// len(number.value): a trailing f/F float suffix is counted here but is
	// not part of value.
	size int
	// Bytes for doing strconv.Parse{Float,Int,Uint} conversion. This is a
	// prefix of the parsed input and includes the leading '-' when neg is set.
	value []byte
}
| 192 | |
| 193 | // parseNumber constructs a number object from given input. It allows for the |
| 194 | // following patterns: |
| 195 | // integer: ^-?([1-9][0-9]*|0[xX][0-9a-fA-F]+|0[0-7]*) |
| 196 | // float: ^-?((0|[1-9][0-9]*)?([.][0-9]*)?([eE][+-]?[0-9]+)?[fF]?) |
| 197 | func parseNumber(input []byte) (number, bool) { |
| 198 | var size int |
| 199 | var neg bool |
| 200 | typ := numDec |
| 201 | |
| 202 | s := input |
| 203 | if len(s) == 0 { |
| 204 | return number{}, false |
| 205 | } |
| 206 | |
| 207 | // Optional - |
| 208 | if s[0] == '-' { |
| 209 | neg = true |
| 210 | s = s[1:] |
| 211 | size++ |
| 212 | if len(s) == 0 { |
| 213 | return number{}, false |
Joe Tsai | 27c2a76 | 2018-08-01 16:48:18 -0700 | [diff] [blame] | 214 | } |
| 215 | } |
Herbie Ong | a3369c5 | 2019-04-23 00:24:46 -0700 | [diff] [blame] | 216 | |
| 217 | // C++ allows for whitespace and comments in between the negative sign and |
| 218 | // the rest of the number. This logic currently does not but is consistent |
| 219 | // with v1. |
| 220 | |
| 221 | switch { |
| 222 | case s[0] == '0': |
| 223 | if len(s) > 1 { |
| 224 | switch { |
| 225 | case s[1] == 'x' || s[1] == 'X': |
| 226 | // Parse as hex number. |
| 227 | typ = numHex |
| 228 | n := 2 |
| 229 | s = s[2:] |
| 230 | for len(s) > 0 && (('0' <= s[0] && s[0] <= '9') || |
| 231 | ('a' <= s[0] && s[0] <= 'f') || |
| 232 | ('A' <= s[0] && s[0] <= 'F')) { |
| 233 | s = s[1:] |
| 234 | n++ |
| 235 | } |
| 236 | if n == 2 { |
| 237 | return number{}, false |
| 238 | } |
| 239 | size += n |
| 240 | |
| 241 | case '0' <= s[1] && s[1] <= '7': |
| 242 | // Parse as octal number. |
| 243 | typ = numOct |
| 244 | n := 2 |
| 245 | s = s[2:] |
| 246 | for len(s) > 0 && '0' <= s[0] && s[0] <= '7' { |
| 247 | s = s[1:] |
| 248 | n++ |
| 249 | } |
| 250 | size += n |
Joe Tsai | 27c2a76 | 2018-08-01 16:48:18 -0700 | [diff] [blame] | 251 | } |
Herbie Ong | a3369c5 | 2019-04-23 00:24:46 -0700 | [diff] [blame] | 252 | |
| 253 | if typ&(numHex|numOct) > 0 { |
| 254 | if len(s) > 0 && !isDelim(s[0]) { |
| 255 | return number{}, false |
| 256 | } |
| 257 | return number{ |
| 258 | typ: typ, |
| 259 | size: size, |
| 260 | neg: neg, |
| 261 | value: input[:size], |
| 262 | }, true |
Joe Tsai | 27c2a76 | 2018-08-01 16:48:18 -0700 | [diff] [blame] | 263 | } |
Joe Tsai | 27c2a76 | 2018-08-01 16:48:18 -0700 | [diff] [blame] | 264 | } |
Herbie Ong | a3369c5 | 2019-04-23 00:24:46 -0700 | [diff] [blame] | 265 | s = s[1:] |
| 266 | size++ |
| 267 | |
| 268 | case '1' <= s[0] && s[0] <= '9': |
| 269 | n := 1 |
| 270 | s = s[1:] |
| 271 | for len(s) > 0 && '0' <= s[0] && s[0] <= '9' { |
| 272 | s = s[1:] |
| 273 | n++ |
| 274 | } |
| 275 | size += n |
| 276 | |
| 277 | case s[0] == '.': |
| 278 | // Handled below. |
| 279 | |
| 280 | default: |
| 281 | return number{}, false |
Joe Tsai | 27c2a76 | 2018-08-01 16:48:18 -0700 | [diff] [blame] | 282 | } |
Herbie Ong | a3369c5 | 2019-04-23 00:24:46 -0700 | [diff] [blame] | 283 | |
| 284 | // . followed by 0 or more digits. |
| 285 | if len(s) > 0 && s[0] == '.' { |
| 286 | typ = numFloat |
| 287 | n := 1 |
| 288 | s = s[1:] |
| 289 | for len(s) > 0 && '0' <= s[0] && s[0] <= '9' { |
| 290 | s = s[1:] |
| 291 | n++ |
| 292 | } |
| 293 | size += n |
| 294 | } |
| 295 | |
| 296 | // e or E followed by an optional - or + and 1 or more digits. |
| 297 | if len(s) >= 2 && (s[0] == 'e' || s[0] == 'E') { |
| 298 | typ = numFloat |
| 299 | s = s[1:] |
| 300 | n := 1 |
| 301 | if s[0] == '+' || s[0] == '-' { |
| 302 | s = s[1:] |
| 303 | n++ |
| 304 | if len(s) == 0 { |
| 305 | return number{}, false |
| 306 | } |
| 307 | } |
| 308 | for len(s) > 0 && '0' <= s[0] && s[0] <= '9' { |
| 309 | s = s[1:] |
| 310 | n++ |
| 311 | } |
| 312 | size += n |
| 313 | } |
| 314 | |
| 315 | // At this point, input[:size] contains a valid number that can be converted |
| 316 | // via strconv.Parse{Float,Int,Uint}. |
| 317 | value := input[:size] |
| 318 | |
| 319 | // Optional suffix f or F for floats. |
| 320 | if len(s) > 0 && (s[0] == 'f' || s[0] == 'F') { |
| 321 | typ = numFloat |
| 322 | s = s[1:] |
| 323 | size++ |
| 324 | } |
| 325 | |
| 326 | // Check that next byte is a delimiter or it is at the end. |
| 327 | if len(s) > 0 && !isDelim(s[0]) { |
| 328 | return number{}, false |
| 329 | } |
| 330 | |
| 331 | return number{ |
| 332 | typ: typ, |
| 333 | size: size, |
| 334 | neg: neg, |
| 335 | value: value, |
| 336 | }, true |
Joe Tsai | 27c2a76 | 2018-08-01 16:48:18 -0700 | [diff] [blame] | 337 | } |