internal/encoding/json: rewrite to a token-based encoder and decoder

The previous decoder decoded a JSON number into a float64, which lacks 64-bit integer precision. I attempted to retrofit it by storing the raw bytes and the parsed-out number parts; see golang.org/cl/164377. While that is possible, the encoding logic for Value is not symmetrical with the decoding logic, which can be confusing since both utilize the same Value struct.

Joe and I decided that it would be better to rewrite the JSON encoder and decoder to be token-based instead, removing the need for sharing a model type and making it more efficient.

Change-Id: Ic0601428a824be4e20141623409ab4d92b6167c7
Reviewed-on: https://go-review.googlesource.com/c/protobuf/+/165677
Reviewed-by: Damien Neil <dneil@google.com>

commit: d3f8f2d4122f4b739c70822f3aae4f82bf28cac9 [log] [tgz]
author: Herbie Ong <herbie@google.com> Wed Mar 06 00:28:23 2019 -0800
committer: Herbie Ong <herbie@google.com> Mon Mar 11 21:53:21 2019 +0000
tree: 2e418f3305b51aca8bd89825324f2acfb67b3321
parent: 9d8c804b555ce2597eb7145ce9070022815d1328 [diff] [blame]
diff --git a/internal/encoding/json/number.go b/internal/encoding/json/number.go
index ec99cbd..6b3ed86 100644
--- a/internal/encoding/json/number.go
+++ b/internal/encoding/json/number.go

@@ -5,45 +5,32 @@
 package json
 
 import (
-	"io"
+	"bytes"
 	"math"
-	"regexp"
 	"strconv"
-
-	"github.com/golang/protobuf/v2/internal/errors"
 )
 
-// marshalNumber encodes v as a Number.
-func (p *encoder) marshalNumber(v Value) error {
-	var err error
-	p.out, err = appendNumber(p.out, v)
-	return err
-}
-func appendNumber(out []byte, v Value) ([]byte, error) {
-	if v.Type() != Number {
-		return nil, errors.New("invalid type %v, expected number", v.Type())
-	}
-	if len(v.raw) > 0 {
-		return append(out, v.raw...), nil
-	}
-	n := v.Number()
-	if math.IsInf(n, 0) || math.IsNaN(n) {
-		return nil, errors.New("invalid number value: %v", n)
+// appendFloat formats the given float using the given bitSize and appends the result to out.
+func appendFloat(out []byte, n float64, bitSize int) []byte {
+	switch {
+	case math.IsNaN(n):
+		return append(out, `"NaN"`...)
+	case math.IsInf(n, +1):
+		return append(out, `"Infinity"`...)
+	case math.IsInf(n, -1):
+		return append(out, `"-Infinity"`...)
 	}
 
 	// JSON number formatting logic based on encoding/json.
 	// See floatEncoder.encode for reference.
-	bits := 64
-	if float64(float32(n)) == n {
-		bits = 32
-	}
 	fmt := byte('f')
 	if abs := math.Abs(n); abs != 0 {
-		if bits == 64 && (abs < 1e-6 || abs >= 1e21) || bits == 32 && (float32(abs) < 1e-6 || float32(abs) >= 1e21) {
+		if bitSize == 64 && (abs < 1e-6 || abs >= 1e21) ||
+			bitSize == 32 && (float32(abs) < 1e-6 || float32(abs) >= 1e21) {
 			fmt = 'e'
 		}
 	}
-	out = strconv.AppendFloat(out, n, fmt, -1, bits)
+	out = strconv.AppendFloat(out, n, fmt, -1, bitSize)
 	if fmt == 'e' {
 		n := len(out)
 		if n >= 4 && out[n-4] == 'e' && out[n-3] == '-' && out[n-2] == '0' {
@@ -51,29 +38,172 @@
 			out = out[:n-1]
 		}
 	}
-	return out, nil
+	return out
 }
 
-// Exact expression to match a JSON floating-point number.
-// JSON's grammar for floats is more restrictive than Go's grammar.
-var floatRegexp = regexp.MustCompile("^-?(0|[1-9][0-9]*)([.][0-9]+)?([eE][+-]?[0-9]+)?")
-
-// unmarshalNumber decodes a Number from the input.
-func (p *decoder) unmarshalNumber() (Value, error) {
-	v, n, err := consumeNumber(p.in)
-	p.consume(n)
-	return v, err
+// numberParts is the result of parsing out a valid JSON number. It contains
+// the parts of a number. The parts are used for integer conversion.
+type numberParts struct {
+	neg  bool
+	intp []byte
+	frac []byte
+	exp  []byte
 }
-func consumeNumber(in []byte) (Value, int, error) {
-	if len(in) == 0 {
-		return Value{}, 0, io.ErrUnexpectedEOF
+
+// parseNumber returns a numberParts instance if it is able to read a JSON
+// number from the given []byte. It also returns the number of bytes read.
+// Parsing logic follows the definition in
+// https://tools.ietf.org/html/rfc7159#section-6, and is based off
+// encoding/json.isValidNumber function.
+func parseNumber(input []byte) (*numberParts, int) {
+	var n int
+	var neg bool
+	var intp []byte
+	var frac []byte
+	var exp []byte
+
+	s := input
+	if len(s) == 0 {
+		return nil, 0
 	}
-	if n := matchWithDelim(floatRegexp, in); n > 0 {
-		v, err := strconv.ParseFloat(string(in[:n]), 64)
-		if err != nil {
-			return Value{}, 0, err
+
+	// Optional -
+	if s[0] == '-' {
+		neg = true
+		s = s[1:]
+		n++
+		if len(s) == 0 {
+			return nil, 0
 		}
-		return rawValueOf(v, in[:n:n]), n, nil
 	}
-	return Value{}, 0, newSyntaxError("invalid %q as number", errRegexp.Find(in))
+
+	// Digits
+	switch {
+	case s[0] == '0':
+		// Skip first 0 and no need to store.
+		s = s[1:]
+		n++
+
+	case '1' <= s[0] && s[0] <= '9':
+		intp = append(intp, s[0])
+		s = s[1:]
+		n++
+		for len(s) > 0 && '0' <= s[0] && s[0] <= '9' {
+			intp = append(intp, s[0])
+			s = s[1:]
+			n++
+		}
+
+	default:
+		return nil, 0
+	}
+
+	// . followed by 1 or more digits.
+	if len(s) >= 2 && s[0] == '.' && '0' <= s[1] && s[1] <= '9' {
+		frac = append(frac, s[1])
+		s = s[2:]
+		n += 2
+		for len(s) > 0 && '0' <= s[0] && s[0] <= '9' {
+			frac = append(frac, s[0])
+			s = s[1:]
+			n++
+		}
+	}
+
+	// e or E followed by an optional - or + and
+	// 1 or more digits.
+	if len(s) >= 2 && (s[0] == 'e' || s[0] == 'E') {
+		s = s[1:]
+		n++
+		if s[0] == '+' || s[0] == '-' {
+			exp = append(exp, s[0])
+			s = s[1:]
+			n++
+			if len(s) == 0 {
+				return nil, 0
+			}
+		}
+		for len(s) > 0 && '0' <= s[0] && s[0] <= '9' {
+			exp = append(exp, s[0])
+			s = s[1:]
+			n++
+		}
+	}
+
+	// Check that next byte is a delimiter or it is at the end.
+	if n < len(input) && isNotDelim(input[n]) {
+		return nil, 0
+	}
+
+	return &numberParts{
+		neg:  neg,
+		intp: intp,
+		frac: bytes.TrimRight(frac, "0"), // Remove unnecessary 0s to the right.
+		exp:  exp,
+	}, n
+}
+
+// normalizeToIntString returns an integer string in normal form, without the
+// E-notation, for the given numberParts. It returns false if the number is not
+// an integer or if the exponent exceeds the max/min int value.
+func normalizeToIntString(n *numberParts) (string, bool) {
+	num := n.intp
+	intpSize := len(num)
+	fracSize := len(n.frac)
+
+	if intpSize == 0 && fracSize == 0 {
+		return "0", true
+	}
+
+	var exp int
+	if len(n.exp) > 0 {
+		i, err := strconv.ParseInt(string(n.exp), 10, 32)
+		if err != nil {
+			return "", false
+		}
+		exp = int(i)
+	}
+
+	if exp >= 0 {
+		// For positive E, shift fraction digits into integer part and also pad
+		// with zeroes as needed.
+
+		// If there are more digits in fraction than the E value, then number is
+		// not an integer.
+		if fracSize > exp {
+			return "", false
+		}
+
+		num = append(num, n.frac...)
+		for i := 0; i < exp-fracSize; i++ {
+			num = append(num, '0')
+		}
+
+	} else {
+		// For negative E, shift digits in integer part out.
+
+		// If there are any fractions to begin with, then number is not an
+		// integer.
+		if fracSize > 0 {
+			return "", false
+		}
+
+		index := intpSize + exp
+		if index < 0 {
+			return "", false
+		}
+		// If any of the digits being shifted out is non-zero, then number is
+		// not an integer.
+		for i := index; i < intpSize; i++ {
+			if num[i] != '0' {
+				return "", false
+			}
+		}
+		num = num[:index]
+	}
+
+	if n.neg {
+		return "-" + string(num), true
+	}
+	return string(num), true
 }
commit	d3f8f2d4122f4b739c70822f3aae4f82bf28cac9	[log] [tgz]
author	Herbie Ong <herbie@google.com>	Wed Mar 06 00:28:23 2019 -0800
committer	Herbie Ong <herbie@google.com>	Mon Mar 11 21:53:21 2019 +0000
tree	2e418f3305b51aca8bd89825324f2acfb67b3321
parent	9d8c804b555ce2597eb7145ce9070022815d1328 [diff] [blame]