internal/encoding/json: rewrite to a token-based encoder and decoder

The previous decoder decodes a JSON number into a float64, which cannot
represent every 64-bit integer exactly.

I attempted to retrofit it by storing the raw bytes and the parsed-out
number parts; see golang.org/cl/164377.  While that is possible, the
encoding logic for Value is not symmetrical with the decoding logic,
which can be confusing since both utilize the same Value struct.

Joe and I decided that it would be better to rewrite the JSON encoder
and decoder to be token-based instead, removing the need to share a
model type and making the implementation more efficient.

Change-Id: Ic0601428a824be4e20141623409ab4d92b6167c7
Reviewed-on: https://go-review.googlesource.com/c/protobuf/+/165677
Reviewed-by: Damien Neil <dneil@google.com>
diff --git a/internal/encoding/json/encode.go b/internal/encoding/json/encode.go
index 93b0928..f73e224 100644
--- a/internal/encoding/json/encode.go
+++ b/internal/encoding/json/encode.go
@@ -5,124 +5,160 @@
 package json
 
 import (
+	"strconv"
 	"strings"
 
 	"github.com/golang/protobuf/v2/internal/errors"
 )
 
-// Marshal serializes v as the JSON format.
+// Encoder provides methods to write out JSON constructs and values. The user is
+// responsible for producing valid sequences of JSON constructs and values.
+type Encoder struct {
+	indent   string // per-level indentation unit; empty selects compact output
+	lastType Type   // type passed to the most recent prepareNext call
+	indents  []byte // indentation currently emitted after each newline
+	out      []byte // output accumulated so far, returned by Bytes
+}
+
+// NewEncoder returns an Encoder, or an error if indent has non-space/tab characters.
 //
 // If indent is a non-empty string, it causes every entry for an Array or Object
 // to be preceded by the indent and trailed by a newline.
-func Marshal(v Value, indent string) ([]byte, error) {
-	p := encoder{}
+func NewEncoder(indent string) (*Encoder, error) {
+	e := &Encoder{}
 	if len(indent) > 0 {
 		if strings.Trim(indent, " \t") != "" {
-			return nil, errors.New("indent may only be composed of space and tab characters")
+			return nil, errors.New("indent may only be composed of space or tab characters")
 		}
-		p.indent = indent
-		p.newline = "\n"
+		e.indent = indent // a non-empty indent switches prepareNext to multi-line output
 	}
-	err := p.marshalValue(v)
-	if !p.nerr.Merge(err) {
-		return nil, err
-	}
-	return p.out, p.nerr.E
+	return e, nil
 }
 
-type encoder struct {
-	nerr errors.NonFatal
-	out  []byte
-
-	indent  string
-	indents []byte
-	newline string // set to "\n" if len(indent) > 0
+// Bytes returns the bytes written so far; it is the internal buffer, not a copy.
+func (e *Encoder) Bytes() []byte {
+	return e.out
 }
 
-func (p *encoder) marshalValue(v Value) error {
-	switch v.Type() {
-	case Null:
-		p.out = append(p.out, "null"...)
-		return nil
-	case Bool:
-		if v.Bool() {
-			p.out = append(p.out, "true"...)
-		} else {
-			p.out = append(p.out, "false"...)
-		}
-		return nil
-	case Number:
-		return p.marshalNumber(v)
-	case String:
-		return p.marshalString(v)
-	case Array:
-		return p.marshalArray(v)
-	case Object:
-		return p.marshalObject(v)
-	default:
-		return errors.New("invalid type %v to encode value", v.Type())
+// WriteNull writes out the JSON literal null, after any separator from prepareNext.
+func (e *Encoder) WriteNull() {
+	e.prepareNext(Null)
+	e.out = append(e.out, "null"...)
+}
+
+// WriteBool writes out the given boolean as the JSON literal true or false.
+func (e *Encoder) WriteBool(b bool) {
+	e.prepareNext(Bool)
+	if b {
+		e.out = append(e.out, "true"...)
+	} else {
+		e.out = append(e.out, "false"...)
 	}
 }
 
-func (p *encoder) marshalArray(v Value) error {
-	if v.Type() != Array {
-		return errors.New("invalid type %v, expected array", v.Type())
+// WriteString writes out the given string as a JSON string value.
+func (e *Encoder) WriteString(s string) error {
+	e.prepareNext(String)
+	var err error // predeclared: := cannot assign to the field e.out
+	if e.out, err = appendString(e.out, s); err != nil {
+		return err
 	}
-	elems := v.Array()
-	p.out = append(p.out, '[')
-	p.indents = append(p.indents, p.indent...)
-	if len(elems) > 0 {
-		p.out = append(p.out, p.newline...)
-	}
-	for i, elem := range elems {
-		p.out = append(p.out, p.indents...)
-		if err := p.marshalValue(elem); !p.nerr.Merge(err) {
-			return err
-		}
-		if i < len(elems)-1 {
-			p.out = append(p.out, ',')
-		}
-		p.out = append(p.out, p.newline...)
-	}
-	p.indents = p.indents[:len(p.indents)-len(p.indent)]
-	if len(elems) > 0 {
-		p.out = append(p.out, p.indents...)
-	}
-	p.out = append(p.out, ']')
 	return nil
 }
 
-func (p *encoder) marshalObject(v Value) error {
-	if v.Type() != Object {
-		return errors.New("invalid type %v, expected object", v.Type())
-	}
-	items := v.Object()
-	p.out = append(p.out, '{')
-	p.indents = append(p.indents, p.indent...)
-	if len(items) > 0 {
-		p.out = append(p.out, p.newline...)
-	}
-	for i, item := range items {
-		p.out = append(p.out, p.indents...)
-		if err := p.marshalString(item[0]); !p.nerr.Merge(err) {
-			return err
+// WriteFloat writes out n as a JSON number value; bitSize is passed to appendFloat.
+func (e *Encoder) WriteFloat(n float64, bitSize int) {
+	e.prepareNext(Number)
+	e.out = appendFloat(e.out, n, bitSize)
+}
+
+// WriteInt writes out the given signed integer as a base-10 JSON number value.
+func (e *Encoder) WriteInt(n int64) {
+	e.prepareNext(Number)
+	e.out = append(e.out, strconv.FormatInt(n, 10)...)
+}
+
+// WriteUint writes out the given unsigned integer as a base-10 JSON number value.
+func (e *Encoder) WriteUint(n uint64) {
+	e.prepareNext(Number)
+	e.out = append(e.out, strconv.FormatUint(n, 10)...)
+}
+
+// StartObject writes out the '{' symbol that opens a JSON object.
+func (e *Encoder) StartObject() {
+	e.prepareNext(StartObject)
+	e.out = append(e.out, '{')
+}
+
+// EndObject writes out the '}' symbol that closes a JSON object.
+func (e *Encoder) EndObject() {
+	e.prepareNext(EndObject)
+	e.out = append(e.out, '}')
+}
+
+// WriteName writes out the given string as a JSON string value followed by the
+// name separator ':'. The separator is written even if an error is returned.
+func (e *Encoder) WriteName(s string) error {
+	e.prepareNext(Name)
+	// Errors returned by appendString() are non-fatal, so ':' is still appended.
+	var err error
+	e.out, err = appendString(e.out, s)
+	e.out = append(e.out, ':')
+	return err
+}
+
+// StartArray writes out the '[' symbol that opens a JSON array.
+func (e *Encoder) StartArray() {
+	e.prepareNext(StartArray)
+	e.out = append(e.out, '[')
+}
+
+// EndArray writes out the ']' symbol that closes a JSON array.
+func (e *Encoder) EndArray() {
+	e.prepareNext(EndArray)
+	e.out = append(e.out, ']')
+}
+
+// prepareNext writes the comma, newline, indentation or space needed between the
+// previous token (lastType) and next, then records next as lastType via defer.
+func (e *Encoder) prepareNext(next Type) {
+	defer func() {
+		// Runs on every return path, so lastType is updated after the checks below.
+		e.lastType = next
+	}()
+
+	if len(e.indent) == 0 {
+		// Compact mode: comma only between a finished value and a following value/name.
+		if e.lastType&(Null|Bool|Number|String|EndObject|EndArray) != 0 &&
+			next&(Name|Null|Bool|Number|String|StartObject|StartArray) != 0 {
+			e.out = append(e.out, ',')
 		}
-		p.out = append(p.out, ':')
-		if len(p.indent) > 0 {
-			p.out = append(p.out, ' ')
-		}
-		if err := p.marshalValue(item[1]); !p.nerr.Merge(err) {
-			return err
-		}
-		if i < len(items)-1 {
-			p.out = append(p.out, ',')
-		}
-		p.out = append(p.out, p.newline...)
+		return
 	}
-	p.indents = p.indents[:len(p.indents)-len(p.indent)]
-	if len(items) > 0 {
-		p.out = append(p.out, p.indents...)
+
+	switch {
+	case e.lastType&(StartObject|StartArray) != 0:
+		// Push one indent level unless the container is closed immediately (empty).
+		if next&(EndObject|EndArray) == 0 {
+			e.indents = append(e.indents, e.indent...)
+			e.out = append(e.out, '\n')
+			e.out = append(e.out, e.indents...)
+		}
+
+	case e.lastType&(Null|Bool|Number|String|EndObject|EndArray) != 0:
+		switch {
+		// A value or name follows a finished value: separate with comma and newline.
+		case next&(Name|Null|Bool|Number|String|StartObject|StartArray) != 0:
+			e.out = append(e.out, ',', '\n')
+
+		// A closing token follows a finished value: pop one indent level first.
+		case next&(EndObject|EndArray) != 0:
+			e.indents = e.indents[:len(e.indents)-len(e.indent)]
+			e.out = append(e.out, '\n')
+		}
+		e.out = append(e.out, e.indents...)
+
+	case e.lastType&Name != 0:
+		e.out = append(e.out, ' ') // space between the ':' written by WriteName and the value
 	}
-	p.out = append(p.out, '}')
-	return nil
 }