proto: wire encoding support

Add proto.Marshal.

Change-Id: If7254bb4c4cbbee782a2a163762f9fcf98e7ab08
Reviewed-on: https://go-review.googlesource.com/c/protobuf/+/167388
Reviewed-by: Joe Tsai <thebrokentoaster@gmail.com>
diff --git a/proto/encode.go b/proto/encode.go
new file mode 100644
index 0000000..68a3448
--- /dev/null
+++ b/proto/encode.go
@@ -0,0 +1,216 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package proto
+
+import (
+	"fmt"
+	"sort"
+
+	"github.com/golang/protobuf/v2/internal/encoding/wire"
+	"github.com/golang/protobuf/v2/internal/mapsort"
+	"github.com/golang/protobuf/v2/internal/pragma"
+	"github.com/golang/protobuf/v2/reflect/protoreflect"
+)
+
+// MarshalOptions configures the marshaler.
+//
+// Example usage:
+//   b, err := MarshalOptions{Deterministic: true}.Marshal(m)
+type MarshalOptions struct {
+	// Deterministic controls whether the same message will always be
+	// serialized to the same bytes within the same binary.
+	//
+	// Setting this option guarantees that repeated serialization of
+	// the same message will return the same bytes, and that different
+	// processes of the same binary (which may be executing on different
+	// machines) will serialize equal messages to the same bytes.
+	//
+	// Note that the deterministic serialization is NOT canonical across
+	// languages. It is not guaranteed to remain stable over time. It is
+	// unstable across different builds with schema changes due to unknown
+	// fields. Users who need canonical serialization (e.g., persistent
+	// storage in a canonical form, fingerprinting, etc.) must define
+	// their own canonicalization specification and implement their own
+	// serializer rather than relying on this API.
+	//
+	// If deterministic serialization is requested, map entries will be
+	// sorted by keys in lexicographical order. This is an implementation
+	// detail and subject to change.
+	Deterministic bool
+
+	pragma.NoUnkeyedLiterals
+}
+
+// Marshal returns the wire-format encoding of m, using the default MarshalOptions.
+func Marshal(m Message) ([]byte, error) {
+	return MarshalOptions{}.MarshalAppend(nil, m)
+}
+
+// Marshal returns the wire-format encoding of m, encoded as configured by o.
+func (o MarshalOptions) Marshal(m Message) ([]byte, error) {
+	return o.marshalMessage(nil, m.ProtoReflect())
+}
+
+// MarshalAppend appends the wire-format encoding of m to b and returns the
+// extended slice. If encoding fails, it returns a nil slice and the error.
+func (o MarshalOptions) MarshalAppend(b []byte, m Message) ([]byte, error) {
+	return o.marshalMessage(b, m.ProtoReflect())
+}
+
+func (o MarshalOptions) marshalMessage(b []byte, m protoreflect.Message) ([]byte, error) {
+	// Append the wire encoding of m to b: known fields first, then unknown fields
+	// copied verbatim. The default field order is chosen for reasonable efficiency
+	// and simplicity given the protoreflect API. It is not deterministic, since
+	// KnownFields.Range does not return fields in any defined order.
+	//
+	// When using deterministic serialization, we sort the known fields by field number.
+	fields := m.Type().Fields()
+	knownFields := m.KnownFields()
+	var err error
+	o.rangeKnown(knownFields, func(num protoreflect.FieldNumber, value protoreflect.Value) bool {
+		field := fields.ByNumber(num)
+		if field == nil { // not declared on the message type; fall back to its extension types
+			field = knownFields.ExtensionTypes().ByNumber(num)
+			if field == nil {
+				panic(fmt.Errorf("no descriptor for field %d in %q", num, m.Type().FullName()))
+			}
+		}
+		b, err = o.marshalField(b, field, value)
+		return err == nil // false (on error) asks the iteration to stop
+	})
+	if err != nil {
+		return nil, err // the partially written buffer is discarded
+	}
+	m.UnknownFields().Range(func(_ protoreflect.FieldNumber, raw protoreflect.RawFields) bool {
+		b = append(b, raw...) // raw bytes are copied through unmodified
+		return true
+	})
+	// TODO: required field checks
+	return b, nil
+}
+
+// rangeKnown calls f for each known field, visiting them in ascending field
+// number order when deterministic serialization is enabled.
+func (o MarshalOptions) rangeKnown(knownFields protoreflect.KnownFields, f func(protoreflect.FieldNumber, protoreflect.Value) bool) {
+	if !o.Deterministic {
+		knownFields.Range(f) // no ordering requested: use Range's own order
+		return
+	}
+	nums := make([]protoreflect.FieldNumber, 0, knownFields.Len()) // gather the field numbers so they can be sorted
+	knownFields.Range(func(num protoreflect.FieldNumber, _ protoreflect.Value) bool {
+		nums = append(nums, num)
+		return true
+	})
+	sort.Slice(nums, func(a, b int) bool {
+		return nums[a] < nums[b]
+	})
+	for _, num := range nums {
+		if !f(num, knownFields.Get(num)) { // fetch each value by number, in sorted order
+			break
+		}
+	}
+}
+
+func (o MarshalOptions) marshalField(b []byte, field protoreflect.FieldDescriptor, value protoreflect.Value) ([]byte, error) {
+	num := field.Number()
+	kind := field.Kind()
+	switch {
+	case field.Cardinality() != protoreflect.Repeated: // singular field: one tag followed by the value
+		b = wire.AppendTag(b, num, wireTypes[kind])
+		return o.marshalSingular(b, num, kind, value)
+	case field.IsMap(): // map field: written one entry at a time
+		return o.marshalMap(b, num, kind, field.MessageType(), value.Map())
+	case field.IsPacked(): // packed repeated field: all elements share one length-delimited record
+		return o.marshalPacked(b, num, kind, value.List())
+	default: // unpacked repeated field: every element gets its own tag
+		return o.marshalList(b, num, kind, value.List())
+	}
+}
+
+func (o MarshalOptions) marshalMap(b []byte, num wire.Number, kind protoreflect.Kind, mdesc protoreflect.MessageDescriptor, mapv protoreflect.Map) ([]byte, error) {
+	keyf := mdesc.Fields().ByNumber(1) // field 1 of the entry message describes the key
+	valf := mdesc.Fields().ByNumber(2) // field 2 of the entry message describes the value
+	var err error
+	o.rangeMap(mapv, keyf.Kind(), func(key protoreflect.MapKey, value protoreflect.Value) bool {
+		b = wire.AppendTag(b, num, wire.BytesType) // each entry is its own length-delimited record under the map's field number
+		var pos int
+		b, pos = appendSpeculativeLength(b) // reserve room for the entry length; filled in below
+
+		b, err = o.marshalField(b, keyf, key.Value())
+		if err != nil {
+			return false
+		}
+		b, err = o.marshalField(b, valf, value)
+		if err != nil {
+			return false
+		}
+
+		b = finishSpeculativeLength(b, pos) // the entry size is known now; write the real length
+		return true
+	})
+	if err != nil {
+		return nil, err // err was captured from the callback above
+	}
+	return b, nil
+}
+
+func (o MarshalOptions) rangeMap(mapv protoreflect.Map, kind protoreflect.Kind, f func(protoreflect.MapKey, protoreflect.Value) bool) {
+	if !o.Deterministic {
+		mapv.Range(f) // no ordering requested: use the map's own iteration order
+		return
+	}
+	mapsort.Range(mapv, kind, f) // deterministic mode; kind is the key kind (presumably used to order entries by key — confirm in mapsort)
+}
+
+func (o MarshalOptions) marshalPacked(b []byte, num wire.Number, kind protoreflect.Kind, list protoreflect.List) ([]byte, error) {
+	b = wire.AppendTag(b, num, wire.BytesType) // a single tag and length prefix cover all the elements
+	b, pos := appendSpeculativeLength(b)
+	for i, llen := 0, list.Len(); i < llen; i++ {
+		var err error
+		b, err = o.marshalSingular(b, num, kind, list.Get(i)) // elements are written back to back, without per-element tags
+		if err != nil {
+			return nil, err
+		}
+	}
+	b = finishSpeculativeLength(b, pos) // the payload size is known now; write the real length
+	return b, nil
+}
+
+func (o MarshalOptions) marshalList(b []byte, num wire.Number, kind protoreflect.Kind, list protoreflect.List) ([]byte, error) {
+	for i, llen := 0, list.Len(); i < llen; i++ {
+		var err error
+		b = wire.AppendTag(b, num, wireTypes[kind]) // unpacked encoding: the tag is repeated before every element
+		b, err = o.marshalSingular(b, num, kind, list.Get(i))
+		if err != nil {
+			return nil, err // the partially written buffer is discarded
+		}
+	}
+	return b, nil
+}
+
+// When encoding length-prefixed fields, we speculatively set aside some number of bytes
+// for the length, encode the data, and then encode the length (shifting the data if necessary
+// to make room).
+const speculativeLength = 1 // number of bytes reserved up front for each length prefix
+
+func appendSpeculativeLength(b []byte) ([]byte, int) {
+	pos := len(b)
+	b = append(b, "\x00\x00\x00\x00"[:speculativeLength]...) // reserve speculativeLength zero bytes at pos (the literal allows at most 4)
+	return b, pos
+}
+
+func finishSpeculativeLength(b []byte, pos int) []byte {
+	mlen := len(b) - pos - speculativeLength // bytes of data written after the placeholder
+	msiz := wire.SizeVarint(uint64(mlen))    // size of the length prefix, as reported by wire.SizeVarint
+	if msiz != speculativeLength {
+		for i := 0; i < msiz-speculativeLength; i++ { // grow b when the prefix needs more room; no iterations when it needs less
+			b = append(b, 0)
+		}
+		copy(b[pos+msiz:], b[pos+speculativeLength:]) // move the data so it directly follows the resized prefix
+		b = b[:pos+msiz+mlen]                         // keep exactly prefix + data; drops leftover bytes if the prefix shrank
+	}
+	wire.AppendVarint(b[:pos], uint64(mlen)) // writes the length in place at pos; the result is unused because b already has the room
+	return b
+}