internal/impl: add fast-path marshal implementation This is a port of the v1 table marshaler, with some substantial cleanup and refactoring. Benchstat results from the protobuf reference benchmark data comparing the v1 package with v2, with AllowPartial:true set for the new package. This is not an apples-to-apples comparison, since v1 doesn't have a way to disable required field checks. Required field checks in v2 package currently go through reflection, which performs terribly; my initial experimentation indicates that fast-path required field checks will not add a large amount of cost; these results are incomplete but not wholly inaccurate. name old time/op new time/op delta /dataset.google_message3_1.pb/Marshal-12 219ms ± 1% 232ms ± 1% +5.85% (p=0.004 n=6+5) /dataset.google_message2.pb/Marshal-12 261µs ± 3% 248µs ± 1% -5.14% (p=0.002 n=6+6) /dataset.google_message1_proto2.pb/Marshal-12 681ns ± 2% 637ns ± 3% -6.53% (p=0.002 n=6+6) /dataset.google_message1_proto3.pb/Marshal-12 1.10µs ± 8% 0.99µs ± 3% -9.63% (p=0.002 n=6+6) /dataset.google_message3_3.pb/Marshal-12 44.2ms ± 3% 35.2ms ± 1% -20.28% (p=0.004 n=6+5) /dataset.google_message4.pb/Marshal-12 91.4ms ± 2% 94.9ms ± 2% +3.78% (p=0.002 n=6+6) /dataset.google_message3_2.pb/Marshal-12 78.7ms ± 6% 80.8ms ± 4% ~ (p=0.310 n=6+6) /dataset.google_message3_4.pb/Marshal-12 10.6ms ± 3% 10.6ms ± 8% ~ (p=0.662 n=5+6) /dataset.google_message3_5.pb/Marshal-12 675ms ± 4% 510ms ± 2% -24.40% (p=0.002 n=6+6) /dataset.google_message3_1.pb/Marshal 219ms ± 1% 236ms ± 7% +8.06% (p=0.004 n=5+6) /dataset.google_message2.pb/Marshal 257µs ± 1% 250µs ± 3% ~ (p=0.052 n=5+6) /dataset.google_message1_proto2.pb/Marshal 685ns ± 1% 628ns ± 1% -8.41% (p=0.008 n=5+5) /dataset.google_message1_proto3.pb/Marshal 1.08µs ± 1% 0.98µs ± 2% -9.31% (p=0.004 n=5+6) /dataset.google_message3_3.pb/Marshal 43.7ms ± 1% 35.1ms ± 1% -19.76% (p=0.002 n=6+6) /dataset.google_message4.pb/Marshal 93.4ms ± 4% 94.9ms ± 2% ~ (p=0.180 n=6+6) /dataset.google_message3_2.pb/Marshal 105ms ± 2% 98ms ± 7% -6.81% (p=0.009 n=5+6) /dataset.google_message3_4.pb/Marshal 16.3ms ± 6% 15.7ms ± 3% -3.44% (p=0.041 n=6+6) /dataset.google_message3_5.pb/Marshal 676ms ± 4% 504ms ± 2% -25.50% (p=0.004 n=6+5) Change-Id: I72cc4597117f4cf5d236ef505777d49dd4a5f75d Reviewed-on: https://go-review.googlesource.com/c/protobuf/+/171020 Reviewed-by: Joe Tsai <thebrokentoaster@gmail.com>

commit: c37adefdac3634ee9b6f9494ff75001b93d01c28 [log] [tgz]
author: Damien Neil <dneil@google.com> Mon Apr 01 13:49:56 2019 -0700
committer: Damien Neil <dneil@google.com> Thu May 16 22:13:43 2019 +0000
tree: c33826c923260266ebbb9ea383ff1e9e896da250
parent: 5c62f675c54b8781b00a44070ed6f6a5d55fa4a6 [diff] [blame]
diff --git a/internal/impl/encode.go b/internal/impl/encode.go
new file mode 100644
index 0000000..c0fce6c
--- /dev/null
+++ b/internal/impl/encode.go

@@ -0,0 +1,183 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package impl
+
+import (
+	"sort"
+	"sync/atomic"
+
+	"google.golang.org/protobuf/internal/errors"
+	proto "google.golang.org/protobuf/proto"
+	pref "google.golang.org/protobuf/reflect/protoreflect"
+	piface "google.golang.org/protobuf/runtime/protoiface"
+)
+
+type marshalOptions uint
+
+const (
+	marshalAllowPartial marshalOptions = 1 << iota
+	marshalDeterministic
+	marshalUseCachedSize
+)
+
+func newMarshalOptions(opts piface.MarshalOptions) marshalOptions {
+	var o marshalOptions
+	if opts.AllowPartial {
+		o |= marshalAllowPartial
+	}
+	if opts.Deterministic {
+		o |= marshalDeterministic
+	}
+	if opts.UseCachedSize {
+		o |= marshalUseCachedSize
+	}
+	return o
+}
+
+func (o marshalOptions) Options() proto.MarshalOptions {
+	return proto.MarshalOptions{
+		AllowPartial:  o.AllowPartial(),
+		Deterministic: o.Deterministic(),
+		UseCachedSize: o.UseCachedSize(),
+	}
+}
+
+func (o marshalOptions) AllowPartial() bool  { return o&marshalAllowPartial != 0 }
+func (o marshalOptions) Deterministic() bool { return o&marshalDeterministic != 0 }
+func (o marshalOptions) UseCachedSize() bool { return o&marshalUseCachedSize != 0 }
+
+// size is protoreflect.Methods.Size.
+func (mi *MessageType) size(msg pref.ProtoMessage) (size int) {
+	return mi.sizePointer(pointerOfIface(msg), 0)
+}
+
+func (mi *MessageType) sizePointer(p pointer, opts marshalOptions) (size int) {
+	mi.init()
+	if p.IsNil() {
+		return 0
+	}
+	if opts.UseCachedSize() && mi.sizecacheOffset.IsValid() {
+		return int(atomic.LoadInt32(p.Apply(mi.sizecacheOffset).Int32()))
+	}
+	return mi.sizePointerSlow(p, opts)
+}
+
+func (mi *MessageType) sizePointerSlow(p pointer, opts marshalOptions) (size int) {
+	if mi.extensionOffset.IsValid() {
+		e := p.Apply(mi.extensionOffset).Extensions()
+		size += mi.sizeExtensions(e, opts)
+	}
+	for _, f := range mi.fieldsOrdered {
+		fptr := p.Apply(f.offset)
+		if f.isPointer && fptr.Elem().IsNil() {
+			continue
+		}
+		if f.funcs.size == nil {
+			continue
+		}
+		size += f.funcs.size(fptr, f.tagsize, opts)
+	}
+	if mi.unknownOffset.IsValid() {
+		u := *p.Apply(mi.unknownOffset).Bytes()
+		size += len(u)
+	}
+	if mi.sizecacheOffset.IsValid() {
+		atomic.StoreInt32(p.Apply(mi.sizecacheOffset).Int32(), int32(size))
+	}
+	return size
+}
+
+// marshalAppend is protoreflect.Methods.MarshalAppend.
+func (mi *MessageType) marshalAppend(b []byte, msg pref.ProtoMessage, opts piface.MarshalOptions) ([]byte, error) {
+	return mi.marshalAppendPointer(b, pointerOfIface(msg), newMarshalOptions(opts))
+}
+
+func (mi *MessageType) marshalAppendPointer(b []byte, p pointer, opts marshalOptions) ([]byte, error) {
+	mi.init()
+	if p.IsNil() {
+		return b, nil
+	}
+	var err error
+	var nerr errors.NonFatal
+	// The old marshaler encodes extensions at beginning.
+	if mi.extensionOffset.IsValid() {
+		e := p.Apply(mi.extensionOffset).Extensions()
+		// TODO: Special handling for MessageSet?
+		b, err = mi.appendExtensions(b, e, opts)
+		if !nerr.Merge(err) {
+			return b, err
+		}
+	}
+	for _, f := range mi.fieldsOrdered {
+		fptr := p.Apply(f.offset)
+		if f.isPointer && fptr.Elem().IsNil() {
+			continue
+		}
+		if f.funcs.marshal == nil {
+			continue
+		}
+		b, err = f.funcs.marshal(b, fptr, f.wiretag, opts)
+		if !nerr.Merge(err) {
+			return b, err
+		}
+	}
+	if mi.unknownOffset.IsValid() {
+		u := *p.Apply(mi.unknownOffset).Bytes()
+		b = append(b, u...)
+	}
+	return b, nerr.E
+}
+
+func (mi *MessageType) sizeExtensions(ext *legacyExtensionMap, opts marshalOptions) (n int) {
+	if ext == nil {
+		return 0
+	}
+	for _, e := range *ext {
+		ei := mi.extensionFieldInfo(e.Desc)
+		if ei.funcs.size == nil {
+			continue
+		}
+		n += ei.funcs.size(e.value, ei.tagsize, opts)
+	}
+	return n
+}
+
+func (mi *MessageType) appendExtensions(b []byte, ext *legacyExtensionMap, opts marshalOptions) ([]byte, error) {
+	if ext == nil {
+		return b, nil
+	}
+
+	switch len(*ext) {
+	case 0:
+		return b, nil
+	case 1:
+		// Fast-path for one extension: Don't bother sorting the keys.
+		var err error
+		for _, e := range *ext {
+			ei := mi.extensionFieldInfo(e.Desc)
+			b, err = ei.funcs.marshal(b, e.value, ei.wiretag, opts)
+		}
+		return b, err
+	default:
+		// Sort the keys to provide a deterministic encoding.
+		// Not sure this is required, but the old code does it.
+		keys := make([]int, 0, len(*ext))
+		for k := range *ext {
+			keys = append(keys, int(k))
+		}
+		sort.Ints(keys)
+		var err error
+		var nerr errors.NonFatal
+		for _, k := range keys {
+			e := (*ext)[int32(k)]
+			ei := mi.extensionFieldInfo(e.Desc)
+			b, err = ei.funcs.marshal(b, e.value, ei.wiretag, opts)
+			if !nerr.Merge(err) {
+				return b, err
+			}
+		}
+		return b, nerr.E
+	}
+}
commit	c37adefdac3634ee9b6f9494ff75001b93d01c28	[log] [tgz]
author	Damien Neil <dneil@google.com>	Mon Apr 01 13:49:56 2019 -0700
committer	Damien Neil <dneil@google.com>	Thu May 16 22:13:43 2019 +0000
tree	c33826c923260266ebbb9ea383ff1e9e896da250
parent	5c62f675c54b8781b00a44070ed6f6a5d55fa4a6 [diff] [blame]