internal/impl: add fast-path marshal implementation
This is a port of the v1 table marshaler, with some substantial
cleanup and refactoring.
Benchstat results from the protobuf reference benchmark data comparing the
v1 package with v2, with AllowPartial:true set for the new package. This
is not an apples-to-apples comparison, since v1 doesn't have a way to
disable required field checks. Required field checks in the v2 package
currently go through reflection, which performs terribly; my initial
experimentation indicates that fast-path required field checks will
not add significant cost, so these results are incomplete but not
wholly inaccurate.
name old time/op new time/op delta
/dataset.google_message3_1.pb/Marshal-12 219ms ± 1% 232ms ± 1% +5.85% (p=0.004 n=6+5)
/dataset.google_message2.pb/Marshal-12 261µs ± 3% 248µs ± 1% -5.14% (p=0.002 n=6+6)
/dataset.google_message1_proto2.pb/Marshal-12 681ns ± 2% 637ns ± 3% -6.53% (p=0.002 n=6+6)
/dataset.google_message1_proto3.pb/Marshal-12 1.10µs ± 8% 0.99µs ± 3% -9.63% (p=0.002 n=6+6)
/dataset.google_message3_3.pb/Marshal-12 44.2ms ± 3% 35.2ms ± 1% -20.28% (p=0.004 n=6+5)
/dataset.google_message4.pb/Marshal-12 91.4ms ± 2% 94.9ms ± 2% +3.78% (p=0.002 n=6+6)
/dataset.google_message3_2.pb/Marshal-12 78.7ms ± 6% 80.8ms ± 4% ~ (p=0.310 n=6+6)
/dataset.google_message3_4.pb/Marshal-12 10.6ms ± 3% 10.6ms ± 8% ~ (p=0.662 n=5+6)
/dataset.google_message3_5.pb/Marshal-12 675ms ± 4% 510ms ± 2% -24.40% (p=0.002 n=6+6)
/dataset.google_message3_1.pb/Marshal 219ms ± 1% 236ms ± 7% +8.06% (p=0.004 n=5+6)
/dataset.google_message2.pb/Marshal 257µs ± 1% 250µs ± 3% ~ (p=0.052 n=5+6)
/dataset.google_message1_proto2.pb/Marshal 685ns ± 1% 628ns ± 1% -8.41% (p=0.008 n=5+5)
/dataset.google_message1_proto3.pb/Marshal 1.08µs ± 1% 0.98µs ± 2% -9.31% (p=0.004 n=5+6)
/dataset.google_message3_3.pb/Marshal 43.7ms ± 1% 35.1ms ± 1% -19.76% (p=0.002 n=6+6)
/dataset.google_message4.pb/Marshal 93.4ms ± 4% 94.9ms ± 2% ~ (p=0.180 n=6+6)
/dataset.google_message3_2.pb/Marshal 105ms ± 2% 98ms ± 7% -6.81% (p=0.009 n=5+6)
/dataset.google_message3_4.pb/Marshal 16.3ms ± 6% 15.7ms ± 3% -3.44% (p=0.041 n=6+6)
/dataset.google_message3_5.pb/Marshal 676ms ± 4% 504ms ± 2% -25.50% (p=0.004 n=6+5)
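
For reference, AllowPartial in the v2 API is a per-call option on
proto.MarshalOptions. A minimal sketch of the setting used for the v2
numbers above (marshalPartial is a hypothetical helper, not part of
this change):

    import "google.golang.org/protobuf/proto"

    // marshalPartial marshals m with required field checks disabled,
    // i.e. the AllowPartial:true configuration described above.
    func marshalPartial(m proto.Message) ([]byte, error) {
        return proto.MarshalOptions{AllowPartial: true}.Marshal(m)
    }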
Change-Id: I72cc4597117f4cf5d236ef505777d49dd4a5f75d
Reviewed-on: https://go-review.googlesource.com/c/protobuf/+/171020
Reviewed-by: Joe Tsai <thebrokentoaster@gmail.com>
diff --git a/internal/impl/encode.go b/internal/impl/encode.go
new file mode 100644
index 0000000..c0fce6c
--- /dev/null
+++ b/internal/impl/encode.go
@@ -0,0 +1,183 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package impl
+
+import (
+ "sort"
+ "sync/atomic"
+
+ "google.golang.org/protobuf/internal/errors"
+ proto "google.golang.org/protobuf/proto"
+ pref "google.golang.org/protobuf/reflect/protoreflect"
+ piface "google.golang.org/protobuf/runtime/protoiface"
+)
+
+type marshalOptions uint
+
+const (
+ marshalAllowPartial marshalOptions = 1 << iota
+ marshalDeterministic
+ marshalUseCachedSize
+)
+
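+// newMarshalOptions converts protoiface.MarshalOptions into the
+// compact bitfield form used by the fast path.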
+func newMarshalOptions(opts piface.MarshalOptions) marshalOptions {
+ var o marshalOptions
+ if opts.AllowPartial {
+ o |= marshalAllowPartial
+ }
+ if opts.Deterministic {
+ o |= marshalDeterministic
+ }
+ if opts.UseCachedSize {
+ o |= marshalUseCachedSize
+ }
+ return o
+}
+
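+// Options converts the bitfield form back into a proto.MarshalOptions struct.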
+func (o marshalOptions) Options() proto.MarshalOptions {
+ return proto.MarshalOptions{
+ AllowPartial: o.AllowPartial(),
+ Deterministic: o.Deterministic(),
+ UseCachedSize: o.UseCachedSize(),
+ }
+}
+
+func (o marshalOptions) AllowPartial() bool { return o&marshalAllowPartial != 0 }
+func (o marshalOptions) Deterministic() bool { return o&marshalDeterministic != 0 }
+func (o marshalOptions) UseCachedSize() bool { return o&marshalUseCachedSize != 0 }
+
+// size is protoreflect.Methods.Size.
+func (mi *MessageType) size(msg pref.ProtoMessage) (size int) {
+ return mi.sizePointer(pointerOfIface(msg), 0)
+}
+
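+// sizePointer returns the wire-format size of the message at p,
+// using the cached size when opts allows it.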
+func (mi *MessageType) sizePointer(p pointer, opts marshalOptions) (size int) {
+ mi.init()
+ if p.IsNil() {
+ return 0
+ }
+ if opts.UseCachedSize() && mi.sizecacheOffset.IsValid() {
+ return int(atomic.LoadInt32(p.Apply(mi.sizecacheOffset).Int32()))
+ }
+ return mi.sizePointerSlow(p, opts)
+}
+
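+// sizePointerSlow computes the size field by field and stores the
+// result in the size cache when one is present.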
+func (mi *MessageType) sizePointerSlow(p pointer, opts marshalOptions) (size int) {
+ if mi.extensionOffset.IsValid() {
+ e := p.Apply(mi.extensionOffset).Extensions()
+ size += mi.sizeExtensions(e, opts)
+ }
+ for _, f := range mi.fieldsOrdered {
+ fptr := p.Apply(f.offset)
+ if f.isPointer && fptr.Elem().IsNil() {
+ continue
+ }
+ if f.funcs.size == nil {
+ continue
+ }
+ size += f.funcs.size(fptr, f.tagsize, opts)
+ }
+ if mi.unknownOffset.IsValid() {
+ u := *p.Apply(mi.unknownOffset).Bytes()
+ size += len(u)
+ }
+ if mi.sizecacheOffset.IsValid() {
+ atomic.StoreInt32(p.Apply(mi.sizecacheOffset).Int32(), int32(size))
+ }
+ return size
+}
+
+// marshalAppend is protoreflect.Methods.MarshalAppend.
+func (mi *MessageType) marshalAppend(b []byte, msg pref.ProtoMessage, opts piface.MarshalOptions) ([]byte, error) {
+ return mi.marshalAppendPointer(b, pointerOfIface(msg), newMarshalOptions(opts))
+}
+
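+// marshalAppendPointer appends the wire-format encoding of the
+// message at p to b.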
+func (mi *MessageType) marshalAppendPointer(b []byte, p pointer, opts marshalOptions) ([]byte, error) {
+ mi.init()
+ if p.IsNil() {
+ return b, nil
+ }
+ var err error
+ var nerr errors.NonFatal
+ // The old marshaler encodes extensions at the beginning.
+ if mi.extensionOffset.IsValid() {
+ e := p.Apply(mi.extensionOffset).Extensions()
+ // TODO: Special handling for MessageSet?
+ b, err = mi.appendExtensions(b, e, opts)
+ if !nerr.Merge(err) {
+ return b, err
+ }
+ }
+ for _, f := range mi.fieldsOrdered {
+ fptr := p.Apply(f.offset)
+ if f.isPointer && fptr.Elem().IsNil() {
+ continue
+ }
+ if f.funcs.marshal == nil {
+ continue
+ }
+ b, err = f.funcs.marshal(b, fptr, f.wiretag, opts)
+ if !nerr.Merge(err) {
+ return b, err
+ }
+ }
+ if mi.unknownOffset.IsValid() {
+ u := *p.Apply(mi.unknownOffset).Bytes()
+ b = append(b, u...)
+ }
+ return b, nerr.E
+}
+
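+// sizeExtensions returns the wire-format size of the extension fields in ext.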
+func (mi *MessageType) sizeExtensions(ext *legacyExtensionMap, opts marshalOptions) (n int) {
+ if ext == nil {
+ return 0
+ }
+ for _, e := range *ext {
+ ei := mi.extensionFieldInfo(e.Desc)
+ if ei.funcs.size == nil {
+ continue
+ }
+ n += ei.funcs.size(e.value, ei.tagsize, opts)
+ }
+ return n
+}
+
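+// appendExtensions appends the wire-format encoding of the extension fields in ext to b.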
+func (mi *MessageType) appendExtensions(b []byte, ext *legacyExtensionMap, opts marshalOptions) ([]byte, error) {
+ if ext == nil {
+ return b, nil
+ }
+
+ switch len(*ext) {
+ case 0:
+ return b, nil
+ case 1:
+ // Fast-path for one extension: Don't bother sorting the keys.
+ var err error
+ for _, e := range *ext {
+ ei := mi.extensionFieldInfo(e.Desc)
+ b, err = ei.funcs.marshal(b, e.value, ei.wiretag, opts)
+ }
+ return b, err
+ default:
+ // Sort the keys to provide a deterministic encoding.
+ // Not sure this is required, but the old code does it.
+ keys := make([]int, 0, len(*ext))
+ for k := range *ext {
+ keys = append(keys, int(k))
+ }
+ sort.Ints(keys)
+ var err error
+ var nerr errors.NonFatal
+ for _, k := range keys {
+ e := (*ext)[int32(k)]
+ ei := mi.extensionFieldInfo(e.Desc)
+ b, err = ei.funcs.marshal(b, e.value, ei.wiretag, opts)
+ if !nerr.Merge(err) {
+ return b, err
+ }
+ }
+ return b, nerr.E
+ }
+}