internal/impl: add fast-path marshal implementation

This is a port of the v1 table marshaler, with some substantial
cleanup and refactoring.

Benchstat results from the protobuf reference benchmark data comparing the
v1 package with v2, with AllowPartial:true set for the new package. This
is not an apples-to-apples comparison, since v1 doesn't have a way to
disable required field checks.  Required field checks in v2 package
currently go through reflection, which performs terribly; my initial
experimentation indicates that fast-path required field checks will
not add a large amount of cost; these results are incomplete but not
wholly inaccurate.

name                                           old time/op  new time/op  delta
/dataset.google_message3_1.pb/Marshal-12        219ms ± 1%   232ms ± 1%   +5.85%  (p=0.004 n=6+5)
/dataset.google_message2.pb/Marshal-12          261µs ± 3%   248µs ± 1%   -5.14%  (p=0.002 n=6+6)
/dataset.google_message1_proto2.pb/Marshal-12   681ns ± 2%   637ns ± 3%   -6.53%  (p=0.002 n=6+6)
/dataset.google_message1_proto3.pb/Marshal-12  1.10µs ± 8%  0.99µs ± 3%   -9.63%  (p=0.002 n=6+6)
/dataset.google_message3_3.pb/Marshal-12       44.2ms ± 3%  35.2ms ± 1%  -20.28%  (p=0.004 n=6+5)
/dataset.google_message4.pb/Marshal-12         91.4ms ± 2%  94.9ms ± 2%   +3.78%  (p=0.002 n=6+6)
/dataset.google_message3_2.pb/Marshal-12       78.7ms ± 6%  80.8ms ± 4%     ~     (p=0.310 n=6+6)
/dataset.google_message3_4.pb/Marshal-12       10.6ms ± 3%  10.6ms ± 8%     ~     (p=0.662 n=5+6)
/dataset.google_message3_5.pb/Marshal-12        675ms ± 4%   510ms ± 2%  -24.40%  (p=0.002 n=6+6)
/dataset.google_message3_1.pb/Marshal           219ms ± 1%   236ms ± 7%   +8.06%  (p=0.004 n=5+6)
/dataset.google_message2.pb/Marshal             257µs ± 1%   250µs ± 3%     ~     (p=0.052 n=5+6)
/dataset.google_message1_proto2.pb/Marshal      685ns ± 1%   628ns ± 1%   -8.41%  (p=0.008 n=5+5)
/dataset.google_message1_proto3.pb/Marshal     1.08µs ± 1%  0.98µs ± 2%   -9.31%  (p=0.004 n=5+6)
/dataset.google_message3_3.pb/Marshal          43.7ms ± 1%  35.1ms ± 1%  -19.76%  (p=0.002 n=6+6)
/dataset.google_message4.pb/Marshal            93.4ms ± 4%  94.9ms ± 2%     ~     (p=0.180 n=6+6)
/dataset.google_message3_2.pb/Marshal           105ms ± 2%    98ms ± 7%   -6.81%  (p=0.009 n=5+6)
/dataset.google_message3_4.pb/Marshal          16.3ms ± 6%  15.7ms ± 3%   -3.44%  (p=0.041 n=6+6)
/dataset.google_message3_5.pb/Marshal           676ms ± 4%   504ms ± 2%  -25.50%  (p=0.004 n=6+5)

Change-Id: I72cc4597117f4cf5d236ef505777d49dd4a5f75d
Reviewed-on: https://go-review.googlesource.com/c/protobuf/+/171020
Reviewed-by: Joe Tsai <thebrokentoaster@gmail.com>
diff --git a/proto/decode_test.go b/proto/decode_test.go
index 47dfdb4..074aeb6 100644
--- a/proto/decode_test.go
+++ b/proto/decode_test.go
@@ -18,6 +18,8 @@
 	pref "google.golang.org/protobuf/reflect/protoreflect"
 	"google.golang.org/protobuf/runtime/protolegacy"
 
+	legacypb "google.golang.org/protobuf/internal/testprotos/legacy"
+	legacy1pb "google.golang.org/protobuf/internal/testprotos/legacy/proto2.v0.0.0-20160225-2fc053c5"
 	testpb "google.golang.org/protobuf/internal/testprotos/test"
 	test3pb "google.golang.org/protobuf/internal/testprotos/test3"
 )
@@ -516,17 +518,20 @@
 		decodeTo: []proto.Message{&testpb.TestAllTypes{
 			RepeatedNestedMessage: []*testpb.TestAllTypes_NestedMessage{
 				{A: scalar.Int32(1)},
+				nil,
 				{A: scalar.Int32(2)},
 			},
 		}, &test3pb.TestAllTypes{
 			RepeatedNestedMessage: []*test3pb.TestAllTypes_NestedMessage{
 				{A: 1},
+				nil,
 				{A: 2},
 			},
 		}, build(
 			&testpb.TestAllExtensions{},
 			extend(testpb.E_RepeatedNestedMessageExtension, []*testpb.TestAllTypes_NestedMessage{
 				{A: scalar.Int32(1)},
+				nil,
 				{A: scalar.Int32(2)},
 			}),
 		)},
@@ -534,6 +539,7 @@
 			pack.Tag{48, pack.BytesType}, pack.LengthPrefix(pack.Message{
 				pack.Tag{1, pack.VarintType}, pack.Varint(1),
 			}),
+			pack.Tag{48, pack.BytesType}, pack.LengthPrefix(pack.Message{}),
 			pack.Tag{48, pack.BytesType}, pack.LengthPrefix(pack.Message{
 				pack.Tag{1, pack.VarintType}, pack.Varint(2),
 			}),
@@ -544,12 +550,14 @@
 		decodeTo: []proto.Message{&testpb.TestAllTypes{
 			Repeatedgroup: []*testpb.TestAllTypes_RepeatedGroup{
 				{A: scalar.Int32(1017)},
+				nil,
 				{A: scalar.Int32(2017)},
 			},
 		}, build(
 			&testpb.TestAllExtensions{},
 			extend(testpb.E_RepeatedgroupExtension, []*testpb.RepeatedGroupExtension{
 				{A: scalar.Int32(1017)},
+				nil,
 				{A: scalar.Int32(2017)},
 			}),
 		)},
@@ -558,6 +566,8 @@
 			pack.Tag{47, pack.VarintType}, pack.Varint(1017),
 			pack.Tag{46, pack.EndGroupType},
 			pack.Tag{46, pack.StartGroupType},
+			pack.Tag{46, pack.EndGroupType},
+			pack.Tag{46, pack.StartGroupType},
 			pack.Tag{47, pack.VarintType}, pack.Varint(2017),
 			pack.Tag{46, pack.EndGroupType},
 		}.Marshal(),
@@ -778,6 +788,18 @@
 		})}.Marshal(),
 	},
 	{
+		desc: "oneof (empty message)",
+		decodeTo: []proto.Message{
+			&testpb.TestAllTypes{OneofField: &testpb.TestAllTypes_OneofNestedMessage{
+				&testpb.TestAllTypes_NestedMessage{},
+			}},
+			&test3pb.TestAllTypes{OneofField: &test3pb.TestAllTypes_OneofNestedMessage{
+				&test3pb.TestAllTypes_NestedMessage{},
+			}},
+		},
+		wire: pack.Message{pack.Tag{112, pack.BytesType}, pack.LengthPrefix(pack.Message{})}.Marshal(),
+	},
+	{
 		desc: "oneof (overridden message)",
 		decodeTo: []proto.Message{
 			&testpb.TestAllTypes{OneofField: &testpb.TestAllTypes_OneofNestedMessage{
@@ -861,6 +883,14 @@
 		wire: pack.Message{pack.Tag{119, pack.VarintType}, pack.Varint(int(testpb.TestAllTypes_BAR))}.Marshal(),
 	},
 	{
+		desc: "oneof (zero)",
+		decodeTo: []proto.Message{
+			&testpb.TestAllTypes{OneofField: &testpb.TestAllTypes_OneofUint64{0}},
+			&test3pb.TestAllTypes{OneofField: &test3pb.TestAllTypes_OneofUint64{0}},
+		},
+		wire: pack.Message{pack.Tag{116, pack.VarintType}, pack.Varint(0)}.Marshal(),
+	},
+	{
 		desc: "oneof (overridden value)",
 		decodeTo: []proto.Message{
 			&testpb.TestAllTypes{OneofField: &testpb.TestAllTypes_OneofUint64{2}},
@@ -1175,6 +1205,65 @@
 			}),
 		}.Marshal(),
 	},
+	{
+		desc:    "legacy",
+		partial: true,
+		decodeTo: []proto.Message{
+			&legacypb.Legacy{
+				F1: &legacy1pb.Message{
+					OptionalInt32:     scalar.Int32(1),
+					OptionalChildEnum: legacy1pb.Message_ALPHA.Enum(),
+					OptionalChildMessage: &legacy1pb.Message_ChildMessage{
+						F1: scalar.String("x"),
+					},
+					Optionalgroup: &legacy1pb.Message_OptionalGroup{
+						F1: scalar.String("x"),
+					},
+					RepeatedChildMessage: []*legacy1pb.Message_ChildMessage{
+						{F1: scalar.String("x")},
+					},
+					Repeatedgroup: []*legacy1pb.Message_RepeatedGroup{
+						{F1: scalar.String("x")},
+					},
+					MapBoolChildMessage: map[bool]*legacy1pb.Message_ChildMessage{
+						true: {F1: scalar.String("x")},
+					},
+					OneofUnion: &legacy1pb.Message_OneofChildMessage{
+						&legacy1pb.Message_ChildMessage{
+							F1: scalar.String("x"),
+						},
+					},
+				},
+			},
+		},
+		wire: pack.Message{
+			pack.Tag{1, pack.BytesType}, pack.LengthPrefix(pack.Message{
+				pack.Tag{101, pack.VarintType}, pack.Varint(1),
+				pack.Tag{115, pack.VarintType}, pack.Varint(0),
+				pack.Tag{116, pack.BytesType}, pack.LengthPrefix(pack.Message{
+					pack.Tag{1, pack.BytesType}, pack.String("x"),
+				}),
+				pack.Tag{120, pack.StartGroupType},
+				pack.Tag{1, pack.BytesType}, pack.String("x"),
+				pack.Tag{120, pack.EndGroupType},
+				pack.Tag{516, pack.BytesType}, pack.LengthPrefix(pack.Message{
+					pack.Tag{1, pack.BytesType}, pack.String("x"),
+				}),
+				pack.Tag{520, pack.StartGroupType},
+				pack.Tag{1, pack.BytesType}, pack.String("x"),
+				pack.Tag{520, pack.EndGroupType},
+				pack.Tag{616, pack.BytesType}, pack.LengthPrefix(pack.Message{
+					pack.Tag{1, pack.VarintType}, pack.Varint(1),
+					pack.Tag{2, pack.BytesType}, pack.LengthPrefix(pack.Message{
+						pack.Tag{1, pack.BytesType}, pack.String("x"),
+					}),
+				}),
+				pack.Tag{716, pack.BytesType}, pack.LengthPrefix(pack.Message{
+					pack.Tag{1, pack.BytesType}, pack.String("x"),
+				}),
+			}),
+		}.Marshal(),
+	},
 }
 
 var invalidUTF8TestProtos = []testProto{
@@ -1215,6 +1304,13 @@
 		}.Marshal(),
 	},
 	{
+		desc: "invalid UTF-8 in oneof field",
+		decodeTo: []proto.Message{
+			&test3pb.TestAllTypes{OneofField: &test3pb.TestAllTypes_OneofString{"abc\xff"}},
+		},
+		wire: pack.Message{pack.Tag{113, pack.BytesType}, pack.String("abc\xff")}.Marshal(),
+	},
+	{
 		desc: "invalid UTF-8 in map key",
 		decodeTo: []proto.Message{&test3pb.TestAllTypes{
 			MapStringString: map[string]string{"key\xff": "val"},