internal/impl: add fast-path unmarshal

Benchmarks run with:
  go test ./benchmarks/ -bench=Wire -benchtime=500ms -benchmem -count=8

Fast-path vs. parent commit:

  name                                      old time/op    new time/op    delta
  Wire/Unmarshal/google_message1_proto2-12    1.35µs ± 2%    0.45µs ± 4%  -67.01%  (p=0.000 n=8+8)
  Wire/Unmarshal/google_message1_proto3-12    1.07µs ± 1%    0.31µs ± 1%  -71.04%  (p=0.000 n=8+8)
  Wire/Unmarshal/google_message2-12            691µs ± 2%     188µs ± 2%  -72.78%  (p=0.000 n=7+8)

  name                                      old allocs/op  new allocs/op  delta
  Wire/Unmarshal/google_message1_proto2-12      60.0 ± 0%      25.0 ± 0%  -58.33%  (p=0.000 n=8+8)
  Wire/Unmarshal/google_message1_proto3-12      42.0 ± 0%       7.0 ± 0%  -83.33%  (p=0.000 n=8+8)
  Wire/Unmarshal/google_message2-12            28.6k ± 0%      8.5k ± 0%  -70.34%  (p=0.000 n=8+8)

Fast-path vs. -v1:

  name                                      old time/op    new time/op    delta
  Wire/Unmarshal/google_message1_proto2-12     702ns ± 1%     445ns ± 4%   -36.58%  (p=0.000 n=8+8)
  Wire/Unmarshal/google_message1_proto3-12     604ns ± 1%     311ns ± 1%   -48.54%  (p=0.000 n=8+8)
  Wire/Unmarshal/google_message2-12            179µs ± 3%     188µs ± 2%    +5.30%  (p=0.000 n=7+8)

  name                                      old allocs/op  new allocs/op  delta
  Wire/Unmarshal/google_message1_proto2-12      26.0 ± 0%      25.0 ± 0%    -3.85%  (p=0.000 n=8+8)
  Wire/Unmarshal/google_message1_proto3-12      8.00 ± 0%      7.00 ± 0%   -12.50%  (p=0.000 n=8+8)
  Wire/Unmarshal/google_message2-12            8.49k ± 0%     8.49k ± 0%    -0.01%  (p=0.000 n=8+8)

Change-Id: I6247ac3fd66a63d9acb902cbd192094ee3d151c3
Reviewed-on: https://go-review.googlesource.com/c/protobuf/+/185147
Reviewed-by: Joe Tsai <thebrokentoaster@gmail.com>
diff --git a/internal/impl/codec_message.go b/internal/impl/codec_message.go
index e4e3706..61c88c1 100644
--- a/internal/impl/codec_message.go
+++ b/internal/impl/codec_message.go
@@ -18,6 +18,8 @@
 // possible.
 type coderMessageInfo struct {
 	orderedCoderFields []*coderFieldInfo
+	denseCoderFields   []*coderFieldInfo
+	coderFields        map[wire.Number]*coderFieldInfo
 	sizecacheOffset    offset
 	unknownOffset      offset
 	extensionOffset    offset
@@ -39,13 +41,14 @@
 	mi.unknownOffset = si.unknownOffset
 	mi.extensionOffset = si.extensionOffset
 
+	mi.coderFields = make(map[wire.Number]*coderFieldInfo)
 	for i := 0; i < mi.PBType.Descriptor().Fields().Len(); i++ {
 		fd := mi.PBType.Descriptor().Fields().Get(i)
-		if fd.ContainingOneof() != nil {
-			continue
-		}
 
 		fs := si.fieldsByNumber[fd.Number()]
+		if fd.ContainingOneof() != nil {
+			fs = si.oneofsByName[fd.ContainingOneof().Name()]
+		}
 		ft := fs.Type
 		var wiretag uint64
 		if !fd.IsPacked() {
@@ -53,37 +56,51 @@
 		} else {
 			wiretag = wire.EncodeTag(fd.Number(), wire.BytesType)
 		}
-		mi.orderedCoderFields = append(mi.orderedCoderFields, &coderFieldInfo{
+		var funcs pointerCoderFuncs
+		if fd.ContainingOneof() != nil {
+			funcs = makeOneofFieldCoder(si, fd)
+		} else {
+			funcs = fieldCoder(fd, ft)
+		}
+		cf := &coderFieldInfo{
 			num:     fd.Number(),
 			offset:  offsetOf(fs, mi.Exporter),
 			wiretag: wiretag,
 			tagsize: wire.SizeVarint(wiretag),
-			funcs:   fieldCoder(fd, ft),
+			funcs:   funcs,
 			isPointer: (fd.Cardinality() == pref.Repeated ||
 				fd.Kind() == pref.MessageKind ||
 				fd.Kind() == pref.GroupKind ||
 				fd.Syntax() != pref.Proto3),
 			isRequired: fd.Cardinality() == pref.Required,
-		})
-	}
-	for i := 0; i < mi.PBType.Descriptor().Oneofs().Len(); i++ {
-		od := mi.PBType.Descriptor().Oneofs().Get(i)
-		fs := si.oneofsByName[od.Name()]
-		mi.orderedCoderFields = append(mi.orderedCoderFields, &coderFieldInfo{
-			num:       od.Fields().Get(0).Number(),
-			offset:    offsetOf(fs, mi.Exporter),
-			funcs:     makeOneofFieldCoder(fs, od, si.fieldsByNumber, si.oneofWrappersByNumber),
-			isPointer: true,
-		})
+		}
+		mi.orderedCoderFields = append(mi.orderedCoderFields, cf)
+		mi.coderFields[cf.num] = cf
 	}
 	sort.Slice(mi.orderedCoderFields, func(i, j int) bool {
 		return mi.orderedCoderFields[i].num < mi.orderedCoderFields[j].num
 	})
 
+	var maxDense pref.FieldNumber
+	for _, cf := range mi.orderedCoderFields {
+		if cf.num >= 16 && cf.num >= 2*maxDense {
+			break
+		}
+		maxDense = cf.num
+	}
+	mi.denseCoderFields = make([]*coderFieldInfo, maxDense+1)
+	for _, cf := range mi.orderedCoderFields {
+		if int(cf.num) > len(mi.denseCoderFields) {
+			break
+		}
+		mi.denseCoderFields[cf.num] = cf
+	}
+
 	mi.needsInitCheck = needsInitCheck(mi.PBType)
 	mi.methods = piface.Methods{
 		Flags:         piface.MethodFlagDeterministicMarshal,
 		MarshalAppend: mi.marshalAppend,
+		Unmarshal:     mi.unmarshal,
 		Size:          mi.size,
 		IsInitialized: mi.isInitialized,
 	}