goprotobuf: Change Size implementation to use the same code structure as Marshal (encode).

This is much faster (2x-4x), and makes zero allocations.

R=r
CC=golang-dev
https://codereview.appspot.com/14430057
diff --git a/proto/all_test.go b/proto/all_test.go
index 9ae811e..50a537b 100644
--- a/proto/all_test.go
+++ b/proto/all_test.go
@@ -1747,78 +1747,122 @@
 	Unmarshal(data, pb)
 }
 
-func benchmarkMsg(bytes bool) *GoTest {
+// Benchmarks
+
+func testMsg() *GoTest {
 	pb := initGoTest(true)
-	if bytes {
-		buf := make([]byte, 4000)
-		for i := range buf {
-			buf[i] = byte(i)
-		}
-		pb.F_BytesDefaulted = buf
-	} else {
-		const N = 1000 // Internally the library starts much smaller.
-		pb.F_Int32Repeated = make([]int32, N)
-		pb.F_DoubleRepeated = make([]float64, N)
-		for i := 0; i < N; i++ {
-			pb.F_Int32Repeated[i] = int32(i)
-			pb.F_DoubleRepeated[i] = float64(i)
-		}
+	const N = 1000 // Internally the library starts much smaller.
+	pb.F_Int32Repeated = make([]int32, N)
+	pb.F_DoubleRepeated = make([]float64, N)
+	for i := 0; i < N; i++ {
+		pb.F_Int32Repeated[i] = int32(i)
+		pb.F_DoubleRepeated[i] = float64(i)
 	}
 	return pb
 }
 
-func BenchmarkMarshal(b *testing.B) {
-	pb := benchmarkMsg(false)
+func bytesMsg() *GoTest {
+	pb := initGoTest(true)
+	buf := make([]byte, 4000)
+	for i := range buf {
+		buf[i] = byte(i)
+	}
+	pb.F_BytesDefaulted = buf
+	return pb
+}
+
+func benchmarkMarshal(b *testing.B, pb Message, marshal func(Message) ([]byte, error)) {
+	d, _ := marshal(pb)
+	b.SetBytes(int64(len(d)))
+	b.ResetTimer()
+	for i := 0; i < b.N; i++ {
+		marshal(pb)
+	}
+}
+
+func benchmarkBufferMarshal(b *testing.B, pb Message) {
 	p := NewBuffer(nil)
+	benchmarkMarshal(b, pb, func(pb0 Message) ([]byte, error) {
+		p.Reset()
+		err := p.Marshal(pb0)
+		return p.Bytes(), err
+	})
+}
+
+func benchmarkSize(b *testing.B, pb Message) {
+	benchmarkMarshal(b, pb, func(pb0 Message) ([]byte, error) {
+		Size(pb)
+		return nil, nil
+	})
+}
+
+func newOf(pb Message) Message {
+	in := reflect.ValueOf(pb)
+	if in.IsNil() {
+		return pb
+	}
+	return reflect.New(in.Type().Elem()).Interface().(Message)
+}
+
+func benchmarkUnmarshal(b *testing.B, pb Message, unmarshal func([]byte, Message) error) {
+	d, _ := Marshal(pb)
+	b.SetBytes(int64(len(d)))
+	pbd := newOf(pb)
 
 	b.ResetTimer()
 	for i := 0; i < b.N; i++ {
-		p.Reset()
-		p.Marshal(pb)
+		unmarshal(d, pbd)
 	}
-	b.SetBytes(int64(len(p.Bytes())))
+}
+
+func benchmarkBufferUnmarshal(b *testing.B, pb Message) {
+	p := NewBuffer(nil)
+	benchmarkUnmarshal(b, pb, func(d []byte, pb0 Message) error {
+		p.SetBuf(d)
+		return p.Unmarshal(pb0)
+	})
+}
+
+// Benchmark{Marshal,BufferMarshal,Size,Unmarshal,BufferUnmarshal}{,Bytes}
+
+func BenchmarkMarshal(b *testing.B) {
+	benchmarkMarshal(b, testMsg(), Marshal)
+}
+
+func BenchmarkBufferMarshal(b *testing.B) {
+	benchmarkBufferMarshal(b, testMsg())
+}
+
+func BenchmarkSize(b *testing.B) {
+	benchmarkSize(b, testMsg())
 }
 
 func BenchmarkUnmarshal(b *testing.B) {
-	pb := benchmarkMsg(false)
-	p := NewBuffer(nil)
-	p.Marshal(pb)
-	b.SetBytes(int64(len(p.Bytes())))
-	p2 := NewBuffer(nil)
-	pbd := new(GoTest)
+	benchmarkUnmarshal(b, testMsg(), Unmarshal)
+}
 
-	b.ResetTimer()
-	for i := 0; i < b.N; i++ {
-		p2.SetBuf(p.Bytes())
-		p2.Unmarshal(pbd)
-	}
+func BenchmarkBufferUnmarshal(b *testing.B) {
+	benchmarkBufferUnmarshal(b, testMsg())
 }
 
 func BenchmarkMarshalBytes(b *testing.B) {
-	pb := benchmarkMsg(true)
-	p := NewBuffer(nil)
+	benchmarkMarshal(b, bytesMsg(), Marshal)
+}
 
-	b.ResetTimer()
-	for i := 0; i < b.N; i++ {
-		p.Reset()
-		p.Marshal(pb)
-	}
-	b.SetBytes(int64(len(p.Bytes())))
+func BenchmarkBufferMarshalBytes(b *testing.B) {
+	benchmarkBufferMarshal(b, bytesMsg())
+}
+
+func BenchmarkSizeBytes(b *testing.B) {
+	benchmarkSize(b, bytesMsg())
 }
 
 func BenchmarkUnmarshalBytes(b *testing.B) {
-	pb := benchmarkMsg(true)
-	p := NewBuffer(nil)
-	p.Marshal(pb)
-	b.SetBytes(int64(len(p.Bytes())))
-	p2 := NewBuffer(nil)
-	pbd := new(GoTest)
+	benchmarkUnmarshal(b, bytesMsg(), Unmarshal)
+}
 
-	b.ResetTimer()
-	for i := 0; i < b.N; i++ {
-		p2.SetBuf(p.Bytes())
-		p2.Unmarshal(pbd)
-	}
+func BenchmarkBufferUnmarshalBytes(b *testing.B) {
+	benchmarkBufferUnmarshal(b, bytesMsg())
 }
 
 func BenchmarkUnmarshalUnrecognizedFields(b *testing.B) {