A few performance tweaks of the ICodedOutputStream interface/implementation
diff --git a/src/ProtoBench/Program.cs b/src/ProtoBench/Program.cs
index 6203486..14294a4 100644
--- a/src/ProtoBench/Program.cs
+++ b/src/ProtoBench/Program.cs
@@ -35,8 +35,10 @@
 #endregion

 

 using System;

+using System.Collections.Generic;

 using System.Diagnostics;

 using System.IO;

+using System.Threading;

 

 namespace Google.ProtocolBuffers.ProtoBench

 {

@@ -45,20 +47,37 @@
     /// </summary>

     public sealed class Program

     {

-        private static readonly TimeSpan MinSampleTime = TimeSpan.FromSeconds(2);

-        private static readonly TimeSpan TargetTime = TimeSpan.FromSeconds(30);

-

+        private static TimeSpan MinSampleTime = TimeSpan.FromSeconds(2);

+        private static TimeSpan TargetTime = TimeSpan.FromSeconds(30);

+        private static bool FastTest = false;

+        private static bool Verbose = false;

         // Avoid a .NET 3.5 dependency

         private delegate void Action();

 

+        private delegate void BenchmarkTest(string name, long dataSize, Action action);

+

+        private static BenchmarkTest RunBenchmark;

+

         public static int Main(string[] args)

         {

+            List<string> temp = new List<string>(args);

+

+            FastTest = temp.Remove("/fast") || temp.Remove("-fast");

+            Verbose = temp.Remove("/verbose") || temp.Remove("-verbose");

+            

+            RunBenchmark = BenchmarkV1;

+            if (temp.Remove("/v2") || temp.Remove("-v2"))

+                RunBenchmark = BenchmarkV2;

+

+            args = temp.ToArray();

+

+

+

             if (args.Length < 2 || (args.Length%2) != 0)

             {

-                Console.Error.WriteLine("Usage: ProtoBench <descriptor type name> <input data>");

+                Console.Error.WriteLine("Usage: ProtoBench [/fast] <descriptor type name> <input data>");

                 Console.Error.WriteLine("The descriptor type name is the fully-qualified message name,");

-                Console.Error.WriteLine(

-                    "including assembly - e.g. Google.ProtocolBuffers.BenchmarkProtos.Message1,ProtoBench");

+                Console.Error.WriteLine("including assembly - e.g. Google.ProtocolBuffers.BenchmarkProtos.Message1,ProtoBench");

                 Console.Error.WriteLine("(You can specify multiple pairs of descriptor type name and input data.)");

                 return 1;

             }

@@ -94,31 +113,27 @@
                 ByteString inputString = ByteString.CopyFrom(inputData);

                 IMessage sampleMessage =

                     defaultMessage.WeakCreateBuilderForType().WeakMergeFrom(inputString).WeakBuild();

-                Benchmark("Serialize to byte string", inputData.Length, () => sampleMessage.ToByteString());

-                Benchmark("Serialize to byte array", inputData.Length, () => sampleMessage.ToByteArray());

-                Benchmark("Serialize to memory stream", inputData.Length,

+                if(!FastTest) RunBenchmark("Serialize to byte string", inputData.Length, () => sampleMessage.ToByteString());

+                RunBenchmark("Serialize to byte array", inputData.Length, () => sampleMessage.ToByteArray());

+                if (!FastTest) RunBenchmark("Serialize to memory stream", inputData.Length,

                           () => sampleMessage.WriteTo(new MemoryStream()));

-                Benchmark("Deserialize from byte string", inputData.Length,

+                if (!FastTest) RunBenchmark("Deserialize from byte string", inputData.Length,

                           () => defaultMessage.WeakCreateBuilderForType()

                                     .WeakMergeFrom(inputString)

                                     .WeakBuild()

                     );

-                Benchmark("Deserialize from byte array", inputData.Length,

+                RunBenchmark("Deserialize from byte array", inputData.Length,

                           () => defaultMessage.WeakCreateBuilderForType()

                                     .WeakMergeFrom(CodedInputStream.CreateInstance(inputData))

                                     .WeakBuild()

                     );

-                Benchmark("Deserialize from memory stream", inputData.Length, () =>

-                                                                                  {

-                                                                                      inputStream.Position = 0;

-                                                                                      defaultMessage.

-                                                                                          WeakCreateBuilderForType()

-                                                                                          .WeakMergeFrom(

-                                                                                              CodedInputStream.

-                                                                                                  CreateInstance(

-                                                                                                      inputStream))

-                                                                                          .WeakBuild();

-                                                                                  });

+                if (!FastTest) RunBenchmark("Deserialize from memory stream", inputData.Length, 

+                    () => {

+                      inputStream.Position = 0;

+                      defaultMessage.WeakCreateBuilderForType().WeakMergeFrom(

+                              CodedInputStream.CreateInstance(inputStream))

+                          .WeakBuild();

+                  });

                 Console.WriteLine();

                 return true;

             }

@@ -131,7 +146,79 @@
             }

         }

 

-        private static void Benchmark(string name, long dataSize, Action action)

+        private static void BenchmarkV2(string name, long dataSize, Action action)

+        {

+            TimeSpan elapsed = TimeSpan.Zero;

+            long runs = 0;

+            long totalCount = 0;

+            double best = double.MinValue, worst = double.MaxValue;

+

+            ThreadStart threadProc = 

+                delegate()

+                    {

+                        action();

+                        // Run it progressively more times until we've got a reasonable sample

+

+                        int iterations = 100;

+                        elapsed = TimeAction(action, iterations);

+                        while (elapsed.TotalMilliseconds < 1000)

+                        {

+                            elapsed += TimeAction(action, iterations);

+                            iterations *= 2;

+                        }

+

+                        TimeSpan target = TimeSpan.FromSeconds(1);

+

+                        elapsed = TimeAction(action, iterations);

+                        iterations = (int)((target.Ticks * iterations) / (double)elapsed.Ticks);

+                        elapsed = TimeAction(action, iterations);

+                        iterations = (int)((target.Ticks * iterations) / (double)elapsed.Ticks);

+                        elapsed = TimeAction(action, iterations);

+                        iterations = (int)((target.Ticks * iterations) / (double)elapsed.Ticks);

+

+                        double first = (iterations * dataSize) / (elapsed.TotalSeconds * 1024 * 1024);

+                        if (Verbose) Console.WriteLine("Round ---: Count = {1,6}, Bps = {2,8:f3}", 0, iterations, first);

+                        elapsed = TimeSpan.Zero;

+                        int max = FastTest ? 30 : 100;

+

+                        while (runs < max)

+                        {

+                            TimeSpan cycle = TimeAction(action, iterations);

+                            // Accumulate and scale for next cycle.

+                            

+                            double bps = (iterations * dataSize) / (cycle.TotalSeconds * 1024 * 1024);

+                            if (Verbose) Console.WriteLine("Round {0,3}: Count = {1,6}, Bps = {2,8:f3}", runs, iterations, bps);

+                            if (runs == 0 && bps > first * 1.1)

+                            {

+                                if (Verbose) Console.WriteLine("Warming up...");

+                                iterations = (int)((target.Ticks * iterations) / (double)cycle.Ticks);

+                                first = bps;

+                                continue;//still warming up...

+                            }

+

+                            best = Math.Max(best, bps);

+                            worst = Math.Min(worst, bps);

+

+                            runs++;

+                            elapsed += cycle;

+                            totalCount += iterations;

+                            iterations = (int) ((target.Ticks*totalCount)/(double) elapsed.Ticks);

+                        }

+                    };

+

+            Thread work = new Thread(threadProc);

+            work.Name = "Worker";

+            work.Priority = ThreadPriority.Highest;

+            work.SetApartmentState(ApartmentState.STA);

+            work.Start();

+            work.Join();

+

+            Console.WriteLine("{0}: averages {1} per {2:f3}s for {3} runs; avg: {4:f3}mbps; best: {5:f3}mbps; worst: {6:f3}mbps",

+                              name, totalCount / runs, elapsed.TotalSeconds / runs, runs,

+                              (totalCount * dataSize) / (elapsed.TotalSeconds * 1024 * 1024), best, worst);

+        }

+

+        private static void BenchmarkV1(string name, long dataSize, Action action)

         {

             // Make sure it's JITted

             action();

@@ -156,7 +243,9 @@
         private static TimeSpan TimeAction(Action action, int iterations)

         {

             GC.Collect();

+            GC.GetTotalMemory(true);

             GC.WaitForPendingFinalizers();

+

             Stopwatch sw = Stopwatch.StartNew();

             for (int i = 0; i < iterations; i++)

             {