Refine bench_record and bench_playback:

  - use high-precision wall timer only
  - warm caches once before measuring
  - measure independent samples and compute statistics (min, mean, max, variance) over them
  - add --verbose to control how much data we output

Also remove some unloved features from bench_record.

BUG=skia:
R=jcgregorio@google.com, mtklein@google.com

Author: mtklein@chromium.org

Review URL: https://codereview.chromium.org/338203002
diff --git a/tools/bench_playback.cpp b/tools/bench_playback.cpp
index ffe9e23..f07fa8e 100644
--- a/tools/bench_playback.cpp
+++ b/tools/bench_playback.cpp
@@ -5,7 +5,6 @@
  * found in the LICENSE file.
  */
 
-#include "BenchTimer.h"
 #include "SkCommandLineFlags.h"
 #include "SkForceLinking.h"
 #include "SkGraphics.h"
@@ -16,19 +15,27 @@
 #include "SkStream.h"
 #include "SkString.h"
 
+#include "BenchTimer.h"
+#include "Stats.h"
+
+typedef WallTimer Timer;
+
 __SK_FORCE_IMAGE_DECODER_LINKING;
 
-DEFINE_string2(skps, r, "skps", "Directory containing SKPs to read and re-record.");
-DEFINE_int32(loops, 10, "Number of times to play back each SKP.");
+DEFINE_string2(skps, r, "skps", "Directory containing SKPs to playback.");
+DEFINE_int32(samples, 10, "Gather this many samples of each picture playback.");
 DEFINE_bool(skr, false, "Play via SkRecord instead of SkPicture.");
 DEFINE_int32(tile, 1000000000, "Simulated tile size.");
 DEFINE_string(match, "", "The usual filters on file names of SKPs to bench.");
 DEFINE_string(timescale, "ms", "Print times in ms, us, or ns");
+DEFINE_int32(verbose, 0, "0: print min sample; "
+                         "1: print min, mean, max and noise indication; "
+                         "2: print all samples");  // Selects how much output bench() prints.
 
-static double scale_time(double ms) {
-    if (FLAGS_timescale.contains("us")) ms *= 1000;
-    if (FLAGS_timescale.contains("ns")) ms *= 1000000;
-    return ms;
+static double timescale() {
+    // Factor for the unit named by --timescale: 1000 for "us", 1000000 for "ns", otherwise 1.
+    return FLAGS_timescale.contains("us") ? 1000
+         : FLAGS_timescale.contains("ns") ? 1000000 : 1;
 }
 
 static SkPicture* rerecord_with_tilegrid(SkPicture& src) {
@@ -49,6 +56,14 @@
     return recording.releasePlayback();
 }
 
+static void draw(const EXPERIMENTAL::SkPlayback& skr, const SkPicture& skp, SkCanvas* canvas) {
+    if (!FLAGS_skr) {
+        skp.draw(canvas);  // Default: play back via SkPicture.
+        return;
+    }
+    skr.draw(canvas);  // --skr: play back via the SkRecord-based SkPlayback.
+}
+
 static void bench(SkPMColor* scratch, SkPicture& src, const char* name) {
     SkAutoTUnref<SkPicture> picture(rerecord_with_tilegrid(src));
     SkAutoTDelete<EXPERIMENTAL::SkPlayback> record(rerecord_with_skr(src));
@@ -59,19 +74,34 @@
                                                                 src.width() * sizeof(SkPMColor)));
     canvas->clipRect(SkRect::MakeWH(SkIntToScalar(FLAGS_tile), SkIntToScalar(FLAGS_tile)));
 
-    BenchTimer timer;
-    timer.start();
-    for (int i = 0; i < FLAGS_loops; i++) {
-        if (FLAGS_skr) {
-            record->draw(canvas.get());
-        } else {
-            picture->draw(canvas.get());
-        }
-    }
-    timer.end();
+    // Draw once to warm any caches.  The first sample otherwise can be very noisy.
+    draw(*record, *picture, canvas.get());
 
-    const double msPerLoop = timer.fCpu / (double)FLAGS_loops;
-    printf("%f\t%s\n", scale_time(msPerLoop), name);
+    Timer timer;
+    SkAutoTMalloc<double> samples(FLAGS_samples);
+    for (int i = 0; i < FLAGS_samples; i++) {
+        // We assume timer overhead (typically, ~30ns) is insignificant
+        // compared to draw runtime (at least ~100us, usually several ms).
+        timer.start(timescale());
+        draw(*record, *picture, canvas.get());
+        timer.end();
+        samples[i] = timer.fWall;
+    }
+
+    Stats stats(samples.get(), FLAGS_samples);
+    if (FLAGS_verbose == 0) {
+        printf("%g\t%s\n", stats.min, name);
+    } else if (FLAGS_verbose == 1) {
+        // Get a rough idea of how noisy the measurements were.
+        const double noisePercent = 100 * sqrt(stats.var) / stats.mean;
+        printf("%g\t%g\t%g\t±%.0f%%\t%s\n", stats.min, stats.mean, stats.max, noisePercent, name);
+    } else if (FLAGS_verbose == 2) {
+        printf("%s", name);
+        for (int i = 0; i < FLAGS_samples; i++) {
+            printf("\t%g", samples[i]);
+        }
+        printf("\n");
+    }
 }
 
 int tool_main(int argc, char** argv);