Add samplingTime mode to nanobench

Adds a nanobench mode that takes samples for a fixed amount of time, rather than taking a fixed number of samples.

BUG=skia:

Review URL: https://codereview.chromium.org/1204153002

commit: e1b8958877a512bf83cbc2c72bb31e7d71b06f43 [log] [tgz]
author: cdalton <cdalton@nvidia.com> Thu Jun 25 19:17:08 2015 -0700
committer: Commit bot <commit-bot@chromium.org> Thu Jun 25 19:17:08 2015 -0700
tree: 91db5defa35079fe6b7397a02a8ed7a1b170abde
parent: 1852ec2b46a2a0efbb3550ff4d8b640f56a810e9 [diff]
diff --git a/bench/nanobench.cpp b/bench/nanobench.cpp
index 9f3887b..4aad61d 100644
--- a/bench/nanobench.cpp
+++ b/bench/nanobench.cpp

@@ -55,6 +55,8 @@
 
 __SK_FORCE_IMAGE_DECODER_LINKING;
 
+static const int kTimedSampling = 0;
+
 static const int kAutoTuneLoops = 0;
 
 static const int kDefaultLoops =
@@ -72,9 +74,17 @@
     return help;
 }
 
+static SkString to_string(int n) {
+    SkString str;
+    str.appendS32(n);
+    return str;
+}
+
 DEFINE_int32(loops, kDefaultLoops, loops_help_txt().c_str());
 
 DEFINE_int32(samples, 10, "Number of samples to measure for each bench.");
+DEFINE_string(samplingTime, "0", "Amount of time to run each bench. Takes precedence over samples."
+                                 "Must be \"0\", \"%%lfs\", or \"%%lfms\"");
 DEFINE_int32(overheadLoops, 100000, "Loops to estimate timer overhead.");
 DEFINE_double(overheadGoal, 0.0001,
               "Loop until timer overhead is at most this fraction of our measurments.");
@@ -269,7 +279,7 @@
 }
 
 static int kFailedLoops = -2;
-static int cpu_bench(const double overhead, Target* target, Benchmark* bench, double* samples) {
+static int setup_cpu_bench(const double overhead, Target* target, Benchmark* bench) {
     // First figure out approximately how many loops of bench it takes to make overhead negligible.
     double bench_plus_overhead = 0.0;
     int round = 0;
@@ -310,16 +320,10 @@
         loops = detect_forever_loops(loops);
     }
 
-    for (int i = 0; i < FLAGS_samples; i++) {
-        samples[i] = time(loops, bench, target) / loops;
-    }
     return loops;
 }
 
-static int gpu_bench(Target* target,
-                     Benchmark* bench,
-                     double* samples,
-                     int maxGpuFrameLag) {
+static int setup_gpu_bench(Target* target, Benchmark* bench, int maxGpuFrameLag) {
     // First, figure out how many loops it'll take to get a frame up to FLAGS_gpuMs.
     int loops = bench->calculateLoops(FLAGS_loops);
     if (kAutoTuneLoops == loops) {
@@ -355,11 +359,6 @@
         time(loops, bench, target);
     }
 
-    // Now, actually do the timing!
-    for (int i = 0; i < FLAGS_samples; i++) {
-        samples[i] = time(loops, bench, target) / loops;
-    }
-
     return loops;
 }
 
@@ -946,6 +945,24 @@
         FLAGS_verbose = true;
     }
 
+    double samplingTimeMs = 0;
+    if (0 != strcmp("0", FLAGS_samplingTime[0])) {
+        SkSTArray<8, char> timeUnit;
+        timeUnit.push_back_n(static_cast<int>(strlen(FLAGS_samplingTime[0])) + 1);
+        if (2 != sscanf(FLAGS_samplingTime[0], "%lf%s", &samplingTimeMs, timeUnit.begin()) ||
+            (0 != strcmp("s", timeUnit.begin()) && 0 != strcmp("ms", timeUnit.begin()))) {
+            SkDebugf("Invalid --samplingTime \"%s\". Must be \"0\", \"%%lfs\", or \"%%lfms\"\n",
+                     FLAGS_samplingTime[0]);
+            exit(0);
+        }
+        if (0 == strcmp("s", timeUnit.begin())) {
+            samplingTimeMs *= 1000;
+        }
+        if (samplingTimeMs) {
+            FLAGS_samples = kTimedSampling;
+        }
+    }
+
     if (kAutoTuneLoops != FLAGS_loops) {
         FLAGS_samples     = 1;
         FLAGS_gpuFrameLag = 0;
@@ -983,7 +1000,7 @@
     const double overhead = estimate_timer_overhead();
     SkDebugf("Timer overhead: %s\n", HUMANIZE(overhead));
 
-    SkAutoTMalloc<double> samples(FLAGS_samples);
+    SkTArray<double> samples;
 
     if (kAutoTuneLoops != FLAGS_loops) {
         SkDebugf("Fixed number of loops; times would only be misleading so we won't print them.\n");
@@ -991,6 +1008,8 @@
         // No header.
     } else if (FLAGS_quiet) {
         SkDebugf("median\tbench\tconfig\n");
+    } else if (kTimedSampling == FLAGS_samples) {
+        SkDebugf("curr/maxrss\tloops\tmin\tmedian\tmean\tmax\tstddev\tsamples\tconfig\tbench\n");
     } else {
         SkDebugf("curr/maxrss\tloops\tmin\tmedian\tmean\tmax\tstddev\t%-*s\tconfig\tbench\n",
                  FLAGS_samples, "samples");
@@ -1022,11 +1041,29 @@
             targets[j]->setup();
             bench->perCanvasPreDraw(canvas);
 
-            int frameLag;
-            const int loops =
-                targets[j]->needsFrameTiming(&frameLag)
-                ? gpu_bench(targets[j], bench.get(), samples.get(), frameLag)
-                : cpu_bench(overhead, targets[j], bench.get(), samples.get());
+            int maxFrameLag;
+            const int loops = targets[j]->needsFrameTiming(&maxFrameLag)
+                ? setup_gpu_bench(targets[j], bench.get(), maxFrameLag)
+                : setup_cpu_bench(overhead, targets[j], bench.get());
+
+            if (kTimedSampling != FLAGS_samples) {
+                samples.reset(FLAGS_samples);
+                for (int s = 0; s < FLAGS_samples; s++) {
+                    samples[s] = time(loops, bench, targets[j]) / loops;
+                }
+            } else if (samplingTimeMs) {
+                samples.reset();
+                if (FLAGS_verbose) {
+                    SkDebugf("Begin sampling %s for %ims\n",
+                             bench->getUniqueName(), static_cast<int>(samplingTimeMs));
+                }
+                WallTimer timer;
+                timer.start();
+                do {
+                    samples.push_back(time(loops, bench, targets[j]) / loops);
+                    timer.end();
+                } while (timer.fWall < samplingTimeMs);
+            }
 
             bench->perCanvasPostDraw(canvas);
 
@@ -1043,7 +1080,7 @@
                 continue;
             }
 
-            Stats stats(samples.get(), FLAGS_samples);
+            Stats stats(samples);
             log->config(config);
             log->configOption("name", bench->getName());
             benchStream.fillCurrentOptions(log.get());
@@ -1063,7 +1100,7 @@
                          , bench->getUniqueName()
                          , config);
             } else if (FLAGS_verbose) {
-                for (int i = 0; i < FLAGS_samples; i++) {
+                for (int i = 0; i < samples.count(); i++) {
                     SkDebugf("%s  ", HUMANIZE(samples[i]));
                 }
                 SkDebugf("%s\n", bench->getUniqueName());
@@ -1083,7 +1120,8 @@
                         , HUMANIZE(stats.mean)
                         , HUMANIZE(stats.max)
                         , stddev_percent
-                        , stats.plot.c_str()
+                        , kTimedSampling != FLAGS_samples ? stats.plot.c_str()
+                                                          : to_string(samples.count()).c_str()
                         , config
                         , bench->getUniqueName()
                         );

diff --git a/tools/Stats.h b/tools/Stats.h
index 8487a94..12c1d35 100644
--- a/tools/Stats.h
+++ b/tools/Stats.h

@@ -1,3 +1,10 @@
+/*
+ * Copyright 2015 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
 #ifndef Stats_DEFINED
 #define Stats_DEFINED
 
@@ -11,7 +18,13 @@
 #endif
 
 struct Stats {
-    Stats(const double samples[], int n) {
+    Stats(const SkTArray<double>& samples) {
+        int n = samples.count();
+        if (!n) {
+            min = max = mean = var = median = 0;
+            return;
+        }
+
         min = samples[0];
         max = samples[0];
         for (int i = 0; i < n; i++) {
@@ -32,7 +45,7 @@
         var = err / (n-1);
 
         SkAutoTMalloc<double> sorted(n);
-        memcpy(sorted.get(), samples, n * sizeof(double));
+        memcpy(sorted.get(), samples.begin(), n * sizeof(double));
         SkTQSort(sorted.get(), sorted.get() + n - 1);
         median = sorted[n/2];
 

diff --git a/tools/VisualBench.cpp b/tools/VisualBench.cpp
index cbc8994..ac53b43 100644
--- a/tools/VisualBench.cpp
+++ b/tools/VisualBench.cpp

@@ -152,7 +152,7 @@
         SkDebugf("%s\n", shortName.c_str());
     } else {
         SkASSERT(measurements.count());
-        Stats stats(measurements.begin(), measurements.count());
+        Stats stats(measurements);
         const double stdDevPercent = 100 * sqrt(stats.var) / stats.mean;
         SkDebugf("%4d/%-4dMB\t%d\t%d\t%s\t%s\t%s\t%s\t%.0f%%\t%s\n",
                  sk_tools::getCurrResidentSetSizeMB(),
commit	e1b8958877a512bf83cbc2c72bb31e7d71b06f43	[log] [tgz]
author	cdalton <cdalton@nvidia.com>	Thu Jun 25 19:17:08 2015 -0700
committer	Commit bot <commit-bot@chromium.org>	Thu Jun 25 19:17:08 2015 -0700
tree	91db5defa35079fe6b7397a02a8ed7a1b170abde
parent	1852ec2b46a2a0efbb3550ff4d8b640f56a810e9 [diff]