Began logging more gpu stats from nanobench

BUG=skia:

Review URL: https://codereview.chromium.org/1489033004
diff --git a/bench/Benchmark.h b/bench/Benchmark.h
index a403a6e..30c7aa9 100644
--- a/bench/Benchmark.h
+++ b/bench/Benchmark.h
@@ -124,6 +124,8 @@
         this->perCanvasPostDraw(canvas);
     }
 
+    virtual void getGpuStats(SkCanvas*, SkTArray<SkString>* keys, SkTArray<double>* values) {}
+
 protected:
     virtual void setupPaint(SkPaint* paint);
 
diff --git a/bench/ResultsWriter.h b/bench/ResultsWriter.h
index 2906429..bf74d47 100644
--- a/bench/ResultsWriter.h
+++ b/bench/ResultsWriter.h
@@ -87,26 +87,26 @@
     }
 
     // Added under "key".
-    virtual void key(const char name[], const char value[]) {
+    void key(const char name[], const char value[]) override {
         fRoot["key"][name] = value;
     }
     // Inserted directly into the root.
-    virtual void property(const char name[], const char value[]) {
+    void property(const char name[], const char value[]) override {
         fRoot[name] = value;
     }
-    virtual void bench(const char name[], int32_t x, int32_t y) {
+    void bench(const char name[], int32_t x, int32_t y) override {
         SkString id = SkStringPrintf( "%s_%d_%d", name, x, y);
         fResults[id.c_str()] = Json::Value(Json::objectValue);
         fBench = &fResults[id.c_str()];
     }
-    virtual void config(const char name[]) {
+    void config(const char name[]) override {
         SkASSERT(fBench);
         fConfig = &(*fBench)[name];
     }
-    virtual void configOption(const char name[], const char* value) {
+    void configOption(const char name[], const char* value) override {
         (*fConfig)["options"][name] = value;
     }
-    virtual void metric(const char name[], double ms) {
+    void metric(const char name[], double ms) override {
         // Don't record if nan, or -nan.
         if (sk_double_isnan(ms)) {
             return;
@@ -116,7 +116,7 @@
     }
 
     // Flush to storage now please.
-    virtual void flush() {
+    void flush() override {
         SkString dirname = SkOSPath::Dirname(fFilename.c_str());
         if (!sk_exists(dirname.c_str(), kWrite_SkFILE_Flag)) {
             if (!sk_mkdir(dirname.c_str())) {
diff --git a/bench/SKPBench.cpp b/bench/SKPBench.cpp
index 910af6b..c548090 100644
--- a/bench/SKPBench.cpp
+++ b/bench/SKPBench.cpp
@@ -155,3 +155,41 @@
         fSurfaces[j]->getCanvas()->flush();
     }
 }
+
+#if SK_SUPPORT_GPU
+static void draw_pic_for_stats(SkCanvas* canvas, GrContext* context, const SkPicture* picture,
+                               SkTArray<SkString>* keys, SkTArray<double>* values,
+                               const char* tag) {
+    context->resetGpuStats();  // stats dumped below are collected after this reset
+    canvas->drawPicture(picture);
+    canvas->flush();
+
+    const int firstNewKey = keys->count();  // keys below this index were already present
+    context->dumpGpuStatsKeyValuePairs(keys, values);
+
+    // Suffix "_<tag>" onto only the keys appended by the dump above.
+    for (int i = firstNewKey; i < keys->count(); i++) {
+        (*keys)[i].appendf("_%s", tag);
+    }
+}
+#endif
+
+void SKPBench::getGpuStats(SkCanvas* canvas, SkTArray<SkString>* keys, SkTArray<double>* values) {
+#if SK_SUPPORT_GPU
+    // Draws fPic once per frame below, appending each draw's GPU stats to keys / values.
+    GrContext* context = canvas->getGrContext();
+    if (!context) {
+        return;  // canvas has no GrContext: keys / values are left untouched
+    }
+
+    // TODO refactor this out if we want to test other subclasses of skpbench
+    context->flush();
+    context->freeGpuResources();  // presumably so first_frame starts cold - TODO confirm
+    context->resetContext();
+    draw_pic_for_stats(canvas, context, fPic, keys, values, "first_frame");
+
+    // Second frame: same draw again, with no resource free / context reset in between.
+    draw_pic_for_stats(canvas, context, fPic, keys, values, "second_frame");
+
+#endif
+}
diff --git a/bench/SKPBench.h b/bench/SKPBench.h
index 89c9a36..1f34a00 100644
--- a/bench/SKPBench.h
+++ b/bench/SKPBench.h
@@ -28,6 +28,8 @@
         return fDoLooping ? defaultLoops : 1;
     }
 
+    void getGpuStats(SkCanvas*, SkTArray<SkString>* keys, SkTArray<double>* values) override;
+
 protected:
     const char* onGetName() override;
     const char* onGetUniqueName() override;
diff --git a/bench/nanobench.cpp b/bench/nanobench.cpp
index 99483eb..602d613 100644
--- a/bench/nanobench.cpp
+++ b/bench/nanobench.cpp
@@ -107,6 +107,7 @@
 DEFINE_int32(flushEvery, 10, "Flush --outResultsFile every Nth run.");
 DEFINE_bool(resetGpuContext, true, "Reset the GrContext before running each test.");
 DEFINE_bool(gpuStats, false, "Print GPU stats after each gpu benchmark?");
+DEFINE_bool(gpuStatsDump, false, "Dump GPU stats after each benchmark to json");
 
 static double now_ms() { return SkTime::GetNSecs() * 1e-6; }
 
@@ -1184,6 +1185,16 @@
                 }
             }
 
+#if SK_SUPPORT_GPU
+            SkTArray<SkString> keys;
+            SkTArray<double> values;
+            bool gpuStatsDump = FLAGS_gpuStatsDump && Benchmark::kGPU_Backend == configs[i].backend;
+            if (gpuStatsDump) {
+                // TODO cache stats
+                bench->getGpuStats(canvas, &keys, &values);
+            }
+#endif
+
             bench->perCanvasPostDraw(canvas);
 
             if (Benchmark::kNonRendering_Backend != target->config.backend &&
@@ -1206,6 +1217,16 @@
             benchStream.fillCurrentOptions(log.get());
             target->fillOptions(log.get());
             log->metric("min_ms",    stats.min);
+#if SK_SUPPORT_GPU
+            if (gpuStatsDump) {
+                // dump to json, only SKPBench currently returns valid keys / values
+                SkASSERT(keys.count() == values.count());
+                for (int i = 0; i < keys.count(); i++) {
+                    log->metric(keys[i].c_str(), values[i]);
+                }
+            }
+#endif
+
             if (runs++ % FLAGS_flushEvery == 0) {
                 log->flush();
             }
@@ -1240,13 +1261,14 @@
                         , bench->getUniqueName()
                         );
             }
+
 #if SK_SUPPORT_GPU
-            if (FLAGS_gpuStats &&
-                Benchmark::kGPU_Backend == configs[i].backend) {
+            if (FLAGS_gpuStats && Benchmark::kGPU_Backend == configs[i].backend) {
                 gGrFactory->get(configs[i].ctxType)->printCacheStats();
                 gGrFactory->get(configs[i].ctxType)->printGpuStats();
             }
 #endif
+
             if (FLAGS_verbose) {
                 SkDebugf("Samples:  ");
                 for (int i = 0; i < samples.count(); i++) {
diff --git a/include/gpu/GrContext.h b/include/gpu/GrContext.h
index b492fe7..df96592 100644
--- a/include/gpu/GrContext.h
+++ b/include/gpu/GrContext.h
@@ -330,12 +330,16 @@
     // Called by tests that draw directly to the context via GrDrawTarget
     void getTestTarget(GrTestTarget*, GrRenderTarget* rt);
 
+    /** Resets the GPU stats if GR_GPU_STATS == 1. */
+    void resetGpuStats() const;
+
     /** Prints cache stats to the string if GR_CACHE_STATS == 1. */
     void dumpCacheStats(SkString*) const;
     void printCacheStats() const;
 
     /** Prints GPU stats to the string if GR_GPU_STATS == 1. */
     void dumpGpuStats(SkString*) const;
+    void dumpGpuStatsKeyValuePairs(SkTArray<SkString>* keys, SkTArray<double>* values) const;
     void printGpuStats() const;
 
     /** Specify the TextBlob cache limit. If the current cache exceeds this limit it will purge.
diff --git a/src/gpu/GrGpu.h b/src/gpu/GrGpu.h
index c5fa61b..dd33cff 100644
--- a/src/gpu/GrGpu.h
+++ b/src/gpu/GrGpu.h
@@ -342,6 +342,7 @@
         void incStencilAttachmentCreates() { fStencilAttachmentCreates++; }
         void incNumDraws() { fNumDraws++; }
         void dump(SkString*);
+        void dumpKeyValuePairs(SkTArray<SkString>* keys, SkTArray<double>* values);
 
     private:
         int fRenderTargetBinds;
@@ -351,7 +352,8 @@
         int fStencilAttachmentCreates;
         int fNumDraws;
 #else
-        void dump(SkString*) {};
+        void dump(SkString*) {}
+        void dumpKeyValuePairs(SkTArray<SkString>*, SkTArray<double>*) {}
         void incRenderTargetBinds() {}
         void incShaderCompilations() {}
         void incTextureCreates() {}
diff --git a/src/gpu/GrTest.cpp b/src/gpu/GrTest.cpp
index b0d92fe..29b13c7 100644
--- a/src/gpu/GrTest.cpp
+++ b/src/gpu/GrTest.cpp
@@ -93,6 +93,12 @@
     fResourceCache->purgeAllUnlocked();
 }
 
+void GrContext::resetGpuStats() const {
+#if GR_GPU_STATS  // no-op unless GPU stats are compiled in
+    fGpu->stats()->reset();
+#endif
+}
+
 void GrContext::dumpCacheStats(SkString* out) const {
 #if GR_CACHE_STATS
     fResourceCache->dumpStats(out);
@@ -111,6 +117,13 @@
 #endif
 }
 
+void GrContext::dumpGpuStatsKeyValuePairs(SkTArray<SkString>* keys,
+                                          SkTArray<double>* values) const {
+#if GR_GPU_STATS  // nothing is appended when GPU stats are compiled out
+    fGpu->stats()->dumpKeyValuePairs(keys, values);
+#endif
+}
+
 void GrContext::printGpuStats() const {
     SkString out;
     this->dumpGpuStats(&out);
@@ -155,6 +168,16 @@
     out->appendf("Stencil Buffer Creates: %d\n", fStencilAttachmentCreates);
     out->appendf("Number of draws: %d\n", fNumDraws);
 }
+
+void GrGpu::Stats::dumpKeyValuePairs(SkTArray<SkString>* keys, SkTArray<double>* values) {
+    keys->push_back(SkString("render_target_binds")); values->push_back(fRenderTargetBinds);
+    keys->push_back(SkString("shader_compilations")); values->push_back(fShaderCompilations);
+    keys->push_back(SkString("textures_created")); values->push_back(fTextureCreates);
+    keys->push_back(SkString("texture_uploads")); values->push_back(fTextureUploads);
+    keys->push_back(SkString("stencil_buffer_creates")); values->push_back(fStencilAttachmentCreates);
+    keys->push_back(SkString("number_of_draws")); values->push_back(fNumDraws);
+}
+
 #endif
 
 #if GR_CACHE_STATS