Optimize visualbench offscreen blits

Modifies NvprWrappedBenchmark to present its offscreen canvas via
GrContext::copySurface rather than going through the SkCanvas API.
Only copies a subrectangle the size of the benchmark, not the entire
canvas.

BUG=skia:

Review URL: https://codereview.chromium.org/1443263002
diff --git a/tools/VisualBench/VisualBenchmarkStream.cpp b/tools/VisualBench/VisualBenchmarkStream.cpp
index cfeff75..7f072e8 100644
--- a/tools/VisualBench/VisualBenchmarkStream.cpp
+++ b/tools/VisualBench/VisualBenchmarkStream.cpp
@@ -39,9 +39,15 @@
 public:
     WarmupBench() {
         sk_tool_utils::make_big_path(fPath);
+        fPerlinRect = SkRect::MakeLTRB(0., 0., 400., 400.);
     }
 private:
     const char* onGetName() override { return "warmupbench"; }
+    SkIPoint onGetSize() override {  // extent covering both the big path and the perlin rect
+        const SkRect& pb = fPath.getBounds();
+        return SkIPoint::Make(SkScalarCeilToInt(SkTMax(pb.right(), fPerlinRect.right())),
+                              SkScalarCeilToInt(SkTMax(pb.bottom(), fPerlinRect.bottom())));
+    }
     void onDraw(int loops, SkCanvas* canvas) override {
         // We draw a big path to warm up the cpu, and then use perlin noise shader to warm up the
         // gpu
@@ -52,10 +58,9 @@
         SkPaint perlinPaint;
         perlinPaint.setShader(SkPerlinNoiseShader::CreateTurbulence(0.1f, 0.1f, 1, 0,
                                                                     nullptr))->unref();
-        SkRect rect = SkRect::MakeLTRB(0., 0., 400., 400.);
         for (int i = 0; i < loops; i++) {
             canvas->drawPath(fPath, paint);
-            canvas->drawRect(rect, perlinPaint);
+            canvas->drawRect(fPerlinRect, perlinPaint);
 #if SK_SUPPORT_GPU
             // Ensure the GrContext doesn't batch across draw loops.
             if (GrContext* context = canvas->getGrContext()) {
@@ -65,6 +70,7 @@
         }
     }
     SkPath fPath;
+    SkRect fPerlinRect;
 };
 
 VisualBenchmarkStream::VisualBenchmarkStream(const SkSurfaceProps& surfaceProps)
diff --git a/tools/VisualBench/VisualSKPBench.cpp b/tools/VisualBench/VisualSKPBench.cpp
index 628265e..5d15d7e 100644
--- a/tools/VisualBench/VisualSKPBench.cpp
+++ b/tools/VisualBench/VisualSKPBench.cpp
@@ -14,6 +14,7 @@
 
 VisualSKPBench::VisualSKPBench(const char* name, const SkPicture* pic)
     : fPic(SkRef(pic))
+    , fCullRect(fPic->cullRect().roundOut())
     , fName(name) {
     fUniqueName.printf("%s", name);
 }
@@ -30,7 +31,17 @@
     return backend != kNonRendering_Backend;
 }
 
+SkIPoint VisualSKPBench::onGetSize() {
+    return SkIPoint::Make(fCullRect.width(), fCullRect.height());
+}
+
 void VisualSKPBench::onDraw(int loops, SkCanvas* canvas) {
+    bool isOffset = SkToBool(fCullRect.left() | fCullRect.top());
+    if (isOffset) {
+        canvas->save();
+        canvas->translate(SkIntToScalar(-fCullRect.left()), SkIntToScalar(-fCullRect.top()));
+    }
+
     for (int i = 0; i < loops; i++) {
         canvas->drawPicture(fPic);
 #if SK_SUPPORT_GPU
@@ -40,4 +51,8 @@
         }
 #endif
     }
+
+    if (isOffset) {
+        canvas->restore();
+    }
 }
diff --git a/tools/VisualBench/VisualSKPBench.h b/tools/VisualBench/VisualSKPBench.h
index 6b9593b..dc22fec 100644
--- a/tools/VisualBench/VisualSKPBench.h
+++ b/tools/VisualBench/VisualSKPBench.h
@@ -24,10 +24,12 @@
     const char* onGetName() override;
     const char* onGetUniqueName() override;
     bool isSuitableFor(Backend backend) override;
+    SkIPoint onGetSize() override;
     void onDraw(int loops, SkCanvas* canvas) override;
 
 private:
     SkAutoTUnref<const SkPicture> fPic;
+    SkIRect fCullRect;
     SkString fName;
     SkString fUniqueName;
 
diff --git a/tools/VisualBench/WrappedBenchmark.h b/tools/VisualBench/WrappedBenchmark.h
index ffa0da3..dba6134 100644
--- a/tools/VisualBench/WrappedBenchmark.h
+++ b/tools/VisualBench/WrappedBenchmark.h
@@ -9,7 +9,10 @@
 #define WrappedBenchmark_DEFINED
 
 #include "Benchmark.h"
+#include "SkDevice.h"
 #include "SkSurface.h"
+#include "GrContext.h"
+#include "GrRenderTarget.h"
 
 // Wrap some other benchmark to allow specialization to either
 // cpu or gpu backends. The derived class will override 'setupOffScreen'
@@ -28,7 +31,8 @@
 
     void onDelayedSetup() override { fBench->delayedSetup(); }
     void onPerCanvasPreDraw(SkCanvas* canvas) override {
-        fOffScreen.reset(this->setupOffScreen(canvas));
+        this->setupOffScreen(canvas);
+        fOffScreen->getCanvas()->clear(SK_ColorWHITE);
         fBench->perCanvasPreDraw(fOffScreen->getCanvas());
     }
     void onPreDraw(SkCanvas* canvas) override {
@@ -47,14 +51,21 @@
     void onDraw(int loops, SkCanvas* canvas) override {
         SkASSERT(fOffScreen.get());
         fBench->draw(loops, fOffScreen->getCanvas());
-        SkAutoTUnref<SkImage> image(fOffScreen->newImageSnapshot());
-        canvas->drawImage(image, 0,0);
+        this->blitToScreen(canvas);
     }
 
     virtual SkIPoint onGetSize() override { return fBench->getSize(); }
 
-private:
-    virtual SkSurface* setupOffScreen(SkCanvas*)=0;
+protected:
+    virtual void setupOffScreen(SkCanvas*)=0;
+
+    void blitToScreen(SkCanvas* canvas) {  // clamp the blit rect to bench size and surface size
+        int w = SkTMin(fBench->getSize().fX, fOffScreen->width());
+        int h = SkTMin(fBench->getSize().fY, fOffScreen->height());  // bound by surface height
+        this->onBlitToScreen(canvas, w, h);
+    }
+
+    virtual void onBlitToScreen(SkCanvas* canvas, int w, int h) = 0;
 
     SkSurfaceProps          fSurfaceProps;
     SkAutoTUnref<SkSurface> fOffScreen;
@@ -68,8 +79,16 @@
         : INHERITED(surfaceProps, bench) {}
 
 private:
-    SkSurface* setupOffScreen(SkCanvas* canvas) override {
-        return SkSurface::NewRaster(canvas->imageInfo(), &this->surfaceProps());
+    void setupOffScreen(SkCanvas* canvas) override {
+        fOffScreen.reset(SkSurface::NewRaster(canvas->imageInfo(), &this->surfaceProps()));
+    }
+
+    void onBlitToScreen(SkCanvas* canvas, int w, int h) override {
+        SkAutoTUnref<SkImage> image(fOffScreen->newImageSnapshot());
+        SkPaint blitPaint;
+        blitPaint.setXfermodeMode(SkXfermode::kSrc_Mode);
+        canvas->drawImageRect(image, SkIRect::MakeWH(w, h),
+                              SkRect::MakeWH(SkIntToScalar(w), SkIntToScalar(h)), &blitPaint);
     }
 
     typedef WrappedBenchmark INHERITED;
@@ -84,12 +103,36 @@
         , fNumSamples(numSamples) {}
 
 private:
-    SkSurface* setupOffScreen(SkCanvas* canvas) override {
-        return SkSurface::NewRenderTarget(canvas->getGrContext(),
-                                          SkSurface::kNo_Budgeted,
-                                          canvas->imageInfo(),
-                                          fNumSamples,
-                                          &this->surfaceProps());
+    void setupOffScreen(SkCanvas* canvas) override {
+        fOffScreen.reset(SkSurface::NewRenderTarget(canvas->getGrContext(),
+                                                    SkSurface::kNo_Budgeted,
+                                                    canvas->imageInfo(),
+                                                    fNumSamples,
+                                                    &this->surfaceProps()));
+    }
+
+    void onBlitToScreen(SkCanvas* canvas, int w, int h) override {
+        // We call copySurface directly on the underlying GPU surfaces for a more efficient blit.
+        GrRenderTarget* dst, *src;
+
+        SkCanvas::LayerIter canvasIter(canvas, false);
+        SkAssertResult((dst = canvasIter.device()->accessRenderTarget()));
+
+        SkCanvas::LayerIter offscreenIter(fOffScreen->getCanvas(), false);
+        SkAssertResult((src = offscreenIter.device()->accessRenderTarget()));
+
+        SkASSERT(dst->getContext() == src->getContext());
+
+        dst->getContext()->copySurface(dst, src, SkIRect::MakeWH(w, h), SkIPoint::Make(0, 0));
+
+#ifdef SK_DEBUG
+        // This method should not be called while layers are saved.
+        canvasIter.next();
+        SkASSERT(canvasIter.done());
+
+        offscreenIter.next();
+        SkASSERT(offscreenIter.done());
+#endif
     }
 
     int fNumSamples;