nanobench: split CPU and GPU .SKP tile sizes.

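Let's make CPU-bound .SKP benching mimic Chrome's tiles: CPU playback now uses
256x256 tiles, while GPU playback keeps the previous 1600x512 tiles.
Unfortunately, the CPU code also performs a lot better with those big, wide
1600x512 tiles...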

BUG=skia:

Review URL: https://codereview.chromium.org/1189863002
diff --git a/bench/SKPBench.cpp b/bench/SKPBench.cpp
index 592d042..757ee46 100644
--- a/bench/SKPBench.cpp
+++ b/bench/SKPBench.cpp
@@ -10,8 +10,12 @@
 #include "SkMultiPictureDraw.h"
 #include "SkSurface.h"
 
-DEFINE_int32(benchTileW, 1600, "Tile width  used for SKP playback.");
-DEFINE_int32(benchTileH, 512, "Tile height used for SKP playback.");
+// These CPU tile sizes are not good per se, but they are similar to what Chrome uses.
+DEFINE_int32(CPUbenchTileW, 256, "Tile width  used for CPU SKP playback.");
+DEFINE_int32(CPUbenchTileH, 256, "Tile height used for CPU SKP playback.");
+
+DEFINE_int32(GPUbenchTileW, 1600, "Tile width  used for GPU SKP playback.");
+DEFINE_int32(GPUbenchTileH, 512, "Tile height used for GPU SKP playback.");
 
 SKPBench::SKPBench(const char* name, const SkPicture* pic, const SkIRect& clip, SkScalar scale,
                    bool useMultiPictureDraw)
@@ -44,8 +48,12 @@
     SkIRect bounds;
     SkAssertResult(canvas->getClipDeviceBounds(&bounds));
 
-    int tileW = SkTMin(FLAGS_benchTileW, bounds.width());
-    int tileH = SkTMin(FLAGS_benchTileH, bounds.height());
+    const bool gpu = canvas->getGrContext() != nullptr;
+    int tileW = gpu ? FLAGS_GPUbenchTileW : FLAGS_CPUbenchTileW,
+        tileH = gpu ? FLAGS_GPUbenchTileH : FLAGS_CPUbenchTileH;
+
+    tileW = SkTMin(tileW, bounds.width());
+    tileH = SkTMin(tileH, bounds.height());
 
     int xTiles = SkScalarCeilToInt(bounds.width()  / SkIntToScalar(tileW));
     int yTiles = SkScalarCeilToInt(bounds.height() / SkIntToScalar(tileH));