Batch across matrix changes in drawVertices and add GM to test.

Change-Id: I6b08d37781e3c715a1d9d8c9729667ec78625836
Reviewed-on: https://skia-review.googlesource.com/7949
Commit-Queue: Brian Salomon <bsalomon@google.com>
Reviewed-by: Robert Phillips <robertphillips@google.com>
diff --git a/gm/vertices.cpp b/gm/vertices.cpp
index da79d7d..8cfb164 100644
--- a/gm/vertices.cpp
+++ b/gm/vertices.cpp
@@ -11,12 +11,13 @@
 #include "SkGradientShader.h"
 #include "SkRandom.h"
 
-static sk_sp<SkShader> make_shader1(SkScalar w, SkScalar h) {
+static constexpr SkScalar kShaderSize = 40;
+static sk_sp<SkShader> make_shader1() {
     const SkColor colors[] = {
         SK_ColorRED, SK_ColorCYAN, SK_ColorGREEN, SK_ColorWHITE,
         SK_ColorMAGENTA, SK_ColorBLUE, SK_ColorYELLOW,
     };
-    const SkPoint pts[] = { { w/4, 0 }, { 3*w/4, h } };
+    const SkPoint pts[] = {{kShaderSize / 4, 0}, {3 * kShaderSize / 4, kShaderSize}};
 
     return SkGradientShader::MakeLinear(pts, colors, nullptr, SK_ARRAY_COUNT(colors),
                                         SkShader::kMirror_TileMode);
@@ -30,10 +31,48 @@
     return SkColorFilter::MakeModeFilter(0xFFAABBCC, SkBlendMode::kDarken);
 }
 
+static constexpr SkScalar kMeshSize = 30;
+
+// Fan indices: start at the center of a 3x3 grid of vertices, then walk the perimeter back to 0.
+static constexpr uint16_t kMeshFan[] = {
+        4,
+        0, 1, 2, 5, 8, 7, 6, 3, 0
+};
+
+static const int kMeshVertexCnt = 9;
+
+static void fill_mesh(SkPoint pts[kMeshVertexCnt], SkPoint texs[kMeshVertexCnt],
+                      SkColor colors[kMeshVertexCnt]) {
+    pts[0].set(0, 0);
+    pts[1].set(kMeshSize / 2, 3);
+    pts[2].set(kMeshSize, 0);
+    pts[3].set(3, kMeshSize / 2);
+    pts[4].set(kMeshSize / 2, kMeshSize / 2);
+    pts[5].set(kMeshSize - 3, kMeshSize / 2);
+    pts[6].set(0, kMeshSize);
+    pts[7].set(kMeshSize / 2, kMeshSize - 3);
+    pts[8].set(kMeshSize, kMeshSize);
+
+    texs[0].set(0, 0);
+    texs[1].set(kShaderSize / 2, 0);
+    texs[2].set(kShaderSize, 0);
+    texs[3].set(0, kShaderSize / 2);
+    texs[4].set(kShaderSize / 2, kShaderSize / 2);
+    texs[5].set(kShaderSize, kShaderSize / 2);
+    texs[6].set(0, kShaderSize);
+    texs[7].set(kShaderSize / 2, kShaderSize);
+    texs[8].set(kShaderSize, kShaderSize);
+
+    SkRandom rand;
+    for (size_t i = 0; i < kMeshVertexCnt; ++i) {
+        colors[i] = rand.nextU() | 0xFF000000;
+    }
+}
+
 class VerticesGM : public skiagm::GM {
-    SkPoint                 fPts[9];
-    SkPoint                 fTexs[9];
-    SkColor                 fColors[9];
+    SkPoint                 fPts[kMeshVertexCnt];
+    SkPoint                 fTexs[kMeshVertexCnt];
+    SkColor                 fColors[kMeshVertexCnt];
     sk_sp<SkShader>         fShader1;
     sk_sp<SkShader>         fShader2;
     sk_sp<SkColorFilter>    fColorFilter;
@@ -44,28 +83,10 @@
 protected:
 
     void onOnceBeforeDraw() override {
-        const SkScalar X = 30;
-        const SkScalar Y = 30;
-
-        fPts[0].set(0, 0);    fPts[1].set(X/2, 3);   fPts[2].set(X, 0);
-        fPts[3].set(3, Y/2);  fPts[4].set(X/2, Y/2); fPts[5].set(X-3, Y/2);
-        fPts[6].set(0, Y);    fPts[7].set(X/2, Y-3); fPts[8].set(X, Y);
-
-        const SkScalar w = 40;
-        const SkScalar h = 40;
-
-        fTexs[0].set(0, 0);     fTexs[1].set(w/2, 0);   fTexs[2].set(w, 0);
-        fTexs[3].set(0, h/2);   fTexs[4].set(w/2, h/2); fTexs[5].set(w, h/2);
-        fTexs[6].set(0, h);     fTexs[7].set(w/2, h);   fTexs[8].set(w, h);
-
-        fShader1 = make_shader1(w, h);
+        fill_mesh(fPts, fTexs, fColors);
+        fShader1 = make_shader1();
         fShader2 = make_shader2();
         fColorFilter = make_color_filter();
-
-        SkRandom rand;
-        for (size_t i = 0; i < SK_ARRAY_COUNT(fColors); ++i) {
-            fColors[i] = rand.nextU() | 0xFF000000;
-        }
     }
 
     SkString onShortName() override {
@@ -78,12 +99,6 @@
     }
 
     void onDraw(SkCanvas* canvas) override {
-        // start with the center of a 3x3 grid
-        constexpr uint16_t fan[] = {
-            4,
-            0, 1, 2, 5, 8, 7, 6, 3, 0
-        };
-
         const struct {
             const SkColor*              fColors;
             const SkPoint*              fTexs;
@@ -160,10 +175,9 @@
                 paint.setColorFilter(rec[i].fColorFilter);
                 paint.setAlpha(rec[i].fAlpha);
                 //if (2 == x)
-                canvas->drawVertices(SkCanvas::kTriangleFan_VertexMode,
-                                     SK_ARRAY_COUNT(fPts), fPts,
-                                     rec[i].fTexs, rec[i].fColors,
-                                     modes[j], fan, SK_ARRAY_COUNT(fan), paint);
+                canvas->drawVertices(SkCanvas::kTriangleFan_VertexMode, kMeshVertexCnt, fPts,
+                                     rec[i].fTexs, rec[i].fColors, modes[j], kMeshFan,
+                                     SK_ARRAY_COUNT(kMeshFan), paint);
                 canvas->translate(40, 0);
                 ++x;
             }
@@ -179,3 +193,45 @@
 /////////////////////////////////////////////////////////////////////////////////////
 
 DEF_GM(return new VerticesGM();)
+
+// This test exists to exercise batching in the GPU backend.
+DEF_SIMPLE_GM(vertices_batching, canvas, 50, 500) {
+    SkPoint pts[kMeshVertexCnt];
+    SkPoint texs[kMeshVertexCnt];
+    SkColor colors[kMeshVertexCnt];
+    fill_mesh(pts, texs, colors);
+    SkTDArray<SkMatrix> matrices;
+    matrices.push()->reset();
+    matrices.push()->setTranslate(0, 40);
+    SkMatrix* m = matrices.push();
+    m->setRotate(45, kMeshSize / 2, kMeshSize / 2);
+    m->postScale(1.2f, .8f, kMeshSize / 2, kMeshSize / 2);
+    m->postTranslate(0, 80);
+
+    auto shader = make_shader1();
+
+    // Triangle fans can't batch so we convert to regular triangles.
+    static constexpr int kNumTris = SK_ARRAY_COUNT(kMeshFan) - 2;
+    uint16_t indices[3 * kNumTris];
+    for (size_t i = 0; i < kNumTris; ++i) {
+        indices[3 * i] = kMeshFan[0];
+        indices[3 * i + 1] = kMeshFan[i + 1];
+        indices[3 * i + 2] = kMeshFan[i + 2];
+    }
+    canvas->translate(10, 10);
+    for (bool useShader : {false, true}) {
+        for (bool useTex : {false, true}) {
+            for (const auto& m : matrices) {
+                canvas->save();
+                canvas->concat(m);
+                SkPaint paint;
+                const SkPoint* t = useTex ? texs : nullptr;
+                paint.setShader(useShader ? shader : nullptr);
+                canvas->drawVertices(SkCanvas::kTriangles_VertexMode, kMeshVertexCnt, pts, t,
+                                     colors, indices, SK_ARRAY_COUNT(indices), paint);
+                canvas->restore();
+            }
+            canvas->translate(0, 120);
+        }
+    }
+}
diff --git a/include/core/SkMatrix.h b/include/core/SkMatrix.h
index f565a53..5f665ea 100644
--- a/include/core/SkMatrix.h
+++ b/include/core/SkMatrix.h
@@ -461,8 +461,7 @@
 
     /** Like mapPoints but with custom byte stride between the points.
     */
-    void mapPointsWithStride(SkPoint dst[], SkPoint src[],
-                             size_t stride, int count) const {
+    void mapPointsWithStride(SkPoint dst[], const SkPoint src[], size_t stride, int count) const {
         SkASSERT(stride >= sizeof(SkPoint));
         SkASSERT(0 == stride % sizeof(SkScalar));
         for (int i = 0; i < count; ++i) {
diff --git a/src/gpu/ops/GrDrawVerticesOp.cpp b/src/gpu/ops/GrDrawVerticesOp.cpp
index 2b63804..43093ce 100644
--- a/src/gpu/ops/GrDrawVerticesOp.cpp
+++ b/src/gpu/ops/GrDrawVerticesOp.cpp
@@ -11,28 +11,33 @@
 #include "GrInvariantOutput.h"
 #include "GrOpFlushState.h"
 
-static sk_sp<GrGeometryProcessor> set_vertex_attributes(
-        bool hasLocalCoords,
-        int* colorOffset,
-        GrRenderTargetContext::ColorArrayType colorArrayType,
-        int* texOffset,
-        const SkMatrix& viewMatrix) {
+static sk_sp<GrGeometryProcessor> make_gp(bool clientProvidedLocalCoords,
+                                          bool pipelineReadsLocalCoords,
+                                          GrRenderTargetContext::ColorArrayType colorArrayType,
+                                          bool multipleViewMatrices,
+                                          const SkMatrix& viewMatrixIfCommon,
+                                          bool* hasLocalCoordAttribute) {
     using namespace GrDefaultGeoProcFactory;
-    *texOffset = -1;
-    *colorOffset = -1;
-
-    LocalCoords::Type localCoordsType =
-            hasLocalCoords ? LocalCoords::kHasExplicit_Type : LocalCoords::kUsePosition_Type;
-    *colorOffset = sizeof(SkPoint);
-    if (hasLocalCoords) {
-        *texOffset = sizeof(SkPoint) + sizeof(uint32_t);
+    LocalCoords::Type localCoordsType;
+    if (pipelineReadsLocalCoords) {
+        if (clientProvidedLocalCoords || multipleViewMatrices) {
+            *hasLocalCoordAttribute = true;
+            localCoordsType = LocalCoords::kHasExplicit_Type;
+        } else {
+            *hasLocalCoordAttribute = false;
+            localCoordsType = LocalCoords::kUsePosition_Type;
+        }
+    } else {
+        localCoordsType = LocalCoords::kUnused_Type;
+        *hasLocalCoordAttribute = false;
     }
+
     Color::Type colorType =
             (colorArrayType == GrRenderTargetContext::ColorArrayType::kPremulGrColor)
                     ? Color::kPremulGrColorAttribute_Type
                     : Color::kUnpremulSkColorAttribute_Type;
-    return GrDefaultGeoProcFactory::Make(colorType, Coverage::kSolid_Type, localCoordsType,
-                                         viewMatrix);
+    const SkMatrix& vm = multipleViewMatrices ? SkMatrix::I() : viewMatrixIfCommon;
+    return GrDefaultGeoProcFactory::Make(colorType, Coverage::kSolid_Type, localCoordsType, vm);
 }
 
 GrDrawVerticesOp::GrDrawVerticesOp(GrColor color, GrPrimitiveType primitiveType,
@@ -44,9 +49,9 @@
         : INHERITED(ClassID()) {
     SkASSERT(positions);
 
-    fViewMatrix = viewMatrix;
     Mesh& mesh = fMeshes.push_back();
     mesh.fColor = color;
+    mesh.fViewMatrix = viewMatrix;
 
     mesh.fPositions.append(vertexCount, positions);
     if (indices) {
@@ -98,20 +103,21 @@
         fVariableColor = false;
         fColorArrayType = GrRenderTargetContext::ColorArrayType::kPremulGrColor;
     }
-    if (!optimizations.readsLocalCoords()) {
+    if (!(fPipelineNeedsLocalCoords = optimizations.readsLocalCoords())) {
         fMeshes[0].fLocalCoords.reset();
     }
 }
 
 void GrDrawVerticesOp::onPrepareDraws(Target* target) const {
-    bool hasLocalCoords = !fMeshes[0].fLocalCoords.isEmpty();
-    int colorOffset = -1, texOffset = -1;
-    sk_sp<GrGeometryProcessor> gp(set_vertex_attributes(hasLocalCoords, &colorOffset,
-                                                        fColorArrayType, &texOffset, fViewMatrix));
+    bool clientLocalCoords = !fMeshes[0].fLocalCoords.isEmpty();
+    bool hasLocalCoordAttribute;
+    sk_sp<GrGeometryProcessor> gp =
+            make_gp(clientLocalCoords, fPipelineNeedsLocalCoords, fColorArrayType,
+                    fMultipleViewMatrices, fMeshes[0].fViewMatrix, &hasLocalCoordAttribute);
     size_t vertexStride = gp->getVertexStride();
 
     SkASSERT(vertexStride ==
-             sizeof(SkPoint) + (hasLocalCoords ? sizeof(SkPoint) : 0) + sizeof(uint32_t));
+             sizeof(SkPoint) + (hasLocalCoordAttribute ? sizeof(SkPoint) : 0) + sizeof(uint32_t));
 
     int instanceCount = fMeshes.count();
 
@@ -142,23 +148,35 @@
     int vertexOffset = 0;
     for (int i = 0; i < instanceCount; i++) {
         const Mesh& mesh = fMeshes[i];
-
-        // TODO we can actually cache this interleaved and then just memcopy
+        // Currently we require all meshes to either have explicit local coords or not, though it
+        // wouldn't be hard to allow them to mix.
+        SkASSERT(clientLocalCoords == !mesh.fLocalCoords.isEmpty());
         if (indices) {
             for (int j = 0; j < mesh.fIndices.count(); ++j, ++indexOffset) {
                 *(indices + indexOffset) = mesh.fIndices[j] + vertexOffset;
             }
         }
 
+        static constexpr size_t kColorOffset = sizeof(SkPoint);
+        static constexpr size_t kLocalCoordOffset = kColorOffset + sizeof(uint32_t);
+
         for (int j = 0; j < mesh.fPositions.count(); ++j) {
-            *((SkPoint*)verts) = mesh.fPositions[j];
-            if (mesh.fColors.isEmpty()) {
-                *(uint32_t*)((intptr_t)verts + colorOffset) = mesh.fColor;
+            if (fMultipleViewMatrices) {
+                mesh.fViewMatrix.mapPoints(((SkPoint*)verts), &mesh.fPositions[j], 1);
             } else {
-                *(uint32_t*)((intptr_t)verts + colorOffset) = mesh.fColors[j];
+                *((SkPoint*)verts) = mesh.fPositions[j];
             }
-            if (hasLocalCoords) {
-                *(SkPoint*)((intptr_t)verts + texOffset) = mesh.fLocalCoords[j];
+            if (mesh.fColors.isEmpty()) {
+                *(uint32_t*)((intptr_t)verts + kColorOffset) = mesh.fColor;
+            } else {
+                *(uint32_t*)((intptr_t)verts + kColorOffset) = mesh.fColors[j];
+            }
+            if (hasLocalCoordAttribute) {
+                if (clientLocalCoords) {
+                    *(SkPoint*)((intptr_t)verts + kLocalCoordOffset) = mesh.fLocalCoords[j];
+                } else {
+                    *(SkPoint*)((intptr_t)verts + kLocalCoordOffset) = mesh.fPositions[j];
+                }
             }
             verts = (void*)((intptr_t)verts + vertexStride);
             vertexOffset++;
@@ -188,15 +206,13 @@
         return false;
     }
 
-    // We currently use a uniform viewmatrix for this op.
-    if (!fViewMatrix.cheapEqualTo(that->fViewMatrix)) {
-        return false;
-    }
 
     if (fMeshes[0].fIndices.isEmpty() != that->fMeshes[0].fIndices.isEmpty()) {
         return false;
     }
 
+    // This could be relaxed by using positions for the one that doesn't already have explicit
+    // local coordinates.
     if (fMeshes[0].fLocalCoords.isEmpty() != that->fMeshes[0].fLocalCoords.isEmpty()) {
         return false;
     }
@@ -205,12 +221,24 @@
         return false;
     }
 
+    if (fIndexCount + that->fIndexCount > SK_MaxU16) {
+        return false;
+    }
+
     if (!fVariableColor) {
         if (that->fVariableColor || that->fMeshes[0].fColor != fMeshes[0].fColor) {
             fVariableColor = true;
         }
     }
 
+    // Check whether we are about to acquire a mesh with a different view matrix.
+    if (!fMultipleViewMatrices) {
+        if (that->fMultipleViewMatrices ||
+            !fMeshes[0].fViewMatrix.cheapEqualTo(that->fMeshes[0].fViewMatrix)) {
+            fMultipleViewMatrices = true;
+        }
+    }
+
     fMeshes.push_back_n(that->fMeshes.count(), that->fMeshes.begin());
     fVertexCount += that->fVertexCount;
     fIndexCount += that->fIndexCount;
diff --git a/src/gpu/ops/GrDrawVerticesOp.h b/src/gpu/ops/GrDrawVerticesOp.h
index 38b1a47..a2560af 100644
--- a/src/gpu/ops/GrDrawVerticesOp.h
+++ b/src/gpu/ops/GrDrawVerticesOp.h
@@ -70,15 +70,16 @@
         SkTDArray<uint16_t> fIndices;
         SkTDArray<uint32_t> fColors;
         SkTDArray<SkPoint> fLocalCoords;
+        SkMatrix fViewMatrix;
     };
 
     GrPrimitiveType fPrimitiveType;
-    SkMatrix fViewMatrix;
     bool fVariableColor;
     int fVertexCount;
     int fIndexCount;
+    bool fMultipleViewMatrices = false;
+    bool fPipelineNeedsLocalCoords;
     GrRenderTargetContext::ColorArrayType fColorArrayType;
-
     SkSTArray<1, Mesh, true> fMeshes;
 
     typedef GrMeshDrawOp INHERITED;