Implement per-edge aa as extra vertices instead of interpolated edge distances

It appears that using vertex interpolation to handle coverage, instead of
evaluating per-pixel, helps significantly on Adreno GPUs. Will see after
perf comes in if it's worth having both strategies and switch depending
on the platform.

Bug: chromium:914833
Bug: b/120946388
Change-Id: Ie33417938a72aa14eba4e22711e0abf97fcfbc7d
Reviewed-on: https://skia-review.googlesource.com/c/179255
Commit-Queue: Michael Ludwig <michaelludwig@google.com>
Reviewed-by: Brian Salomon <bsalomon@google.com>
diff --git a/src/gpu/ops/GrFillRectOp.cpp b/src/gpu/ops/GrFillRectOp.cpp
index 5e4d8933..8efbcbd 100644
--- a/src/gpu/ops/GrFillRectOp.cpp
+++ b/src/gpu/ops/GrFillRectOp.cpp
@@ -218,7 +218,7 @@
         VertexSpec vertexSpec(this->deviceQuadType(),
                               fWideColor ? ColorType::kHalf : ColorType::kByte,
                               this->localQuadType(), fHelper.usesLocalCoords(), Domain::kNo,
-                              fHelper.aaType());
+                              fHelper.aaType(), fHelper.compatibleWithAlphaAsCoverage());
 
         sk_sp<GrGeometryProcessor> gp = GrQuadPerEdgeAA::MakeProcessor(vertexSpec);
         size_t vertexSize = gp->vertexStride();
@@ -227,8 +227,9 @@
         int vertexOffsetInBuffer = 0;
 
         // Fill the allocated vertex data
-        void* vdata = target->makeVertexSpace(vertexSize, fQuads.count() * 4, &vbuffer,
-                                              &vertexOffsetInBuffer);
+        void* vdata = target->makeVertexSpace(
+                vertexSize, fQuads.count() * vertexSpec.verticesPerQuad(),
+                &vbuffer, &vertexOffsetInBuffer);
         if (!vdata) {
             SkDebugf("Could not allocate vertices\n");
             return;
@@ -243,19 +244,10 @@
         }
 
         // Configure the mesh for the vertex data
-        GrMesh* mesh;
-        if (fQuads.count() > 1) {
-            mesh = target->allocMesh(GrPrimitiveType::kTriangles);
-            sk_sp<const GrBuffer> ibuffer = target->resourceProvider()->refQuadIndexBuffer();
-            if (!ibuffer) {
-                SkDebugf("Could not allocate quad indices\n");
-                return;
-            }
-            mesh->setIndexedPatterned(ibuffer.get(), 6, 4, fQuads.count(),
-                                      GrResourceProvider::QuadCountOfQuadBuffer());
-        } else {
-            mesh = target->allocMesh(GrPrimitiveType::kTriangleStrip);
-            mesh->setNonIndexedNonInstanced(4);
+        GrMesh* mesh = target->allocMeshes(1);
+        if (!GrQuadPerEdgeAA::ConfigureMeshIndices(target, mesh, vertexSpec, fQuads.count())) {
+            SkDebugf("Could not allocate indices\n");
+            return;
         }
         mesh->setVertexData(vbuffer, vertexOffsetInBuffer);
 
@@ -267,6 +259,13 @@
         TRACE_EVENT0("skia", TRACE_FUNC);
         const auto* that = t->cast<FillRectOp>();
 
+        if ((fHelper.aaType() == GrAAType::kCoverage ||
+             that->fHelper.aaType() == GrAAType::kCoverage) &&
+            fQuads.count() + that->fQuads.count() > GrQuadPerEdgeAA::kNumAAQuadsInIndexBuffer) {
+            // This limit on batch size seems to help on Adreno devices
+            return CombineResult::kCannotCombine;
+        }
+
         // Unlike most users of the draw op helper, this op can merge none-aa and coverage-aa
         // draw ops together, so pass true as the last argument.
         if (!fHelper.isCompatible(that->fHelper, caps, this->bounds(), that->bounds(), true)) {