CCPR: Rewrite path parsing

Creates a GrCCPRGeometry class that chops contours up into simple
segments that ccpr can render, and rewrites the GPU buffer creation to
be able to handle arbitrary lengths of ccpr geometry.

Bug: skia:
Change-Id: Iaa173a02729e177b0ed7ef7fbb9195d349be689d
Reviewed-on: https://skia-review.googlesource.com/41963
Reviewed-by: Brian Salomon <bsalomon@google.com>
Commit-Queue: Chris Dalton <csmartdalton@google.com>
diff --git a/src/gpu/ccpr/GrCCPRAtlas.cpp b/src/gpu/ccpr/GrCCPRAtlas.cpp
index 8eb3086..1cd7fa8 100644
--- a/src/gpu/ccpr/GrCCPRAtlas.cpp
+++ b/src/gpu/ccpr/GrCCPRAtlas.cpp
@@ -101,7 +101,7 @@
 }
 
 sk_sp<GrRenderTargetContext> GrCCPRAtlas::finalize(GrOnFlushResourceProvider* onFlushRP,
-                                                     std::unique_ptr<GrDrawOp> atlasOp) {
+                                                   std::unique_ptr<GrDrawOp> atlasOp) {
     SkASSERT(!fTextureProxy);
 
     GrSurfaceDesc desc;
diff --git a/src/gpu/ccpr/GrCCPRCoverageOp.cpp b/src/gpu/ccpr/GrCCPRCoverageOp.cpp
new file mode 100644
index 0000000..c63b494
--- /dev/null
+++ b/src/gpu/ccpr/GrCCPRCoverageOp.cpp
@@ -0,0 +1,467 @@
+/*
+ * Copyright 2017 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#include "GrCCPRCoverageOp.h"
+
+#include "GrGpuCommandBuffer.h"
+#include "GrOnFlushResourceProvider.h"
+#include "GrOpFlushState.h"
+#include "SkMathPriv.h"
+#include "SkPath.h"
+#include "SkPathPriv.h"
+#include "SkPoint.h"
+#include "SkNx.h"
+#include "ccpr/GrCCPRGeometry.h"
+
+using TriangleInstance = GrCCPRCoverageProcessor::TriangleInstance;
+using CurveInstance = GrCCPRCoverageProcessor::CurveInstance;
+
+/**
+ * This is a view matrix that accumulates two bounding boxes as it maps points: device-space bounds
+ * and "45 degree" device-space bounds (| 1 -1 | * devCoords).
+ *                                      | 1  1 |
+ */
+class AccumulatingViewMatrix {
+public:
+    AccumulatingViewMatrix(const SkMatrix& m, const SkPoint& initialPoint);  // Seeds both boxes.
+
+    SkPoint transform(const SkPoint& pt);  // Maps pt and grows both boxes to include it.
+    void getAccumulatedBounds(SkRect* devBounds, SkRect* devBounds45) const;
+
+private:
+    Sk4f fX;  // Per-lane x coefficients: lanes 0-1 device space, lanes 2-3 "45 degree" space.
+    Sk4f fY;  // Per-lane y coefficients, same lane layout.
+    Sk4f fT;  // Per-lane translation, same lane layout.
+
+    Sk4f fTopLeft;      // Per-lane minimum over every point mapped so far.
+    Sk4f fBottomRight;  // Per-lane maximum over every point mapped so far.
+};
+
+inline AccumulatingViewMatrix::AccumulatingViewMatrix(const SkMatrix& m,
+                                                      const SkPoint& initialPoint) {
+    // m45 transforms into 45 degree space in order to find the octagon's diagonals. We could
+    // use SK_ScalarRoot2Over2 if we wanted an orthonormal transform, but this is irrelevant as
+    // long as the shader uses the correct inverse when coming back to device space.
+    SkMatrix m45;
+    m45.setSinCos(1, 1);  // sin = cos = 1 gives the unnormalized | 1 -1 |, | 1 1 | rows.
+    m45.preConcat(m);  // m is applied first, then the 45 degree mapping.
+
+    fX = Sk4f(m.getScaleX(), m.getSkewY(), m45.getScaleX(), m45.getSkewY());  // x coefficients.
+    fY = Sk4f(m.getSkewX(), m.getScaleY(), m45.getSkewX(), m45.getScaleY());  // y coefficients.
+    fT = Sk4f(m.getTranslateX(), m.getTranslateY(), m45.getTranslateX(), m45.getTranslateY());
+
+    Sk4f transformed = SkNx_fma(fY, Sk4f(initialPoint.y()), fT);  // fY * y + fT
+    transformed = SkNx_fma(fX, Sk4f(initialPoint.x()), transformed);  // ... + fX * x
+    fTopLeft = fBottomRight = transformed;  // Both boxes start as this single point.
+}
+
+inline SkPoint AccumulatingViewMatrix::transform(const SkPoint& pt) {
+    Sk4f transformed = SkNx_fma(fY, Sk4f(pt.y()), fT);  // fY * y + fT
+    transformed = SkNx_fma(fX, Sk4f(pt.x()), transformed);  // ... + fX * x
+
+    fTopLeft = Sk4f::Min(fTopLeft, transformed);  // Grow both boxes to include this point.
+    fBottomRight = Sk4f::Max(fBottomRight, transformed);
+
+    // TODO: vst1_lane_f32? (Sk4f::storeLane?)
+    float data[4];
+    transformed.store(data);
+    return SkPoint::Make(data[0], data[1]);  // Lanes 0-1 are the device-space point.
+}
+
+inline void AccumulatingViewMatrix::getAccumulatedBounds(SkRect* devBounds,
+                                                         SkRect* devBounds45) const {
+    float topLeft[4], bottomRight[4];  // Spill the SIMD lanes so they can be read one by one.
+    fTopLeft.store(topLeft);
+    fBottomRight.store(bottomRight);
+    devBounds->setLTRB(topLeft[0], topLeft[1], bottomRight[0], bottomRight[1]);  // Lanes 0-1.
+    devBounds45->setLTRB(topLeft[2], topLeft[3], bottomRight[2], bottomRight[3]);  // Lanes 2-3.
+}
+
+void GrCCPRCoverageOpsBuilder::parsePath(const SkMatrix& viewMatrix,
+                                         const SkPath& path, SkRect* devBounds,
+                                         SkRect* devBounds45) {
+    SkASSERT(!fParsingPath);  // The previous path must have been saved or discarded.
+    SkDEBUGCODE(fParsingPath = true);
+
+    fCurrPathPointsIdx = fGeometry.points().count();  // Rollback point for discardParsedPath.
+    fCurrPathVerbsIdx = fGeometry.verbs().count();
+    fCurrPathTallies = PrimitiveTallies();
+
+    fGeometry.beginPath();
+
+    const SkPoint* const pts = SkPathPriv::PointData(path);
+    int ptsIdx = 0;  // Index of the next unconsumed point in pts.
+    bool insideContour = false;
+
+    SkASSERT(!path.isEmpty());
+    SkASSERT(path.countPoints() > 0);
+    AccumulatingViewMatrix m(viewMatrix, pts[0]);  // Bounds are seeded with the first point.
+
+    for (SkPath::Verb verb : SkPathPriv::Verbs(path)) {
+        switch (verb) {
+            case SkPath::kMove_Verb:
+                this->endContourIfNeeded(insideContour);  // A move implicitly ends the contour.
+                fGeometry.beginContour(m.transform(pts[ptsIdx++]));
+                insideContour = true;
+                continue;
+            case SkPath::kClose_Verb:
+                this->endContourIfNeeded(insideContour);
+                insideContour = false;
+                continue;
+            case SkPath::kLine_Verb:
+                fGeometry.lineTo(m.transform(pts[ptsIdx++]));
+                continue;
+            case SkPath::kQuad_Verb:
+                SkASSERT(ptsIdx >= 1); // SkPath should have inserted an implicit moveTo if needed.
+                fGeometry.quadraticTo(m.transform(pts[ptsIdx]), m.transform(pts[ptsIdx + 1]));
+                ptsIdx += 2;  // Control point and end point.
+                continue;
+            case SkPath::kCubic_Verb:
+                SkASSERT(ptsIdx >= 1); // SkPath should have inserted an implicit moveTo if needed.
+                fGeometry.cubicTo(m.transform(pts[ptsIdx]), m.transform(pts[ptsIdx + 1]),
+                                  m.transform(pts[ptsIdx + 2]));
+                ptsIdx += 3;  // Two control points and end point.
+                continue;
+            case SkPath::kConic_Verb:
+                SK_ABORT("Conics are not supported.");
+            default:
+                SK_ABORT("Unexpected path verb.");
+        }
+    }
+
+    this->endContourIfNeeded(insideContour);  // Flush a final contour that had no close verb.
+    m.getAccumulatedBounds(devBounds, devBounds45);
+}
+
+void GrCCPRCoverageOpsBuilder::endContourIfNeeded(bool insideContour) {
+    if (insideContour) {  // No-op when no contour is open.
+        fCurrPathTallies += fGeometry.endContour();  // Add the contour's tallies to the path's.
+    }
+}
+
+void GrCCPRCoverageOpsBuilder::saveParsedPath(ScissorMode scissorMode,
+                                              const SkIRect& clippedDevIBounds,
+                                              int16_t atlasOffsetX, int16_t atlasOffsetY) {
+    SkASSERT(fParsingPath);  // Must follow a parsePath call.
+
+    fPathsInfo.push_back() = {
+        scissorMode,
+        (atlasOffsetY << 16) | (atlasOffsetX & 0xffff),  // Y in the high 16 bits, X in the low 16.
+        std::move(fTerminatingOp)  // Null unless emitOp ran since the last saved path.
+    };
+
+    fTallies[(int)scissorMode] += fCurrPathTallies;  // Totals are kept per scissor mode.
+
+    if (ScissorMode::kScissored == scissorMode) {
+        fScissorBatches.push_back() = {
+            fCurrPathTallies,
+            clippedDevIBounds.makeOffset(atlasOffsetX, atlasOffsetY)  // Bounds moved by the offset.
+        };
+    }
+
+    SkDEBUGCODE(fParsingPath = false);
+}
+
+void GrCCPRCoverageOpsBuilder::discardParsedPath() {
+    SkASSERT(fParsingPath);  // Must follow a parsePath call.
+
+    // The code will still work whether or not the below assertion is true. It is just unlikely that
+    // the caller would want this, and probably indicative of a mistake. (Why emit an
+    // intermediate Op (to switch to a new atlas?), just to then throw the path away?)
+    SkASSERT(!fTerminatingOp);
+
+    fGeometry.resize_back(fCurrPathPointsIdx, fCurrPathVerbsIdx);  // Undo what parsePath added.
+    SkDEBUGCODE(fParsingPath = false);
+}
+
+void GrCCPRCoverageOpsBuilder::emitOp(SkISize drawBounds) {
+    SkASSERT(!fTerminatingOp);  // Only one pending Op may exist at a time.
+    fTerminatingOp.reset(new GrCCPRCoverageOp(std::move(fScissorBatches), drawBounds));
+    SkASSERT(fScissorBatches.empty());  // The move must leave the list empty for the next Op.
+}
+
+// Emits a contour's triangle fan.
+//
+// Classic Redbook fanning would be the triangles: [0  1  2], [0  2  3], ..., [0  n-2  n-1].
+//
+// This function emits the triangle: [0  n/3  n*2/3], and then recurses on all three sides. The
+// advantage to this approach is that for a convex-ish contour, it generates larger triangles.
+// Classic fanning tends to generate long, skinny triangles, which are expensive to draw since they
+// have a longer perimeter to rasterize and antialias.
+//
+// The indices array indexes the fan's points (think: glDrawElements), and must have at least log3
+// elements past the end for this method to use as scratch space.
+//
+// Returns the next triangle instance after the final one emitted.
+static TriangleInstance* emit_recursive_fan(SkTArray<int32_t, true>& indices, int firstIndex,
+                                            int indexCount, int packedAtlasOffset,
+                                            TriangleInstance out[]) {
+    if (indexCount < 3) {  // Fewer than three points: nothing to emit.
+        return out;
+    }
+
+    const int32_t oneThirdCount = indexCount / 3;
+    const int32_t twoThirdsCount = (2 * indexCount) / 3;
+    *out++ = {
+        indices[firstIndex],
+        indices[firstIndex + oneThirdCount],
+        indices[firstIndex + twoThirdsCount],
+        packedAtlasOffset
+    };
+
+    out = emit_recursive_fan(indices, firstIndex, oneThirdCount + 1, packedAtlasOffset, out);
+    out = emit_recursive_fan(indices, firstIndex + oneThirdCount,
+                             twoThirdsCount - oneThirdCount + 1, packedAtlasOffset, out);
+
+    int endIndex = firstIndex + indexCount;  // One past this fan's last index.
+    int32_t oldValue = indices[endIndex];
+    indices[endIndex] = indices[firstIndex];  // Third side wraps around to the first point.
+    out = emit_recursive_fan(indices, firstIndex + twoThirdsCount, indexCount - twoThirdsCount + 1,
+                             packedAtlasOffset, out);
+    indices[endIndex] = oldValue;  // Restore the slot borrowed above.
+
+    return out;
+}
+
+bool GrCCPRCoverageOpsBuilder::finalize(GrOnFlushResourceProvider* onFlushRP,
+                                        SkTArray<std::unique_ptr<GrCCPRCoverageOp>>* ops) {
+    SkASSERT(!fParsingPath);
+
+    const SkTArray<SkPoint, true>& points = fGeometry.points();
+    sk_sp<GrBuffer> pointsBuffer = onFlushRP->makeBuffer(kTexel_GrBufferType,
+                                                         points.count() * 2 * sizeof(float),
+                                                         points.begin());
+    if (!pointsBuffer) {
+        return false;
+    }
+
+    // Configure the instance buffer layout.
+    PrimitiveTallies baseInstances[kNumScissorModes];
+    // int4 indices.
+    baseInstances[0].fTriangles = 0;
+    baseInstances[1].fTriangles = baseInstances[0].fTriangles + fTallies[0].fTriangles;
+    // int2 indices (curves index the buffer as int2 rather than int4).
+    baseInstances[0].fQuadratics = (baseInstances[1].fTriangles + fTallies[1].fTriangles) * 2;
+    baseInstances[1].fQuadratics = baseInstances[0].fQuadratics + fTallies[0].fQuadratics;
+    baseInstances[0].fSerpentines = baseInstances[1].fQuadratics + fTallies[1].fQuadratics;
+    baseInstances[1].fSerpentines = baseInstances[0].fSerpentines + fTallies[0].fSerpentines;
+    baseInstances[0].fLoops = baseInstances[1].fSerpentines + fTallies[1].fSerpentines;
+    baseInstances[1].fLoops = baseInstances[0].fLoops + fTallies[0].fLoops;
+    int instanceBufferSize = (baseInstances[1].fLoops + fTallies[1].fLoops) * sizeof(CurveInstance);
+
+    sk_sp<GrBuffer> instanceBuffer = onFlushRP->makeBuffer(kVertex_GrBufferType,
+                                                           instanceBufferSize);
+    if (!instanceBuffer) {
+        return false;
+    }
+    // NOTE(review): the map() result is only checked by SkASSERT; confirm it cannot be null.
+    TriangleInstance* triangleInstanceData = static_cast<TriangleInstance*>(instanceBuffer->map());
+    CurveInstance* curveInstanceData = reinterpret_cast<CurveInstance*>(triangleInstanceData);
+    SkASSERT(curveInstanceData);
+
+    PathInfo* currPathInfo = fPathsInfo.begin();
+    int32_t packedAtlasOffset;
+    int ptsIdx = -1;  // Index of the most recently consumed point.
+    PrimitiveTallies instanceIndices[2] = {baseInstances[0], baseInstances[1]};
+    PrimitiveTallies* currIndices;
+    SkSTArray<256, int32_t, true> currFan;  // Point indices of the current contour's fan.
+
+#ifdef SK_DEBUG
+    int numScissoredPaths = 0;
+    int numScissorBatches = 0;
+    PrimitiveTallies initialBaseInstances[] = {baseInstances[0], baseInstances[1]};
+#endif
+
+    // Expand the ccpr verbs into GPU instance buffers.
+    for (GrCCPRGeometry::Verb verb : fGeometry.verbs()) {
+        switch (verb) {
+            case GrCCPRGeometry::Verb::kBeginPath:
+                SkASSERT(currFan.empty());
+                currIndices = &instanceIndices[(int)currPathInfo->fScissorMode];
+                packedAtlasOffset = currPathInfo->fPackedAtlasOffset;
+#ifdef SK_DEBUG
+                if (ScissorMode::kScissored == currPathInfo->fScissorMode) {
+                    ++numScissoredPaths;
+                }
+#endif
+                if (auto op = std::move(currPathInfo->fTerminatingOp)) {
+                    op->setBuffers(pointsBuffer, instanceBuffer, baseInstances, instanceIndices);
+                    baseInstances[0] = instanceIndices[0];  // The next Op starts where this ended.
+                    baseInstances[1] = instanceIndices[1];
+                    SkDEBUGCODE(numScissorBatches += op->fScissorBatches.count());
+                    ops->push_back(std::move(op));
+                }
+                ++currPathInfo;
+                continue;
+
+            case GrCCPRGeometry::Verb::kBeginContour:
+                SkASSERT(currFan.empty());
+                currFan.push_back(++ptsIdx);
+                continue;
+
+            case GrCCPRGeometry::Verb::kLineTo:
+                SkASSERT(!currFan.empty());
+                currFan.push_back(++ptsIdx);
+                continue;
+
+            case GrCCPRGeometry::Verb::kMonotonicQuadraticTo:
+                SkASSERT(!currFan.empty());
+                curveInstanceData[currIndices->fQuadratics++] = {ptsIdx, packedAtlasOffset};
+                currFan.push_back(ptsIdx += 2);  // Only the curve's end point joins the fan.
+                continue;
+
+            case GrCCPRGeometry::Verb::kConvexSerpentineTo:
+                SkASSERT(!currFan.empty());
+                curveInstanceData[currIndices->fSerpentines++] = {ptsIdx, packedAtlasOffset};
+                currFan.push_back(ptsIdx += 3);  // Only the curve's end point joins the fan.
+                continue;
+
+            case GrCCPRGeometry::Verb::kConvexLoopTo:
+                SkASSERT(!currFan.empty());
+                curveInstanceData[currIndices->fLoops++] = {ptsIdx, packedAtlasOffset};
+                currFan.push_back(ptsIdx += 3);  // Only the curve's end point joins the fan.
+                continue;
+
+            case GrCCPRGeometry::Verb::kEndClosedContour: // endPt == startPt.
+                SkASSERT(!currFan.empty());
+                currFan.pop_back();  // Drop the end point, which repeats the start point.
+                // fallthru.
+            case GrCCPRGeometry::Verb::kEndOpenContour: // endPt != startPt.
+                if (currFan.count() >= 3) {
+                    int fanSize = currFan.count();
+                    // Reserve space for emit_recursive_fan. Technically this can grow to
+                    // fanSize + log3(fanSize), but we approximate with log2.
+                    currFan.push_back_n(SkNextLog2(fanSize));
+                    SkDEBUGCODE(TriangleInstance* end =)
+                    emit_recursive_fan(currFan, 0, fanSize, packedAtlasOffset,
+                                       triangleInstanceData + currIndices->fTriangles);
+                    currIndices->fTriangles += fanSize - 2;  // A fan of n points has n - 2 tris.
+                    SkASSERT(triangleInstanceData + currIndices->fTriangles == end);
+                }
+                currFan.reset();
+                continue;
+        }
+    }
+
+    instanceBuffer->unmap();
+
+    if (auto op = std::move(fTerminatingOp)) {  // An Op emitted after the last saved path.
+        op->setBuffers(std::move(pointsBuffer), std::move(instanceBuffer), baseInstances,
+                       instanceIndices);
+        SkDEBUGCODE(numScissorBatches += op->fScissorBatches.count());
+        ops->push_back(std::move(op));
+    }
+
+    SkASSERT(currPathInfo == fPathsInfo.end());
+    SkASSERT(ptsIdx == points.count() - 1);
+    SkASSERT(numScissoredPaths == numScissorBatches);
+    SkASSERT(instanceIndices[0].fTriangles == initialBaseInstances[1].fTriangles);
+    SkASSERT(instanceIndices[1].fTriangles * 2 == initialBaseInstances[0].fQuadratics);
+    SkASSERT(instanceIndices[0].fQuadratics == initialBaseInstances[1].fQuadratics);
+    SkASSERT(instanceIndices[1].fQuadratics == initialBaseInstances[0].fSerpentines);
+    SkASSERT(instanceIndices[0].fSerpentines == initialBaseInstances[1].fSerpentines);
+    SkASSERT(instanceIndices[1].fSerpentines == initialBaseInstances[0].fLoops);
+    SkASSERT(instanceIndices[0].fLoops == initialBaseInstances[1].fLoops);
+    SkASSERT(instanceIndices[1].fLoops * (int) sizeof(CurveInstance) == instanceBufferSize);
+    return true;
+}
+
+void GrCCPRCoverageOp::setBuffers(sk_sp<GrBuffer> pointsBuffer, sk_sp<GrBuffer> instanceBuffer,
+                                  const PrimitiveTallies baseInstances[kNumScissorModes],
+                                  const PrimitiveTallies endInstances[kNumScissorModes]) {
+    fPointsBuffer = std::move(pointsBuffer);
+    fInstanceBuffer = std::move(instanceBuffer);
+    fBaseInstances[0] = baseInstances[0];
+    fBaseInstances[1] = baseInstances[1];
+    fInstanceCounts[0] = endInstances[0] - baseInstances[0];  // Counts are end minus base.
+    fInstanceCounts[1] = endInstances[1] - baseInstances[1];
+}
+
+void GrCCPRCoverageOp::onExecute(GrOpFlushState* flushState) {
+    using Mode = GrCCPRCoverageProcessor::Mode;
+
+    SkDEBUGCODE(GrCCPRCoverageProcessor::Validate(flushState->drawOpArgs().fProxy));
+    SkASSERT(fPointsBuffer);  // setBuffers must have been called before execution.
+    SkASSERT(fInstanceBuffer);
+
+    GrPipeline pipeline(flushState->drawOpArgs().fProxy, GrPipeline::ScissorState::kEnabled,
+                        SkBlendMode::kPlus);
+
+    fMeshesScratchBuffer.reserve(1 + fScissorBatches.count());  // 1 unscissored + N batches.
+    fDynamicStatesScratchBuffer.reserve(1 + fScissorBatches.count());
+
+    // Triangles (3 vertices per instance).
+    auto constexpr kTrianglesGrPrimitiveType = GrCCPRCoverageProcessor::kTrianglesGrPrimitiveType;
+    this->drawMaskPrimitives(flushState, pipeline, Mode::kCombinedTriangleHullsAndEdges,
+                             kTrianglesGrPrimitiveType, 3, &PrimitiveTallies::fTriangles);
+    this->drawMaskPrimitives(flushState, pipeline, Mode::kTriangleCorners,
+                             kTrianglesGrPrimitiveType, 3, &PrimitiveTallies::fTriangles);
+
+    // Quadratics (3 vertices per instance).
+    auto constexpr kQuadraticsGrPrimitiveType = GrCCPRCoverageProcessor::kQuadraticsGrPrimitiveType;
+    this->drawMaskPrimitives(flushState, pipeline, Mode::kQuadraticHulls,
+                             kQuadraticsGrPrimitiveType, 3, &PrimitiveTallies::fQuadratics);
+    this->drawMaskPrimitives(flushState, pipeline, Mode::kQuadraticCorners,
+                             kQuadraticsGrPrimitiveType, 3, &PrimitiveTallies::fQuadratics);
+
+    // Cubics (4 vertices per instance); serpentines and loops are tallied separately.
+    auto constexpr kCubicsGrPrimitiveType = GrCCPRCoverageProcessor::kCubicsGrPrimitiveType;
+    this->drawMaskPrimitives(flushState, pipeline, Mode::kSerpentineInsets,
+                             kCubicsGrPrimitiveType, 4, &PrimitiveTallies::fSerpentines);
+    this->drawMaskPrimitives(flushState, pipeline, Mode::kLoopInsets,
+                             kCubicsGrPrimitiveType, 4, &PrimitiveTallies::fLoops);
+    this->drawMaskPrimitives(flushState, pipeline, Mode::kSerpentineBorders,
+                             kCubicsGrPrimitiveType, 4, &PrimitiveTallies::fSerpentines);
+    this->drawMaskPrimitives(flushState, pipeline, Mode::kLoopBorders,
+                             kCubicsGrPrimitiveType, 4, &PrimitiveTallies::fLoops);
+}
+
+void GrCCPRCoverageOp::drawMaskPrimitives(GrOpFlushState* flushState, const GrPipeline& pipeline,
+                                          GrCCPRCoverageProcessor::Mode mode,
+                                          GrPrimitiveType primType, int vertexCount,
+                                          int PrimitiveTallies::* instanceType) const {
+    using ScissorMode = GrCCPRCoverageOpsBuilder::ScissorMode;
+    SkASSERT(pipeline.getScissorState().enabled());
+
+    fMeshesScratchBuffer.reset();  // The scratch arrays are rebuilt on every call.
+    fDynamicStatesScratchBuffer.reset();
+
+    if (const int instanceCount = fInstanceCounts[(int)ScissorMode::kNonScissored].*instanceType) {
+        SkASSERT(instanceCount > 0);
+        const int baseInstance = fBaseInstances[(int)ScissorMode::kNonScissored].*instanceType;
+        GrMesh& mesh = fMeshesScratchBuffer.emplace_back(primType);
+        mesh.setInstanced(fInstanceBuffer.get(), instanceCount, baseInstance, vertexCount);
+        fDynamicStatesScratchBuffer.push_back().fScissorRect.setXYWH(0, 0, fDrawBounds.width(),
+                                                                     fDrawBounds.height());
+    }
+
+    if (fInstanceCounts[(int)ScissorMode::kScissored].*instanceType) {
+        int baseInstance = fBaseInstances[(int)ScissorMode::kScissored].*instanceType;
+        for (const ScissorBatch& batch : fScissorBatches) {  // One mesh + scissor per batch.
+            SkASSERT(this->bounds().contains(batch.fScissor));
+            const int instanceCount = batch.fInstanceCounts.*instanceType;
+            if (!instanceCount) {
+                continue;  // This batch has no primitives of the requested type.
+            }
+            SkASSERT(instanceCount > 0);
+            GrMesh& mesh = fMeshesScratchBuffer.emplace_back(primType);
+            mesh.setInstanced(fInstanceBuffer.get(), instanceCount, baseInstance, vertexCount);
+            fDynamicStatesScratchBuffer.push_back().fScissorRect = batch.fScissor;
+            baseInstance += instanceCount;  // The next batch starts right after this one.
+        }
+    }
+
+    SkASSERT(fMeshesScratchBuffer.count() == fDynamicStatesScratchBuffer.count());
+
+    if (!fMeshesScratchBuffer.empty()) {  // Nothing is drawn when no mesh was built.
+        GrCCPRCoverageProcessor proc(mode, fPointsBuffer.get());
+        SkASSERT(flushState->rtCommandBuffer());
+        flushState->rtCommandBuffer()->draw(pipeline, proc, fMeshesScratchBuffer.begin(),
+                                            fDynamicStatesScratchBuffer.begin(),
+                                            fMeshesScratchBuffer.count(), this->bounds());
+    }
+}
diff --git a/src/gpu/ccpr/GrCCPRCoverageOp.h b/src/gpu/ccpr/GrCCPRCoverageOp.h
new file mode 100644
index 0000000..77013dd
--- /dev/null
+++ b/src/gpu/ccpr/GrCCPRCoverageOp.h
@@ -0,0 +1,170 @@
+/*
+ * Copyright 2017 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#ifndef GrCCPRCoverageOp_DEFINED
+#define GrCCPRCoverageOp_DEFINED
+
+#include "GrMesh.h"
+#include "SkRect.h"
+#include "SkRefCnt.h"
+#include "ccpr/GrCCPRCoverageProcessor.h"
+#include "ccpr/GrCCPRGeometry.h"
+#include "ops/GrDrawOp.h"
+
+class GrCCPRCoverageOp;
+class GrOnFlushResourceProvider;
+class SkMatrix;
+class SkPath;
+
+/**
+ * This class produces GrCCPRCoverageOps that render coverage count masks and atlases. A path is
+ * added to the current op in two steps:
+ *
+ *   1) parsePath(viewMatrix, path, &devBounds, &devBounds45);
+ *
+ *   <client decides where to put the mask within an atlas, if wanted>
+ *
+ *   2) saveParsedPath(ScissorMode, clippedDevIBounds, atlasOffsetX, atlasOffsetY);
+ *
+ * The client can flush the currently saved paths to a GrCCPRCoverageOp by calling emitOp, and
+ * retrieve all emitted ops after calling finalize().
+ */
+class GrCCPRCoverageOpsBuilder {
+public:
+    // Indicates whether a path should enforce a scissor clip when rendering its mask. (Specified
+    // as an int because these values get used directly as indices into arrays.)
+    enum class ScissorMode : int {
+        kNonScissored = 0,
+        kScissored = 1
+    };
+    static constexpr int kNumScissorModes = 2;
+
+    GrCCPRCoverageOpsBuilder(int maxTotalPaths, int numSkPoints, int numSkVerbs)
+            : fPathsInfo(maxTotalPaths)
+            , fGeometry(numSkPoints, numSkVerbs)
+            , fTallies{PrimitiveTallies(), PrimitiveTallies()}
+            , fScissorBatches(maxTotalPaths) {}
+
+    ~GrCCPRCoverageOpsBuilder() {
+        // Enforce the contract that the client always calls saveParsedPath or discardParsedPath.
+        SkASSERT(!fParsingPath);
+    }
+
+    // Parses an SkPath into a temporary staging area. The path will not yet be included in the next
+    // Op unless there is a matching call to saveParsedPath. The user must complement this with a
+    // following call to either saveParsedPath or discardParsedPath.
+    //
+    // Returns two tight bounding boxes: device space and "45 degree" (| 1 -1 | * devCoords) space.
+    //                                                                 | 1  1 |
+    void parsePath(const SkMatrix&, const SkPath&, SkRect* devBounds, SkRect* devBounds45);
+
+    // Commits the currently-parsed path from staging to the next Op, and specifies whether the mask
+    // should be rendered with a scissor clip in effect. Accepts an optional post-device-space
+    // translate for placement in an atlas.
+    void saveParsedPath(ScissorMode, const SkIRect& clippedDevIBounds,
+                        int16_t atlasOffsetX, int16_t atlasOffsetY);
+    void discardParsedPath();  // Throws away the staged path instead of saving it.
+
+    // Flushes all currently-saved paths internally to a GrCCPRCoverageOp.
+    //
+    // NOTE: if there is a parsed path in the staging area, it will not be included. But the client
+    // may still call saveParsedPath to include it in a future Op.
+    void emitOp(SkISize drawBounds);
+
+    // Builds GPU buffers and appends every emitted GrCCPRCoverageOp; false if a buffer fails.
+    bool finalize(GrOnFlushResourceProvider*, SkTArray<std::unique_ptr<GrCCPRCoverageOp>>*);
+
+private:
+    using PrimitiveTallies = GrCCPRGeometry::PrimitiveTallies;
+
+    // Every kBeginPath verb has a corresponding PathInfo entry.
+    struct PathInfo {
+        ScissorMode   fScissorMode;
+        int32_t       fPackedAtlasOffset; // (offsetY << 16) | (offsetX & 0xffff)
+        std::unique_ptr<GrCCPRCoverageOp>  fTerminatingOp;
+    };
+
+    // Every PathInfo with a mode of kScissored has a corresponding ScissorBatch.
+    struct ScissorBatch {
+        PrimitiveTallies   fInstanceCounts;
+        SkIRect            fScissor;
+    };
+
+    void endContourIfNeeded(bool insideContour);
+
+    // Staging area for the path being parsed.
+    SkDEBUGCODE(bool                   fParsingPath = false);
+    int                                fCurrPathPointsIdx;  // Point count when parsePath began.
+    int                                fCurrPathVerbsIdx;   // Verb count when parsePath began.
+    PrimitiveTallies                   fCurrPathTallies;    // Primitives in the staged path.
+
+    SkSTArray<32, PathInfo, true>      fPathsInfo;  // One entry per saved path.
+
+    GrCCPRGeometry                     fGeometry;  // Parsed points and verbs.
+
+    PrimitiveTallies                   fTallies[kNumScissorModes];  // Totals per scissor mode.
+    SkTArray<ScissorBatch, true>       fScissorBatches;  // Moved into the Op by emitOp.
+
+    std::unique_ptr<GrCCPRCoverageOp>  fTerminatingOp;  // Pending Op from emitOp, if any.
+
+    friend class GrCCPRCoverageOp; // For ScissorBatch.
+};
+
+/**
+ * This Op renders coverage count masks and atlases. Create it using GrCCPRCoverageOpsBuilder.
+ */
+class GrCCPRCoverageOp : public GrDrawOp {
+public:
+    DEFINE_OP_CLASS_ID
+
+    // GrDrawOp interface.
+    const char* name() const override { return "GrCCPRCoverageOp"; }
+    FixedFunctionFlags fixedFunctionFlags() const override { return FixedFunctionFlags::kNone; }
+    RequiresDstTexture finalize(const GrCaps&, const GrAppliedClip*) override {
+        return RequiresDstTexture::kNo;
+    }
+    bool onCombineIfPossible(GrOp* other, const GrCaps& caps) override { return false; }
+    void onPrepare(GrOpFlushState*) override {}  // Nothing to prepare.
+    void onExecute(GrOpFlushState*) override;
+
+private:
+    static constexpr int kNumScissorModes = GrCCPRCoverageOpsBuilder::kNumScissorModes;
+    using PrimitiveTallies = GrCCPRGeometry::PrimitiveTallies;
+    using ScissorBatch = GrCCPRCoverageOpsBuilder::ScissorBatch;
+
+    GrCCPRCoverageOp(SkTArray<ScissorBatch, true>&& scissorBatches, const SkISize& drawBounds)
+        : INHERITED(ClassID())
+        , fScissorBatches(std::move(scissorBatches))
+        , fDrawBounds(drawBounds) {
+        this->setBounds(SkRect::MakeIWH(fDrawBounds.width(), fDrawBounds.height()),
+                        GrOp::HasAABloat::kNo, GrOp::IsZeroArea::kNo);
+    }
+
+    void setBuffers(sk_sp<GrBuffer> pointsBuffer, sk_sp<GrBuffer> instanceBuffer,
+                    const PrimitiveTallies baseInstances[kNumScissorModes],
+                    const PrimitiveTallies endInstances[kNumScissorModes]);
+
+    void drawMaskPrimitives(GrOpFlushState*, const GrPipeline&, const GrCCPRCoverageProcessor::Mode,
+                            GrPrimitiveType, int vertexCount,
+                            int PrimitiveTallies::* instanceType) const;
+
+    sk_sp<GrBuffer>                      fPointsBuffer;    // Set by setBuffers.
+    sk_sp<GrBuffer>                      fInstanceBuffer;  // Set by setBuffers.
+    PrimitiveTallies                     fBaseInstances[kNumScissorModes];  // First instances.
+    PrimitiveTallies                     fInstanceCounts[kNumScissorModes];  // End minus base.
+    const SkTArray<ScissorBatch, true>   fScissorBatches;
+    const SkISize                        fDrawBounds;
+
+    mutable SkTArray<GrMesh>                     fMeshesScratchBuffer;  // Reset on every draw.
+    mutable SkTArray<GrPipeline::DynamicState>   fDynamicStatesScratchBuffer;
+
+    friend class GrCCPRCoverageOpsBuilder;  // Constructs the Op and calls setBuffers.
+
+    typedef GrDrawOp INHERITED;
+};
+
+#endif
diff --git a/src/gpu/ccpr/GrCCPRCoverageOpsBuilder.cpp b/src/gpu/ccpr/GrCCPRCoverageOpsBuilder.cpp
deleted file mode 100644
index f943f67..0000000
--- a/src/gpu/ccpr/GrCCPRCoverageOpsBuilder.cpp
+++ /dev/null
@@ -1,645 +0,0 @@
-/*
- * Copyright 2017 Google Inc.
- *
- * Use of this source code is governed by a BSD-style license that can be
- * found in the LICENSE file.
- */
-
-#include "GrCCPRCoverageOpsBuilder.h"
-
-#include "GrBuffer.h"
-#include "GrGpuCommandBuffer.h"
-#include "GrOnFlushResourceProvider.h"
-#include "GrOpFlushState.h"
-#include "SkGeometry.h"
-#include "SkMakeUnique.h"
-#include "SkMathPriv.h"
-#include "SkPath.h"
-#include "SkPathPriv.h"
-#include "SkPoint.h"
-#include "SkNx.h"
-#include "ccpr/GrCCPRGeometry.h"
-#include "ops/GrDrawOp.h"
-#include "../pathops/SkPathOpsCubic.h"
-#include <numeric>
-
-class GrCCPRCoverageOpsBuilder::CoverageOp : public GrDrawOp {
-public:
-    using PrimitiveTallies = GrCCPRCoverageOpsBuilder::PrimitiveTallies;
-
-    DEFINE_OP_CLASS_ID
-
-    CoverageOp(const SkISize& drawBounds, sk_sp<GrBuffer> pointsBuffer,
-               sk_sp<GrBuffer> trianglesBuffer,
-               const PrimitiveTallies baseInstances[kNumScissorModes],
-               const PrimitiveTallies endInstances[kNumScissorModes], SkTArray<ScissorBatch>&&);
-
-    // GrDrawOp interface.
-    const char* name() const override { return "GrCCPRCoverageOpsBuilder::CoverageOp"; }
-    FixedFunctionFlags fixedFunctionFlags() const override { return FixedFunctionFlags::kNone; }
-    RequiresDstTexture finalize(const GrCaps&, const GrAppliedClip*) override {
-        return RequiresDstTexture::kNo;
-    }
-    bool onCombineIfPossible(GrOp* other, const GrCaps& caps) override { return false; }
-    void onPrepare(GrOpFlushState*) override {}
-    void onExecute(GrOpFlushState*) override;
-
-private:
-    void drawMaskPrimitives(GrOpFlushState*, const GrPipeline&, const GrCCPRCoverageProcessor::Mode,
-                            GrPrimitiveType, int vertexCount,
-                            int PrimitiveTallies::* instanceType) const;
-
-    const SkISize                  fDrawBounds;
-    const sk_sp<GrBuffer>          fPointsBuffer;
-    const sk_sp<GrBuffer>          fTrianglesBuffer;
-    const PrimitiveTallies         fBaseInstances[GrCCPRCoverageOpsBuilder::kNumScissorModes];
-    const PrimitiveTallies         fInstanceCounts[GrCCPRCoverageOpsBuilder::kNumScissorModes];
-    const SkTArray<ScissorBatch>   fScissorBatches;
-
-    mutable SkTArray<GrMesh>                     fMeshesScratchBuffer;
-    mutable SkTArray<GrPipeline::DynamicState>   fDynamicStatesScratchBuffer;
-
-    typedef GrDrawOp INHERITED;
-};
-
-/**
- * This is a view matrix that accumulates two bounding boxes as it maps points: device-space bounds
- * and "45 degree" device-space bounds (| 1 -1 | * devCoords).
- *                                      | 1  1 |
- */
-class AccumulatingViewMatrix {
-public:
-    AccumulatingViewMatrix(const SkMatrix& m, const SkPoint& initialPoint);
-
-    SkPoint transform(const SkPoint& pt);
-    void getAccumulatedBounds(SkRect* devBounds, SkRect* devBounds45) const;
-
-private:
-    Sk4f fX;
-    Sk4f fY;
-    Sk4f fT;
-
-    Sk4f fTopLeft;
-    Sk4f fBottomRight;
-};
-
-static int num_pts(uint8_t verb) {
-    switch (verb) {
-        case SkPath::kClose_Verb:
-        case SkPath::kDone_Verb:
-        default:
-            SK_ABORT("Path verb does not have an endpoint.");
-            return 0;
-        case SkPath::kMove_Verb:
-        case SkPath::kLine_Verb:
-            return 1;
-        case SkPath::kQuad_Verb:
-            return 2;
-        case SkPath::kConic_Verb:
-            return 2;
-        case SkPath::kCubic_Verb:
-            return 3;
-    }
-}
-
-static SkPoint to_skpoint(double x, double y) {
-    return {static_cast<SkScalar>(x), static_cast<SkScalar>(y)};
-}
-
-static SkPoint to_skpoint(const SkDPoint& dpoint) {
-    return to_skpoint(dpoint.fX, dpoint.fY);
-}
-
-bool GrCCPRCoverageOpsBuilder::init(GrOnFlushResourceProvider* onFlushRP,
-                                    const MaxBufferItems& maxBufferItems) {
-    const int maxPoints = maxBufferItems.fMaxFanPoints + maxBufferItems.fMaxControlPoints;
-    fPointsBuffer = onFlushRP->makeBuffer(kTexel_GrBufferType, maxPoints * 2 * sizeof(float));
-    if (!fPointsBuffer) {
-        return false;
-    }
-
-    const MaxPrimitives* const maxPrimitives = maxBufferItems.fMaxPrimitives;
-    const int maxInstances = (maxPrimitives[0].sum() + maxPrimitives[1].sum());
-    fInstanceBuffer = onFlushRP->makeBuffer(kVertex_GrBufferType, maxInstances * 4 * sizeof(int));
-    if (!fInstanceBuffer) {
-        fPointsBuffer.reset();
-        return false;
-    }
-
-    fFanPtsIdx = 0;
-    fControlPtsIdx = maxBufferItems.fMaxFanPoints;
-    SkDEBUGCODE(fMaxFanPoints = maxBufferItems.fMaxFanPoints);
-    SkDEBUGCODE(fMaxControlPoints = maxBufferItems.fMaxControlPoints);
-
-    int baseInstance = 0;
-    for (int i = 0; i < kNumScissorModes; ++i) {
-        fBaseInstances[i].fTriangles = baseInstance;
-        baseInstance += maxPrimitives[i].fMaxTriangles;
-
-        fBaseInstances[i].fQuadratics = baseInstance;
-        baseInstance += maxPrimitives[i].fMaxQuadratics;
-
-        fBaseInstances[i].fSerpentines = baseInstance;
-        baseInstance += maxPrimitives[i].fMaxCubics;
-
-        // Loops grow backwards.
-        fBaseInstances[i].fLoops = baseInstance;
-
-        fInstanceIndices[i] = fBaseInstances[i];
-    }
-
-    fPointsData = static_cast<SkPoint*>(fPointsBuffer->map());
-    SkASSERT(fPointsData);
-    GR_STATIC_ASSERT(SK_SCALAR_IS_FLOAT);
-    GR_STATIC_ASSERT(8 == sizeof(SkPoint));
-
-    fInstanceData = static_cast<PrimitiveInstance*>(fInstanceBuffer->map());
-    SkASSERT(fInstanceData);
-
-    return true;
-}
-
-using MaxBufferItems = GrCCPRCoverageOpsBuilder::MaxBufferItems;
-
-void MaxBufferItems::countPathItems(GrCCPRCoverageOpsBuilder::ScissorMode scissorMode,
-                                    const SkPath& path) {
-    static constexpr int kMaxQuadraticSegments = 2;
-    static constexpr int kMaxCubicSegments = 3;
-
-    MaxPrimitives& maxPrimitives = fMaxPrimitives[(int)scissorMode];
-    int currFanPts = 0;
-
-    for (SkPath::Verb verb : SkPathPriv::Verbs(path)) {
-        switch (verb) {
-            case SkPath::kMove_Verb:
-            case SkPath::kClose_Verb:
-                fMaxFanPoints += currFanPts;
-                maxPrimitives.fMaxTriangles += SkTMax(0, currFanPts - 2);
-                currFanPts = SkPath::kMove_Verb == verb ? 1 : 0;
-                continue;
-            case SkPath::kLine_Verb:
-                SkASSERT(currFanPts > 0);
-                ++currFanPts;
-                continue;
-            case SkPath::kQuad_Verb:
-                SkASSERT(currFanPts > 0);
-                currFanPts += kMaxQuadraticSegments;
-                fMaxControlPoints += kMaxQuadraticSegments;
-                maxPrimitives.fMaxQuadratics += kMaxQuadraticSegments;
-                continue;
-            case SkPath::kCubic_Verb:
-                GR_STATIC_ASSERT(kMaxCubicSegments >= kMaxQuadraticSegments);
-                SkASSERT(currFanPts > 0);
-                // Over-allocate for the worst case when the cubic is chopped into 3 segments.
-                currFanPts += kMaxCubicSegments;
-                // Each cubic segment has two control points.
-                fMaxControlPoints += kMaxCubicSegments * 2;
-                maxPrimitives.fMaxCubics += kMaxCubicSegments;
-                // The cubic may also turn out to be a quadratic. While we over-allocate by a fair
-                // amount, this is still a relatively small amount of space compared to the atlas.
-                maxPrimitives.fMaxQuadratics += kMaxQuadraticSegments;
-                continue;
-            case SkPath::kConic_Verb:
-                SkASSERT(currFanPts > 0);
-                SK_ABORT("Conics are not supported.");
-            default:
-                SK_ABORT("Unexpected path verb.");
-        }
-    }
-
-    fMaxFanPoints += currFanPts;
-    maxPrimitives.fMaxTriangles += SkTMax(0, currFanPts - 2);
-
-    ++fMaxPaths;
-}
-
-void GrCCPRCoverageOpsBuilder::parsePath(ScissorMode scissorMode, const SkMatrix& viewMatrix,
-                                         const SkPath& path, SkRect* devBounds,
-                                         SkRect* devBounds45) {
-    // Make sure they haven't called finalize yet (or not called init).
-    SkASSERT(fPointsData);
-    SkASSERT(fInstanceData);
-
-    fCurrScissorMode = scissorMode;
-    fCurrPathIndices = fInstanceIndices[(int)fCurrScissorMode];
-    fCurrContourStartIdx = fFanPtsIdx;
-
-    const SkPoint* const pts = SkPathPriv::PointData(path);
-    int ptsIdx = 0;
-
-    SkASSERT(!path.isEmpty());
-    SkASSERT(path.countPoints() > 0);
-    AccumulatingViewMatrix m(viewMatrix, pts[0]);
-
-    for (SkPath::Verb verb : SkPathPriv::Verbs(path)) {
-        switch (verb) {
-            case SkPath::kMove_Verb:
-                this->startContour(m.transform(pts[ptsIdx++]));
-                continue;
-            case SkPath::kClose_Verb:
-                this->closeContour();
-                continue;
-            case SkPath::kLine_Verb:
-                this->fanTo(m.transform(pts[ptsIdx]));
-                break;
-            case SkPath::kQuad_Verb:
-                SkASSERT(ptsIdx >= 1); // SkPath should have inserted an implicit moveTo if needed.
-                this->quadraticTo(m.transform(pts[ptsIdx]), m.transform(pts[ptsIdx + 1]));
-                break;
-            case SkPath::kCubic_Verb:
-                SkASSERT(ptsIdx >= 1); // SkPath should have inserted an implicit moveTo if needed.
-                this->cubicTo(m.transform(pts[ptsIdx]), m.transform(pts[ptsIdx + 1]),
-                              m.transform(pts[ptsIdx + 2]));
-                break;
-            case SkPath::kConic_Verb:
-                SK_ABORT("Conics are not supported.");
-            default:
-                SK_ABORT("Unexpected path verb.");
-        }
-
-        ptsIdx += num_pts(verb);
-    }
-
-    this->closeContour();
-
-    m.getAccumulatedBounds(devBounds, devBounds45);
-    SkDEBUGCODE(this->validate();)
-}
-
-void GrCCPRCoverageOpsBuilder::saveParsedPath(const SkIRect& clippedDevIBounds,
-                                              int16_t atlasOffsetX, int16_t atlasOffsetY) {
-    const PrimitiveTallies& baseIndices = fInstanceIndices[(int)fCurrScissorMode];
-    const int32_t packedAtlasOffset = (atlasOffsetY << 16) | (atlasOffsetX & 0xffff);
-    for (int i = baseIndices.fTriangles; i < fCurrPathIndices.fTriangles; ++i) {
-        fInstanceData[i].fPackedAtlasOffset = packedAtlasOffset;
-    }
-    for (int i = baseIndices.fQuadratics; i < fCurrPathIndices.fQuadratics; ++i) {
-        fInstanceData[i].fPackedAtlasOffset = packedAtlasOffset;
-    }
-    for (int i = baseIndices.fSerpentines; i < fCurrPathIndices.fSerpentines; ++i) {
-        fInstanceData[i].fPackedAtlasOffset = packedAtlasOffset;
-    }
-    for (int i = baseIndices.fLoops - 1; i >= fCurrPathIndices.fLoops; --i) {
-        fInstanceData[i].fPackedAtlasOffset = packedAtlasOffset;
-    }
-    if (ScissorMode::kScissored == fCurrScissorMode) {
-        fScissorBatches.push_back() = {
-            fCurrPathIndices - fInstanceIndices[(int)fCurrScissorMode],
-            clippedDevIBounds.makeOffset(atlasOffsetX, atlasOffsetY)
-        };
-    }
-    fInstanceIndices[(int)fCurrScissorMode] = fCurrPathIndices;
-}
-
-void GrCCPRCoverageOpsBuilder::startContour(const SkPoint& anchorPoint) {
-    this->closeContour();
-    fPointsData[fFanPtsIdx++] = fCurrAnchorPoint = fCurrFanPoint = anchorPoint;
-    SkASSERT(fCurrContourStartIdx == fFanPtsIdx - 1);
-}
-
-void GrCCPRCoverageOpsBuilder::fanTo(const SkPoint& pt) {
-    SkASSERT(fCurrContourStartIdx < fFanPtsIdx);
-    if (pt == fCurrAnchorPoint) {
-        this->startContour(pt);
-        return;
-    }
-    fPointsData[fFanPtsIdx++] = fCurrFanPoint = pt;
-}
-
-void GrCCPRCoverageOpsBuilder::quadraticTo(SkPoint controlPt, SkPoint endPt) {
-    SkASSERT(fCurrPathIndices.fQuadratics+2 <= fBaseInstances[(int)fCurrScissorMode].fSerpentines);
-
-    SkPoint chopped[5];
-    if (GrCCPRChopMonotonicQuadratics(fCurrFanPoint, controlPt, endPt, chopped)) {
-        this->fanTo(chopped[2]);
-        fPointsData[fControlPtsIdx++] = chopped[1];
-        fInstanceData[fCurrPathIndices.fQuadratics++].fQuadraticData = {
-            fControlPtsIdx - 1,
-            fFanPtsIdx - 2
-        };
-
-        controlPt = chopped[3];
-        SkASSERT(endPt == chopped[4]);
-    }
-
-    this->fanTo(endPt);
-    fPointsData[fControlPtsIdx++] = controlPt;
-    fInstanceData[fCurrPathIndices.fQuadratics++].fQuadraticData = {
-        fControlPtsIdx - 1,
-        fFanPtsIdx - 2
-    };
-}
-
-void GrCCPRCoverageOpsBuilder::cubicTo(SkPoint controlPt1, SkPoint controlPt2, SkPoint endPt) {
-    SkPoint P[4] = {fCurrFanPoint, controlPt1, controlPt2, endPt};
-    double t[2], s[2];
-    SkCubicType type = SkClassifyCubic(P, t, s);
-
-    if (SkCubicType::kLineOrPoint == type) {
-        this->fanTo(P[3]);
-        return;
-    }
-
-    if (SkCubicType::kQuadratic == type) {
-        SkScalar x1 = P[1].y() - P[0].y(),  y1 = P[0].x() - P[1].x(),
-                 k1 = x1 * P[0].x() + y1 * P[0].y();
-        SkScalar x2 = P[2].y() - P[3].y(),  y2 = P[3].x() - P[2].x(),
-                 k2 = x2 * P[3].x() + y2 * P[3].y();
-        SkScalar rdet = 1 / (x1*y2 - y1*x2);
-        this->quadraticTo({(y2*k1 - y1*k2) * rdet, (x1*k2 - x2*k1) * rdet}, P[3]);
-        return;
-    }
-
-    SkDCubic C;
-    C.set(P);
-
-    for (int x = 0; x <= 1; ++x) {
-        if (t[x] * s[x] <= 0) { // This is equivalent to tx/sx <= 0.
-            // This technically also gets taken if tx/sx = infinity, but the code still does
-            // the right thing in that edge case.
-            continue; // Don't increment x0.
-        }
-        if (fabs(t[x]) >= fabs(s[x])) { // tx/sx >= 1.
-            break;
-        }
-
-        const double chopT = double(t[x]) / double(s[x]);
-        SkASSERT(chopT >= 0 && chopT <= 1);
-        if (chopT <= 0 || chopT >= 1) { // floating-point error.
-            continue;
-        }
-
-        SkDCubicPair chopped = C.chopAt(chopT);
-
-        // Ensure the double points are identical if this is a loop (more workarounds for FP error).
-        if (SkCubicType::kLoop == type && 0 == t[0]) {
-            chopped.pts[3] = chopped.pts[0];
-        }
-
-        // (This might put ts0/ts1 out of order, but it doesn't matter anymore at this point.)
-        this->emitCubicSegment(type, chopped.first());
-        t[x] = 0;
-        s[x] = 1;
-
-        const double r = s[1 - x] * chopT;
-        t[1 - x] -= r;
-        s[1 - x] -= r;
-
-        C = chopped.second();
-    }
-
-    this->emitCubicSegment(type, C);
-}
-
-void GrCCPRCoverageOpsBuilder::emitCubicSegment(SkCubicType type, const SkDCubic& C) {
-    SkASSERT(fCurrPathIndices.fSerpentines < fCurrPathIndices.fLoops);
-
-    fPointsData[fControlPtsIdx++] = to_skpoint(C[1]);
-    fPointsData[fControlPtsIdx++] = to_skpoint(C[2]);
-    this->fanTo(to_skpoint(C[3]));
-
-    // Serpentines grow up from the front, and loops grow down from the back.
-    fInstanceData[SkCubicType::kLoop != type ?
-                  fCurrPathIndices.fSerpentines++ : --fCurrPathIndices.fLoops].fCubicData = {
-        fControlPtsIdx - 2,
-        fFanPtsIdx - 2
-    };
-}
-
-void GrCCPRCoverageOpsBuilder::closeContour() {
-    int fanSize = fFanPtsIdx - fCurrContourStartIdx;
-    if (fanSize >= 3) {
-         // Technically this can grow to fanSize + log3(fanSize), but we approximate with log2.
-        SkAutoSTMalloc<300, int32_t> indices(fanSize + SkNextLog2(fanSize));
-        std::iota(indices.get(), indices.get() + fanSize, fCurrContourStartIdx);
-        this->emitHierarchicalFan(indices, fanSize);
-    }
-
-    // Reset the current contour.
-    fCurrContourStartIdx = fFanPtsIdx;
-}
-
-void GrCCPRCoverageOpsBuilder::emitHierarchicalFan(int32_t indices[], int count) {
-    if (count < 3) {
-        return;
-    }
-
-    const int32_t oneThirdPt = count / 3;
-    const int32_t twoThirdsPt = (2 * count) / 3;
-    SkASSERT(fCurrPathIndices.fTriangles < fBaseInstances[(int)fCurrScissorMode].fQuadratics);
-
-    fInstanceData[fCurrPathIndices.fTriangles++].fTriangleData = {
-        indices[0],
-        indices[oneThirdPt],
-        indices[twoThirdsPt]
-    };
-
-    this->emitHierarchicalFan(indices, oneThirdPt + 1);
-    this->emitHierarchicalFan(&indices[oneThirdPt], twoThirdsPt - oneThirdPt + 1);
-
-    int32_t oldIndex = indices[count];
-    indices[count] = indices[0];
-    this->emitHierarchicalFan(&indices[twoThirdsPt], count - twoThirdsPt + 1);
-    indices[count] = oldIndex;
-}
-
-std::unique_ptr<GrDrawOp> GrCCPRCoverageOpsBuilder::createIntermediateOp(SkISize drawBounds) {
-    auto op = skstd::make_unique<CoverageOp>(drawBounds, fPointsBuffer, fInstanceBuffer,
-                                             fBaseInstances, fInstanceIndices,
-                                             std::move(fScissorBatches));
-    SkASSERT(fScissorBatches.empty());
-
-    fBaseInstances[0] = fInstanceIndices[0];
-    fBaseInstances[1] = fInstanceIndices[1];
-    return std::move(op);
-}
-
-std::unique_ptr<GrDrawOp> GrCCPRCoverageOpsBuilder::finalize(SkISize drawBounds) {
-    fPointsBuffer->unmap();
-    SkDEBUGCODE(fPointsData = nullptr);
-
-    fInstanceBuffer->unmap();
-    SkDEBUGCODE(fInstanceData = nullptr);
-
-    return skstd::make_unique<CoverageOp>(drawBounds, std::move(fPointsBuffer),
-                                          std::move(fInstanceBuffer), fBaseInstances,
-                                          fInstanceIndices, std::move(fScissorBatches));
-}
-
-#ifdef SK_DEBUG
-
-void GrCCPRCoverageOpsBuilder::validate() {
-    SkASSERT(fFanPtsIdx <= fMaxFanPoints);
-    SkASSERT(fControlPtsIdx <= fMaxFanPoints + fMaxControlPoints);
-    for (int i = 0; i < kNumScissorModes; ++i) {
-        SkASSERT(fInstanceIndices[i].fTriangles <= fBaseInstances[i].fQuadratics);
-        SkASSERT(fInstanceIndices[i].fQuadratics <= fBaseInstances[i].fSerpentines);
-        SkASSERT(fInstanceIndices[i].fSerpentines <= fInstanceIndices[i].fLoops);
-    }
-}
-
-#endif
-
-using CoverageOp = GrCCPRCoverageOpsBuilder::CoverageOp;
-
-GrCCPRCoverageOpsBuilder::CoverageOp::CoverageOp(const SkISize& drawBounds,
-                                             sk_sp<GrBuffer> pointsBuffer,
-                                             sk_sp<GrBuffer> trianglesBuffer,
-                                             const PrimitiveTallies baseInstances[kNumScissorModes],
-                                             const PrimitiveTallies endInstances[kNumScissorModes],
-                                             SkTArray<ScissorBatch>&& scissorBatches)
-        : INHERITED(ClassID())
-        , fDrawBounds(drawBounds)
-        , fPointsBuffer(std::move(pointsBuffer))
-        , fTrianglesBuffer(std::move(trianglesBuffer))
-        , fBaseInstances{baseInstances[0], baseInstances[1]}
-        , fInstanceCounts{endInstances[0] - baseInstances[0], endInstances[1] - baseInstances[1]}
-        , fScissorBatches(std::move(scissorBatches)) {
-    SkASSERT(fPointsBuffer);
-    SkASSERT(fTrianglesBuffer);
-    this->setBounds(SkRect::MakeIWH(fDrawBounds.width(), fDrawBounds.height()),
-                    GrOp::HasAABloat::kNo, GrOp::IsZeroArea::kNo);
-}
-
-void CoverageOp::onExecute(GrOpFlushState* flushState) {
-    using Mode = GrCCPRCoverageProcessor::Mode;
-
-    SkDEBUGCODE(GrCCPRCoverageProcessor::Validate(flushState->drawOpArgs().fProxy));
-
-    GrPipeline pipeline(flushState->drawOpArgs().fProxy, GrPipeline::ScissorState::kEnabled,
-                        SkBlendMode::kPlus);
-
-    fMeshesScratchBuffer.reserve(1 + fScissorBatches.count());
-    fDynamicStatesScratchBuffer.reserve(1 + fScissorBatches.count());
-
-    // Triangles.
-    auto constexpr kTrianglesGrPrimitiveType = GrCCPRCoverageProcessor::kTrianglesGrPrimitiveType;
-    this->drawMaskPrimitives(flushState, pipeline, Mode::kCombinedTriangleHullsAndEdges,
-                             kTrianglesGrPrimitiveType, 3, &PrimitiveTallies::fTriangles);
-    this->drawMaskPrimitives(flushState, pipeline, Mode::kTriangleCorners,
-                             kTrianglesGrPrimitiveType, 3, &PrimitiveTallies::fTriangles);
-
-    // Quadratics.
-    auto constexpr kQuadraticsGrPrimitiveType = GrCCPRCoverageProcessor::kQuadraticsGrPrimitiveType;
-    this->drawMaskPrimitives(flushState, pipeline, Mode::kQuadraticHulls,
-                             kQuadraticsGrPrimitiveType, 3, &PrimitiveTallies::fQuadratics);
-    this->drawMaskPrimitives(flushState, pipeline, Mode::kQuadraticCorners,
-                             kQuadraticsGrPrimitiveType, 3, &PrimitiveTallies::fQuadratics);
-
-    // Cubics.
-    auto constexpr kCubicsGrPrimitiveType = GrCCPRCoverageProcessor::kCubicsGrPrimitiveType;
-    this->drawMaskPrimitives(flushState, pipeline, Mode::kSerpentineInsets,
-                             kCubicsGrPrimitiveType, 4, &PrimitiveTallies::fSerpentines);
-    this->drawMaskPrimitives(flushState, pipeline, Mode::kLoopInsets,
-                             kCubicsGrPrimitiveType, 4, &PrimitiveTallies::fLoops);
-    this->drawMaskPrimitives(flushState, pipeline, Mode::kSerpentineBorders,
-                             kCubicsGrPrimitiveType, 4, &PrimitiveTallies::fSerpentines);
-    this->drawMaskPrimitives(flushState, pipeline, Mode::kLoopBorders,
-                             kCubicsGrPrimitiveType, 4, &PrimitiveTallies::fLoops);
-}
-
-void CoverageOp::drawMaskPrimitives(GrOpFlushState* flushState, const GrPipeline& pipeline,
-                                    GrCCPRCoverageProcessor::Mode mode, GrPrimitiveType primType,
-                                    int vertexCount, int PrimitiveTallies::* instanceType) const {
-    SkASSERT(pipeline.getScissorState().enabled());
-
-    fMeshesScratchBuffer.reset();
-    fDynamicStatesScratchBuffer.reset();
-
-    if (const int instanceCount = fInstanceCounts[(int)ScissorMode::kNonScissored].*instanceType) {
-        const int baseInstance = fBaseInstances[(int)ScissorMode::kNonScissored].*instanceType;
-        // Loops grow backwards, which is indicated by a negative instance count.
-        GrMesh& mesh = fMeshesScratchBuffer.emplace_back(primType);
-        mesh.setInstanced(fTrianglesBuffer.get(), abs(instanceCount),
-                          baseInstance + SkTMin(instanceCount, 0), vertexCount);
-        fDynamicStatesScratchBuffer.push_back().fScissorRect.setXYWH(0, 0, fDrawBounds.width(),
-                                                                     fDrawBounds.height());
-    }
-
-    if (fInstanceCounts[(int)ScissorMode::kScissored].*instanceType) {
-        int baseInstance = fBaseInstances[(int)ScissorMode::kScissored].*instanceType;
-        for (const ScissorBatch& batch : fScissorBatches) {
-            SkASSERT(this->bounds().contains(batch.fScissor));
-            const int instanceCount = batch.fInstanceCounts.*instanceType;
-            if (!instanceCount) {
-                continue;
-            }
-            // Loops grow backwards, which is indicated by a negative instance count.
-            GrMesh& mesh = fMeshesScratchBuffer.emplace_back(primType);
-            mesh.setInstanced(fTrianglesBuffer.get(), abs(instanceCount),
-                              baseInstance + SkTMin(instanceCount,0), vertexCount);
-            fDynamicStatesScratchBuffer.push_back().fScissorRect = batch.fScissor;
-            baseInstance += instanceCount;
-        }
-    }
-
-    SkASSERT(fMeshesScratchBuffer.count() == fDynamicStatesScratchBuffer.count());
-
-    if (!fMeshesScratchBuffer.empty()) {
-        GrCCPRCoverageProcessor proc(mode, fPointsBuffer.get());
-        SkASSERT(flushState->rtCommandBuffer());
-        flushState->rtCommandBuffer()->draw(pipeline, proc, fMeshesScratchBuffer.begin(),
-                                            fDynamicStatesScratchBuffer.begin(),
-                                            fMeshesScratchBuffer.count(), this->bounds());
-    }
-}
-
-using PrimitiveTallies = CoverageOp::PrimitiveTallies;
-
-inline PrimitiveTallies PrimitiveTallies::operator+(const PrimitiveTallies& b) const {
-    return {fTriangles + b.fTriangles,
-            fQuadratics + b.fQuadratics,
-            fSerpentines + b.fSerpentines,
-            fLoops + b.fLoops};
-}
-
-inline PrimitiveTallies PrimitiveTallies::operator-(const PrimitiveTallies& b) const {
-    return {fTriangles - b.fTriangles,
-            fQuadratics - b.fQuadratics,
-            fSerpentines - b.fSerpentines,
-            fLoops - b.fLoops};
-}
-
-inline int PrimitiveTallies::sum() const {
-    return fTriangles + fQuadratics + fSerpentines + fLoops;
-}
-
-inline AccumulatingViewMatrix::AccumulatingViewMatrix(const SkMatrix& m,
-                                                      const SkPoint& initialPoint) {
-    // m45 transforms into 45 degree space in order to find the octagon's diagonals. We could
-    // use SK_ScalarRoot2Over2 if we wanted an orthonormal transform, but this is irrelevant as
-    // long as the shader uses the correct inverse when coming back to device space.
-    SkMatrix m45;
-    m45.setSinCos(1, 1);
-    m45.preConcat(m);
-
-    fX = Sk4f(m.getScaleX(), m.getSkewY(), m45.getScaleX(), m45.getSkewY());
-    fY = Sk4f(m.getSkewX(), m.getScaleY(), m45.getSkewX(), m45.getScaleY());
-    fT = Sk4f(m.getTranslateX(), m.getTranslateY(), m45.getTranslateX(), m45.getTranslateY());
-
-    Sk4f transformed = SkNx_fma(fY, Sk4f(initialPoint.y()), fT);
-    transformed = SkNx_fma(fX, Sk4f(initialPoint.x()), transformed);
-    fTopLeft = fBottomRight = transformed;
-}
-
-inline SkPoint AccumulatingViewMatrix::transform(const SkPoint& pt) {
-    Sk4f transformed = SkNx_fma(fY, Sk4f(pt.y()), fT);
-    transformed = SkNx_fma(fX, Sk4f(pt.x()), transformed);
-
-    fTopLeft = Sk4f::Min(fTopLeft, transformed);
-    fBottomRight = Sk4f::Max(fBottomRight, transformed);
-
-    // TODO: vst1_lane_f32? (Sk4f::storeLane?)
-    float data[4];
-    transformed.store(data);
-    return SkPoint::Make(data[0], data[1]);
-}
-
-inline void AccumulatingViewMatrix::getAccumulatedBounds(SkRect* devBounds,
-                                                         SkRect* devBounds45) const {
-    float topLeft[4], bottomRight[4];
-    fTopLeft.store(topLeft);
-    fBottomRight.store(bottomRight);
-    devBounds->setLTRB(topLeft[0], topLeft[1], bottomRight[0], bottomRight[1]);
-    devBounds45->setLTRB(topLeft[2], topLeft[3], bottomRight[2], bottomRight[3]);
-}
diff --git a/src/gpu/ccpr/GrCCPRCoverageOpsBuilder.h b/src/gpu/ccpr/GrCCPRCoverageOpsBuilder.h
deleted file mode 100644
index 7648ef3..0000000
--- a/src/gpu/ccpr/GrCCPRCoverageOpsBuilder.h
+++ /dev/null
@@ -1,166 +0,0 @@
-/*
- * Copyright 2017 Google Inc.
- *
- * Use of this source code is governed by a BSD-style license that can be
- * found in the LICENSE file.
- */
-
-#ifndef GrCCPRCoverageOpsBuilder_DEFINED
-#define GrCCPRCoverageOpsBuilder_DEFINED
-
-#include "GrBuffer.h"
-#include "SkRefCnt.h"
-#include "SkRect.h"
-#include "ccpr/GrCCPRCoverageProcessor.h"
-
-class GrCCPRCoverageOp;
-class GrDrawOp;
-class GrOnFlushResourceProvider;
-class GrResourceProvider;
-class SkMatrix;
-class SkPath;
-struct SkDCubic;
-enum class SkCubicType;
-
-/**
- * This class produces GrDrawOps that render coverage count masks and atlases. A path is added to
- * the current op in two steps:
- *
- *   1) parsePath(ScissorMode, viewMatrix, path, &devBounds, &devBounds45);
- *
- *   <client decides where to put the mask within an atlas, if wanted>
- *
- *   2) saveParsedPath(offsetX, offsetY, clipBounds);
- *
- * The client can then produce a GrDrawOp for all currently saved paths by calling either
- * createIntermediateOp() or finalize().
- */
-class GrCCPRCoverageOpsBuilder {
-public:
-    // Indicates whether a path should enforce a scissor clip when rendering its mask. (Specified
-    // as an int because these values get used directly as indices into arrays.)
-    enum class ScissorMode : int {
-        kNonScissored = 0,
-        kScissored = 1
-    };
-    static constexpr int kNumScissorModes = 2;
-
-    struct MaxPrimitives {
-        int fMaxTriangles = 0;
-        int fMaxQuadratics = 0;
-        int fMaxCubics = 0;
-
-        void operator+=(const MaxPrimitives&);
-        int sum() const;
-    };
-
-    struct MaxBufferItems {
-        int             fMaxFanPoints = 0;
-        int             fMaxControlPoints = 0;
-        MaxPrimitives   fMaxPrimitives[kNumScissorModes];
-        int             fMaxPaths = 0;
-
-        void operator+=(const MaxBufferItems&);
-        void countPathItems(ScissorMode, const SkPath&);
-    };
-
-    GrCCPRCoverageOpsBuilder() : fScissorBatches(512) {
-        SkDEBUGCODE(fPointsData = nullptr;)
-        SkDEBUGCODE(fInstanceData = nullptr;)
-    }
-
-    bool init(GrOnFlushResourceProvider*, const MaxBufferItems&);
-
-    // Parses an SkPath into a temporary staging area. The path will not yet be included in the next
-    // Op until there is a matching call to saveParsedPath.
-    //
-    // Returns two tight bounding boxes: device space and "45 degree" (| 1 -1 | * devCoords) space.
-    //                                                                 | 1  1 |
-    void parsePath(ScissorMode, const SkMatrix&, const SkPath&, SkRect* devBounds,
-                   SkRect* devBounds45);
-
-    // Commits the currently-parsed path from the staging area to the GPU buffers and next Op.
-    // Accepts an optional post-device-space translate for placement in an atlas.
-    void saveParsedPath(const SkIRect& clippedDevIBounds,
-                        int16_t atlasOffsetX, int16_t atlasOffsetY);
-
-    // Flushes all currently-saved paths to a GrDrawOp and leaves the GPU buffers open to accept
-    // new paths (e.g. for when an atlas runs out of space).
-    // NOTE: if there is a parsed path in the staging area, it will not be included. But the client
-    // may still call saveParsedPath to include it in a future Op.
-    std::unique_ptr<GrDrawOp> SK_WARN_UNUSED_RESULT createIntermediateOp(SkISize drawBounds);
-
-    // Flushes the remaining saved paths to a final GrDrawOp and closes off the GPU buffers. This
-    // must be called before attempting to draw any Ops produced by this class.
-    std::unique_ptr<GrDrawOp> SK_WARN_UNUSED_RESULT finalize(SkISize drawBounds);
-
-    class CoverageOp;
-
-private:
-    using PrimitiveInstance = GrCCPRCoverageProcessor::PrimitiveInstance;
-
-    struct PrimitiveTallies {
-        int fTriangles;
-        int fQuadratics;
-        int fSerpentines;
-        int fLoops;
-
-        PrimitiveTallies operator+(const PrimitiveTallies&) const;
-        PrimitiveTallies operator-(const PrimitiveTallies&) const;
-        int sum() const;
-    };
-
-    struct ScissorBatch {
-        PrimitiveTallies   fInstanceCounts;
-        SkIRect            fScissor;
-    };
-
-    void startContour(const SkPoint& anchorPoint);
-    void fanTo(const SkPoint& pt);
-    void quadraticTo(SkPoint controlPt, SkPoint endPt);
-    void cubicTo(SkPoint controlPt1, SkPoint controlPt2, SkPoint endPt);
-    void emitCubicSegment(SkCubicType, const SkDCubic&);
-    void closeContour();
-    void emitHierarchicalFan(int32_t indices[], int count);
-    SkDEBUGCODE(void validate();)
-
-    ScissorMode              fCurrScissorMode;
-    PrimitiveTallies         fCurrPathIndices;
-    int32_t                  fCurrContourStartIdx;
-    SkPoint                  fCurrAnchorPoint;
-    SkPoint                  fCurrFanPoint;
-
-    sk_sp<GrBuffer>          fPointsBuffer;
-    SkPoint*                 fPointsData;
-    int32_t                  fFanPtsIdx;
-    int32_t                  fControlPtsIdx;
-    SkDEBUGCODE(int          fMaxFanPoints;)
-    SkDEBUGCODE(int          fMaxControlPoints;)
-
-    sk_sp<GrBuffer>          fInstanceBuffer;
-    PrimitiveInstance*       fInstanceData;
-    PrimitiveTallies         fBaseInstances[kNumScissorModes];
-    PrimitiveTallies         fInstanceIndices[kNumScissorModes];
-
-    SkTArray<ScissorBatch>   fScissorBatches;
-};
-
-inline void GrCCPRCoverageOpsBuilder::MaxBufferItems::operator+=(const MaxBufferItems& b) {
-    fMaxFanPoints += b.fMaxFanPoints;
-    fMaxControlPoints += b.fMaxControlPoints;
-    fMaxPrimitives[0] += b.fMaxPrimitives[0];
-    fMaxPrimitives[1] += b.fMaxPrimitives[1];
-    fMaxPaths += b.fMaxPaths;
-}
-
-inline void GrCCPRCoverageOpsBuilder::MaxPrimitives::operator+=(const MaxPrimitives& b) {
-    fMaxTriangles += b.fMaxTriangles;
-    fMaxQuadratics += b.fMaxQuadratics;
-    fMaxCubics += b.fMaxCubics;
-}
-
-inline int GrCCPRCoverageOpsBuilder::MaxPrimitives::sum() const {
-    return fMaxTriangles + fMaxQuadratics + fMaxCubics;
-}
-
-#endif
diff --git a/src/gpu/ccpr/GrCCPRCoverageProcessor.cpp b/src/gpu/ccpr/GrCCPRCoverageProcessor.cpp
index e4e59ff..69ec6ef 100644
--- a/src/gpu/ccpr/GrCCPRCoverageProcessor.cpp
+++ b/src/gpu/ccpr/GrCCPRCoverageProcessor.cpp
@@ -45,7 +45,7 @@
 
 GrCCPRCoverageProcessor::GrCCPRCoverageProcessor(Mode mode, GrBuffer* pointsBuffer)
         : fMode(mode)
-        , fInstanceAttrib(this->addInstanceAttrib("instance", kVec4i_GrVertexAttribType,
+        , fInstanceAttrib(this->addInstanceAttrib("instance", InstanceArrayFormat(mode),
                                                   kHigh_GrSLPrecision)) {
     fPointsBufferAccess.reset(kRG_float_GrPixelConfig, pointsBuffer, kVertex_GrShaderFlag);
     this->addBufferAccess(&fPointsBufferAccess);
@@ -121,7 +121,7 @@
                                           GrGLSLVertexBuilder* v,
                                           const TexelBufferHandle& pointsBuffer,
                                           const char* rtAdjust, GrGPArgs* gpArgs) const {
-    v->codeAppendf("int packedoffset = %s.w;", proc.instanceAttrib());
+    v->codeAppendf("int packedoffset = %s[%i];", proc.instanceAttrib(), proc.atlasOffsetIdx());
     v->codeAppend ("highp float2 atlasoffset = float2((packedoffset<<16) >> 16, "
                                                      "packedoffset >> 16);");
 
diff --git a/src/gpu/ccpr/GrCCPRCoverageProcessor.h b/src/gpu/ccpr/GrCCPRCoverageProcessor.h
index 198956a..d0b20cf 100644
--- a/src/gpu/ccpr/GrCCPRCoverageProcessor.h
+++ b/src/gpu/ccpr/GrCCPRCoverageProcessor.h
@@ -25,8 +25,8 @@
  * be used to draw the path (see GrCCPRPathProcessor).
  *
  * Caller provides the primitives' (x,y) points in an fp32x2 (RG) texel buffer, and an instance
- * buffer with a single int32x4 attrib for each primitive (defined below). There are no vertex
- * attribs.
+ * buffer with a single int32x4 attrib (for triangles) or int32x2 (for curves) defined below. There
+ * are no vertex attribs.
  *
  * Draw calls are instanced, with one vertex per bezier point (3 for triangles). They use the
  * corresponding GrPrimitiveType as defined below.
@@ -40,31 +40,21 @@
     static constexpr GrPrimitiveType kQuadraticsGrPrimitiveType = GrPrimitiveType::kTriangles;
     static constexpr GrPrimitiveType kCubicsGrPrimitiveType = GrPrimitiveType::kLinesAdjacency;
 
-    struct PrimitiveInstance {
-        union {
-            struct {
-                int32_t fPt0Idx;
-                int32_t fPt1Idx;
-                int32_t fPt2Idx;
-            } fTriangleData;
-
-            struct {
-                int32_t fControlPtIdx;
-                int32_t fEndPtsIdx; // The endpoints (P0 and P2) are adjacent in the texel buffer.
-            } fQuadraticData;
-
-            struct {
-                int32_t fControlPtsKLMRootsIdx; // The control points (P1 and P2) are adjacent in
-                                                // the texel buffer, followed immediately by the
-                                                // homogenous KLM roots ({tl,sl}, {tm,sm}).
-                int32_t fEndPtsIdx; // The endpoints (P0 and P3) are adjacent in the texel buffer.
-            } fCubicData;
-        };
-
+    struct TriangleInstance {
+        int32_t fPt0Idx;
+        int32_t fPt1Idx;
+        int32_t fPt2Idx;
         int32_t fPackedAtlasOffset; // (offsetY << 16) | (offsetX & 0xffff)
     };
 
-    GR_STATIC_ASSERT(4 * 4 == sizeof(PrimitiveInstance));
+    GR_STATIC_ASSERT(4 * 4 == sizeof(TriangleInstance));
+
+    struct CurveInstance {
+        int32_t fPtsIdx;
+        int32_t fPackedAtlasOffset; // (offsetY << 16) | (offsetX & 0xffff)
+    };
+
+    GR_STATIC_ASSERT(2 * 4 == sizeof(CurveInstance));
 
     enum class Mode {
         // Triangles.
@@ -83,11 +73,17 @@
         kLoopInsets,
         kLoopBorders
     };
+    static constexpr GrVertexAttribType InstanceArrayFormat(Mode mode) {
+        return mode < Mode::kQuadraticHulls ? kVec4i_GrVertexAttribType : kVec2i_GrVertexAttribType;
+    }
     static const char* GetProcessorName(Mode);
 
     GrCCPRCoverageProcessor(Mode, GrBuffer* pointsBuffer);
 
     const char* instanceAttrib() const { return fInstanceAttrib.fName; }
+    int atlasOffsetIdx() const {
+        return kVec4i_GrVertexAttribType == InstanceArrayFormat(fMode) ? 3 : 1;
+    }
     const char* name() const override { return GetProcessorName(fMode); }
     SkString dumpInfo() const override {
         return SkStringPrintf("%s\n%s", this->name(), this->INHERITED::dumpInfo().c_str());
diff --git a/src/gpu/ccpr/GrCCPRCubicProcessor.cpp b/src/gpu/ccpr/GrCCPRCubicProcessor.cpp
index c978468..ad0729b 100644
--- a/src/gpu/ccpr/GrCCPRCubicProcessor.cpp
+++ b/src/gpu/ccpr/GrCCPRCubicProcessor.cpp
@@ -24,7 +24,7 @@
 #endif
 
     // Fetch all 4 cubic bezier points.
-    v->codeAppendf("int4 indices = int4(%s.y, %s.x, %s.x + 1, %s.y + 1);",
+    v->codeAppendf("int4 indices = int4(%s.x, %s.x + 1, %s.x + 2, %s.x + 3);",
                    proc.instanceAttrib(), proc.instanceAttrib(), proc.instanceAttrib(),
                    proc.instanceAttrib());
     v->codeAppend ("highp float4x2 bezierpts = float4x2(");
diff --git a/src/gpu/ccpr/GrCCPRCubicProcessor.h b/src/gpu/ccpr/GrCCPRCubicProcessor.h
index 26ff9ac..d445eeb 100644
--- a/src/gpu/ccpr/GrCCPRCubicProcessor.h
+++ b/src/gpu/ccpr/GrCCPRCubicProcessor.h
@@ -21,7 +21,7 @@
  *
  * The caller is expected to chop cubics at the KLM roots (a.k.a. inflection points and loop
  * intersection points, resulting in necessarily convex segments) before feeding them into this
- * processor.
+ * processor. (Use GrCCPRGeometry.)
  *
  * The curves are rendered in two passes:
  *
diff --git a/src/gpu/ccpr/GrCCPRGeometry.cpp b/src/gpu/ccpr/GrCCPRGeometry.cpp
index f756f6e..a2c0890 100644
--- a/src/gpu/ccpr/GrCCPRGeometry.cpp
+++ b/src/gpu/ccpr/GrCCPRGeometry.cpp
@@ -8,8 +8,9 @@
 #include "GrCCPRGeometry.h"
 
 #include "GrTypes.h"
+#include "SkGeometry.h"
 #include "SkPoint.h"
-#include "SkNx.h"
+#include "../pathops/SkPathOpsCubic.h"
 #include <algorithm>
 #include <cmath>
 #include <cstdlib>
@@ -19,6 +20,33 @@
 GR_STATIC_ASSERT(2 * sizeof(float) == sizeof(SkPoint));
 GR_STATIC_ASSERT(0 == offsetof(SkPoint, fX));
 
+void GrCCPRGeometry::beginPath() {
+    SkASSERT(!fBuildingContour);
+    fVerbs.push_back(Verb::kBeginPath);
+}
+
+void GrCCPRGeometry::beginContour(const SkPoint& devPt) {
+    SkASSERT(!fBuildingContour);
+
+    fCurrFanPoint = fCurrAnchorPoint = devPt;
+
+    // Store the current verb count in the fTriangles field for now. When we close the contour we
+    // will use this value to calculate the actual number of triangles in its fan.
+    fCurrContourTallies = {fVerbs.count(), 0, 0, 0};
+
+    fPoints.push_back(devPt);
+    fVerbs.push_back(Verb::kBeginContour);
+
+    SkDEBUGCODE(fBuildingContour = true;)
+}
+
+void GrCCPRGeometry::lineTo(const SkPoint& devPt) {
+    SkASSERT(fBuildingContour);
+    fCurrFanPoint = devPt;
+    fPoints.push_back(devPt);
+    fVerbs.push_back(Verb::kLineTo);
+}
+
 static inline Sk2f normalize(const Sk2f& n) {
     Sk2f nn = n*n;
     return n * (nn + SkNx_shuffle<1,0>(nn)).rsqrt();
@@ -47,17 +75,20 @@
     return SkNx_fma(t, b - a, a);
 }
 
-bool GrCCPRChopMonotonicQuadratics(const SkPoint& startPt, const SkPoint& controlPt,
-                                   const SkPoint& endPt, SkPoint dst[5]) {
-    Sk2f p0 = Sk2f::Load(&startPt);
-    Sk2f p1 = Sk2f::Load(&controlPt);
-    Sk2f p2 = Sk2f::Load(&endPt);
+void GrCCPRGeometry::quadraticTo(const SkPoint& devP0, const SkPoint& devP1) {
+    SkASSERT(fBuildingContour);
+
+    Sk2f p0 = Sk2f::Load(&fCurrFanPoint);
+    Sk2f p1 = Sk2f::Load(&devP0);
+    Sk2f p2 = Sk2f::Load(&devP1);
+    fCurrFanPoint = devP1;
 
     Sk2f tan0 = p1 - p0;
     Sk2f tan1 = p2 - p1;
     // This should almost always be this case for well-behaved curves in the real world.
     if (is_convex_curve_monotonic(p0, tan0, p2, tan1)) {
-        return false;
+        this->appendMonotonicQuadratic(p1, p2);
+        return;
     }
 
     // Chop the curve into two segments with equal curvature. To do this we find the T value whose
@@ -84,11 +115,111 @@
     Sk2f p12 = SkNx_fma(t, tan1, p1);
     Sk2f p012 = lerp(p01, p12, t);
 
-    p0.store(&dst[0]);
-    p01.store(&dst[1]);
-    p012.store(&dst[2]);
-    p12.store(&dst[3]);
-    p2.store(&dst[4]);
+    this->appendMonotonicQuadratic(p01, p012);
+    this->appendMonotonicQuadratic(p12, p2);
+}
 
-    return true;
+inline void GrCCPRGeometry::appendMonotonicQuadratic(const Sk2f& p1, const Sk2f& p2) {
+    p1.store(&fPoints.push_back());
+    p2.store(&fPoints.push_back());
+    fVerbs.push_back(Verb::kMonotonicQuadraticTo);
+    ++fCurrContourTallies.fQuadratics;
+}
+
+void GrCCPRGeometry::cubicTo(const SkPoint& devP1, const SkPoint& devP2, const SkPoint& devP3) {
+    SkASSERT(fBuildingContour);
+
+    SkPoint P[4] = {fCurrFanPoint, devP1, devP2, devP3}; // P0 is the current fan point.
+    double t[2], s[2]; // Filled in by SkClassifyCubic; the chop T values below are t[i]/s[i].
+    SkCubicType type = SkClassifyCubic(P, t, s);
+
+    if (SkCubicType::kLineOrPoint == type) {
+        this->lineTo(P[3]); // Classified as a line or point: emit a line to the endpoint.
+        return;
+    }
+
+    if (SkCubicType::kQuadratic == type) {
+        SkPoint quadP1 = (devP1 + devP2) * .75f - (fCurrFanPoint + devP3) * .25f;
+        this->quadraticTo(quadP1, devP3); // Classified as a quadratic: emit it as one.
+        return;
+    }
+
+    fCurrFanPoint = devP3; // The next segment will start at this cubic's endpoint.
+
+    SkDCubic C;
+    C.set(P);
+
+    for (int x = 0; x <= 1; ++x) {
+        if (t[x] * s[x] <= 0) { // This is equivalent to tx/sx <= 0.
+            // This technically also gets taken if tx/sx = infinity, but the code still does
+            // the right thing in that edge case.
+            continue; // Nothing to chop for this root.
+        }
+        if (fabs(t[x]) >= fabs(s[x])) { // tx/sx >= 1.
+            break;
+        }
+
+        const double chopT = double(t[x]) / double(s[x]);
+        SkASSERT(chopT >= 0 && chopT <= 1);
+        if (chopT <= 0 || chopT >= 1) { // floating-point error.
+            continue;
+        }
+
+        SkDCubicPair chopped = C.chopAt(chopT);
+
+        // Ensure the double points are identical if this is a loop (more workarounds for FP error).
+        if (SkCubicType::kLoop == type && 0 == t[0]) {
+            chopped.pts[3] = chopped.pts[0];
+        }
+
+        // (This might put ts0/ts1 out of order, but it doesn't matter anymore at this point.)
+        this->appendConvexCubic(type, chopped.first());
+        t[x] = 0;
+        s[x] = 1;
+
+        const double r = s[1 - x] * chopT;
+        t[1 - x] -= r;
+        s[1 - x] -= r;
+
+        C = chopped.second(); // Continue with the remainder of the curve.
+    }
+
+    this->appendConvexCubic(type, C); // Emit what remains after any chops.
+}
+
+static SkPoint to_skpoint(const SkDPoint& dpoint) {
+    return {static_cast<SkScalar>(dpoint.fX), static_cast<SkScalar>(dpoint.fY)};
+}
+
+inline void GrCCPRGeometry::appendConvexCubic(SkCubicType type, const SkDCubic& C) {
+    fPoints.push_back(to_skpoint(C[1]));
+    fPoints.push_back(to_skpoint(C[2]));
+    fPoints.push_back(to_skpoint(C[3]));
+    if (SkCubicType::kLoop != type) {
+        fVerbs.push_back(Verb::kConvexSerpentineTo);
+        ++fCurrContourTallies.fSerpentines;
+    } else {
+        fVerbs.push_back(Verb::kConvexLoopTo);
+        ++fCurrContourTallies.fLoops;
+    }
+}
+
+GrCCPRGeometry::PrimitiveTallies GrCCPRGeometry::endContour() {
+    SkASSERT(fBuildingContour);
+    SkASSERT(fVerbs.count() >= fCurrContourTallies.fTriangles);
+
+    // The fTriangles field currently contains this contour's starting verb index. We can now
+    // use it to calculate the size of the contour's fan.
+    int fanSize = fVerbs.count() - fCurrContourTallies.fTriangles;
+    if (fCurrFanPoint == fCurrAnchorPoint) {
+        --fanSize; // The final point coincides with the anchor, so don't count it twice.
+        fVerbs.push_back(Verb::kEndClosedContour);
+    } else {
+        fVerbs.push_back(Verb::kEndOpenContour);
+    }
+
+    fCurrContourTallies.fTriangles = SkTMax(fanSize - 2, 0); // Never negative for tiny fans.
+
+    SkDEBUGCODE(fBuildingContour = false;)
+    return fCurrContourTallies;
 }
diff --git a/src/gpu/ccpr/GrCCPRGeometry.h b/src/gpu/ccpr/GrCCPRGeometry.h
index cb8bb3a..72b84d5 100644
--- a/src/gpu/ccpr/GrCCPRGeometry.h
+++ b/src/gpu/ccpr/GrCCPRGeometry.h
@@ -8,22 +8,106 @@
 #ifndef GrGrCCPRGeometry_DEFINED
 #define GrGrCCPRGeometry_DEFINED
 
-#include "SkTypes.h"
+#include "SkNx.h"
+#include "SkPoint.h"
+#include "SkTArray.h"
 
-struct SkPoint;
+struct SkDCubic;
+enum class SkCubicType;
 
-/*
- * Ensures that a quadratic bezier is monotonic with respect to the vector between its endpoints
- * [P2 - P0]. In the event that the curve is not monotonic, it is chopped into two segments that
- * are. This should be rare for well-behaved curves in the real world.
+/**
+ * This class chops device-space contours up into a series of segments that CCPR knows how to
+ * render. (See GrCCPRGeometry::Verb.)
  *
  * NOTE: This must be done in device space, since an affine transformation can change whether a
  * curve is monotonic.
- *
- * Returns false if the curve was already monotonic.
- *         true if it was chopped into two monotonic segments, now contained in dst.
  */
-bool GrCCPRChopMonotonicQuadratics(const SkPoint& startPt, const SkPoint& controlPt,
-                                   const SkPoint& endPt, SkPoint dst[5]);
+class GrCCPRGeometry {
+public:
+    // These are the verbs that CCPR knows how to draw. If a path has any segments that don't map to
+    // this list, then they are chopped into smaller ones that do. A list of these comprise a
+    // compact representation of what can later be expanded into GPU instance data.
+    enum class Verb : uint8_t {
+        kBeginPath, // Included only for caller convenience.
+        kBeginContour,
+        kLineTo,
+        kMonotonicQuadraticTo, // Monotonic relative to the vector between its endpoints [P2 - P0].
+        kConvexSerpentineTo,
+        kConvexLoopTo,
+        kEndClosedContour, // endPt == startPt.
+        kEndOpenContour // endPt != startPt.
+    };
+
+    // These tallies track the numbers of CCPR primitives that are required to draw a contour.
+    struct PrimitiveTallies {
+        int fTriangles; // Number of triangles in the contour's fan.
+        int fQuadratics;
+        int fSerpentines;
+        int fLoops;
+
+        void operator+=(const PrimitiveTallies&);
+        PrimitiveTallies operator-(const PrimitiveTallies&) const;
+    };
+
+    GrCCPRGeometry(int numSkPoints = 0, int numSkVerbs = 0)
+            : fPoints(numSkPoints * 3) // Reserve for a 3x expansion in points and verbs.
+            , fVerbs(numSkVerbs * 3) {}
+
+    const SkTArray<SkPoint, true>& points() const { SkASSERT(!fBuildingContour); return fPoints; }
+    const SkTArray<Verb, true>& verbs() const { SkASSERT(!fBuildingContour); return fVerbs; }
+
+    void reset() {
+        SkASSERT(!fBuildingContour);
+        fPoints.reset();
+        fVerbs.reset();
+    }
+
+    // This is included in case the caller needs to discard previously added contours. It is up to
+    // the caller to track counts and ensure we don't pop back into the middle of a different
+    // contour.
+    void resize_back(int numPoints, int numVerbs) {
+        SkASSERT(!fBuildingContour);
+        fPoints.resize_back(numPoints);
+        fVerbs.resize_back(numVerbs);
+        SkASSERT(fVerbs.empty() || fVerbs.back() == Verb::kEndOpenContour ||
+                 fVerbs.back() == Verb::kEndClosedContour);
+    }
+
+    void beginPath();
+    void beginContour(const SkPoint& devPt);
+    void lineTo(const SkPoint& devPt);
+    void quadraticTo(const SkPoint& devP1, const SkPoint& devP2);
+    void cubicTo(const SkPoint& devP1, const SkPoint& devP2, const SkPoint& devP3);
+    PrimitiveTallies endContour(); // Returns the numbers of primitives needed to draw the contour.
+
+private:
+    inline void appendMonotonicQuadratic(const Sk2f& p1, const Sk2f& p2);
+    inline void appendConvexCubic(SkCubicType, const SkDCubic&);
+
+    // Transient state used while building a contour.
+    SkPoint                         fCurrAnchorPoint;
+    SkPoint                         fCurrFanPoint;
+    PrimitiveTallies                fCurrContourTallies;
+    SkDEBUGCODE(bool                fBuildingContour = false);
+
+    // TODO: These points could eventually be written directly to block-allocated GPU buffers.
+    SkSTArray<128, SkPoint, true>   fPoints;
+    SkSTArray<128, Verb, true>      fVerbs;
+};
+
+inline void GrCCPRGeometry::PrimitiveTallies::operator+=(const PrimitiveTallies& b) {
+    fTriangles += b.fTriangles;
+    fQuadratics += b.fQuadratics;
+    fSerpentines += b.fSerpentines;
+    fLoops += b.fLoops;
+}
+
+GrCCPRGeometry::PrimitiveTallies
+inline GrCCPRGeometry::PrimitiveTallies::operator-(const PrimitiveTallies& b) const {
+    return {fTriangles - b.fTriangles,
+            fQuadratics - b.fQuadratics,
+            fSerpentines - b.fSerpentines,
+            fLoops - b.fLoops};
+}
 
 #endif
diff --git a/src/gpu/ccpr/GrCCPRQuadraticProcessor.cpp b/src/gpu/ccpr/GrCCPRQuadraticProcessor.cpp
index ed5f0f3..73d0d1e 100644
--- a/src/gpu/ccpr/GrCCPRQuadraticProcessor.cpp
+++ b/src/gpu/ccpr/GrCCPRQuadraticProcessor.cpp
@@ -16,10 +16,9 @@
                                                   const TexelBufferHandle& pointsBuffer,
                                                   const char* atlasOffset, const char* rtAdjust,
                                                   GrGPArgs* gpArgs) const {
-    v->codeAppendf("int3 indices = int3(%s.y, %s.x, %s.y + 1);",
-                   proc.instanceAttrib(), proc.instanceAttrib(), proc.instanceAttrib());
     v->codeAppend ("highp float2 self = ");
-    v->appendTexelFetch(pointsBuffer, "indices[sk_VertexID]");
+    v->appendTexelFetch(pointsBuffer,
+                        SkStringPrintf("%s.x + sk_VertexID", proc.instanceAttrib()).c_str());
     v->codeAppendf(".xy + %s;", atlasOffset);
     gpArgs->fPositionVar.set(kVec2f_GrSLType, "self");
 }
diff --git a/src/gpu/ccpr/GrCCPRQuadraticProcessor.h b/src/gpu/ccpr/GrCCPRQuadraticProcessor.h
index 1eda255..85be23e 100644
--- a/src/gpu/ccpr/GrCCPRQuadraticProcessor.h
+++ b/src/gpu/ccpr/GrCCPRQuadraticProcessor.h
@@ -18,7 +18,7 @@
  * https://www.microsoft.com/en-us/research/wp-content/uploads/2005/01/p1000-loop.pdf
  *
  * The provided curves must be monotonic with respect to the vector of their closing edge [P2 - P0].
- * Use GrPathUtils::chopMonotonicQuads.
+ * (Use GrCCPRGeometry.)
  */
 class GrCCPRQuadraticProcessor : public GrCCPRCoverageProcessor::PrimitiveProcessor {
 public:
diff --git a/src/gpu/ccpr/GrCoverageCountingPathRenderer.cpp b/src/gpu/ccpr/GrCoverageCountingPathRenderer.cpp
index 596ec5e..2bec4ff 100644
--- a/src/gpu/ccpr/GrCoverageCountingPathRenderer.cpp
+++ b/src/gpu/ccpr/GrCoverageCountingPathRenderer.cpp
@@ -72,6 +72,7 @@
         , fOwningRTPendingOps(nullptr) {
     SkDEBUGCODE(fBaseInstance = -1);
     SkDEBUGCODE(fDebugInstanceCount = 1;)
+    SkDEBUGCODE(fDebugSkippedInstances = 0;)
 
     GrRenderTargetContext* const rtc = args.fRenderTargetContext;
 
@@ -115,9 +116,12 @@
         SkASSERT(owningRTPendingOps == fOwningRTPendingOps);
         owningRTPendingOps->fOpList.remove(that);
     } else {
-        // wasRecorded is not called when the op gets combined first. Count path items here instead.
-        SingleDraw& onlyDraw = that->getOnlyPathDraw();
-        fOwningRTPendingOps->fMaxBufferItems.countPathItems(onlyDraw.fScissorMode, onlyDraw.fPath);
+        // The Op is being combined immediately after creation, before a call to wasRecorded. In
+        // this case wasRecorded will not be called. So we count its path here instead.
+        const SingleDraw& onlyDraw = that->getOnlyPathDraw();
+        ++fOwningRTPendingOps->fNumTotalPaths;
+        fOwningRTPendingOps->fNumSkPoints += onlyDraw.fPath.countPoints();
+        fOwningRTPendingOps->fNumSkVerbs += onlyDraw.fPath.countVerbs();
     }
 
     fTailDraw->fNext = &fOwningRTPendingOps->fDrawsAllocator.push_back(that->fHeadDraw);
@@ -132,21 +136,17 @@
 
 void DrawPathsOp::wasRecorded(GrRenderTargetOpList* opList) {
     SkASSERT(!fOwningRTPendingOps);
-    SingleDraw& onlyDraw = this->getOnlyPathDraw();
+    const SingleDraw& onlyDraw = this->getOnlyPathDraw();
     fOwningRTPendingOps = &fCCPR->fRTPendingOpsMap[opList->uniqueID()];
+    ++fOwningRTPendingOps->fNumTotalPaths;
+    fOwningRTPendingOps->fNumSkPoints += onlyDraw.fPath.countPoints();
+    fOwningRTPendingOps->fNumSkVerbs += onlyDraw.fPath.countVerbs();
     fOwningRTPendingOps->fOpList.addToTail(this);
-    fOwningRTPendingOps->fMaxBufferItems.countPathItems(onlyDraw.fScissorMode, onlyDraw.fPath);
 }
 
 void GrCoverageCountingPathRenderer::preFlush(GrOnFlushResourceProvider* onFlushRP,
                                               const uint32_t* opListIDs, int numOpListIDs,
                                               SkTArray<sk_sp<GrRenderTargetContext>>* results) {
-    using PathInstance = GrCCPRPathProcessor::Instance;
-
-    SkASSERT(!fPerFlushIndexBuffer);
-    SkASSERT(!fPerFlushVertexBuffer);
-    SkASSERT(!fPerFlushInstanceBuffer);
-    SkASSERT(fPerFlushAtlases.empty());
     SkASSERT(!fFlushing);
     SkDEBUGCODE(fFlushing = true;)
 
@@ -154,8 +154,29 @@
         return; // Nothing to draw.
     }
 
+    this->setupPerFlushResources(onFlushRP, opListIDs, numOpListIDs, results);
+
+    // Erase these last, once we are done accessing data from the SingleDraw allocators.
+    for (int i = 0; i < numOpListIDs; ++i) {
+        fRTPendingOpsMap.erase(opListIDs[i]);
+    }
+}
+
+void GrCoverageCountingPathRenderer::setupPerFlushResources(GrOnFlushResourceProvider* onFlushRP,
+                                                  const uint32_t* opListIDs,
+                                                  int numOpListIDs,
+                                                  SkTArray<sk_sp<GrRenderTargetContext>>* results) {
+    using PathInstance = GrCCPRPathProcessor::Instance;
+
+    SkASSERT(!fPerFlushIndexBuffer);
+    SkASSERT(!fPerFlushVertexBuffer);
+    SkASSERT(!fPerFlushInstanceBuffer);
+    SkASSERT(fPerFlushAtlases.empty());
+
+    fPerFlushResourcesAreValid = false;
+
     SkTInternalLList<DrawPathsOp> flushingOps;
-    GrCCPRCoverageOpsBuilder::MaxBufferItems maxBufferItems;
+    int maxTotalPaths = 0, numSkPoints = 0, numSkVerbs = 0;
 
     for (int i = 0; i < numOpListIDs; ++i) {
         auto it = fRTPendingOpsMap.find(opListIDs[i]);
@@ -163,13 +184,15 @@
             RTPendingOps& rtPendingOps = it->second;
             SkASSERT(!rtPendingOps.fOpList.isEmpty());
             flushingOps.concat(std::move(rtPendingOps.fOpList));
-            maxBufferItems += rtPendingOps.fMaxBufferItems;
+            maxTotalPaths += rtPendingOps.fNumTotalPaths;
+            numSkPoints += rtPendingOps.fNumSkPoints;
+            numSkVerbs += rtPendingOps.fNumSkVerbs;
         }
     }
 
-    SkASSERT(flushingOps.isEmpty() == !maxBufferItems.fMaxPaths);
+    SkASSERT(flushingOps.isEmpty() == !maxTotalPaths);
     if (flushingOps.isEmpty()) {
-        return; // Still nothing to draw.
+        return; // Nothing to draw.
     }
 
     fPerFlushIndexBuffer = GrCCPRPathProcessor::FindOrMakeIndexBuffer(onFlushRP);
@@ -184,14 +207,8 @@
         return;
     }
 
-    GrCCPRCoverageOpsBuilder atlasOpsBuilder;
-    if (!atlasOpsBuilder.init(onFlushRP, maxBufferItems)) {
-        SkDebugf("WARNING: failed to allocate buffers for coverage ops. No paths will be drawn.\n");
-        return;
-    }
-
     fPerFlushInstanceBuffer = onFlushRP->makeBuffer(kVertex_GrBufferType,
-                                                   maxBufferItems.fMaxPaths * sizeof(PathInstance));
+                                                   maxTotalPaths * sizeof(PathInstance));
     if (!fPerFlushInstanceBuffer) {
         SkDebugf("WARNING: failed to allocate path instance buffer. No paths will be drawn.\n");
         return;
@@ -201,29 +218,29 @@
     SkASSERT(pathInstanceData);
     int pathInstanceIdx = 0;
 
+    GrCCPRCoverageOpsBuilder atlasOpsBuilder(maxTotalPaths, numSkPoints, numSkVerbs);
     GrCCPRAtlas* atlas = nullptr;
-    SkDEBUGCODE(int skippedPaths = 0;)
+    SkDEBUGCODE(int skippedTotalPaths = 0;)
 
     SkTInternalLList<DrawPathsOp>::Iter iter;
     iter.init(flushingOps, SkTInternalLList<DrawPathsOp>::Iter::kHead_IterStart);
-    while (DrawPathsOp* op = iter.get()) {
-        SkASSERT(op->fDebugInstanceCount > 0);
-        SkASSERT(-1 == op->fBaseInstance);
-        op->fBaseInstance = pathInstanceIdx;
+    while (DrawPathsOp* drawPathOp = iter.get()) {
+        SkASSERT(drawPathOp->fDebugInstanceCount > 0);
+        SkASSERT(-1 == drawPathOp->fBaseInstance);
+        drawPathOp->fBaseInstance = pathInstanceIdx;
 
-        for (const DrawPathsOp::SingleDraw* draw = &op->fHeadDraw; draw; draw = draw->fNext) {
+        for (const auto* draw = &drawPathOp->fHeadDraw; draw; draw = draw->fNext) {
             // parsePath gives us two tight bounding boxes: one in device space, as well as a second
             // one rotated an additional 45 degrees. The path vertex shader uses these two bounding
             // boxes to generate an octagon that circumscribes the path.
             SkRect devBounds, devBounds45;
-            atlasOpsBuilder.parsePath(draw->fScissorMode, draw->fMatrix, draw->fPath, &devBounds,
-                                      &devBounds45);
+            atlasOpsBuilder.parsePath(draw->fMatrix, draw->fPath, &devBounds, &devBounds45);
 
             SkRect clippedDevBounds = devBounds;
             if (ScissorMode::kScissored == draw->fScissorMode &&
                 !clippedDevBounds.intersect(devBounds, SkRect::Make(draw->fClipBounds))) {
-                SkDEBUGCODE(--op->fDebugInstanceCount);
-                SkDEBUGCODE(++skippedPaths;)
+                SkDEBUGCODE(++drawPathOp->fDebugSkippedInstances);
+                atlasOpsBuilder.discardParsedPath();
                 continue;
             }
 
@@ -234,12 +251,9 @@
             SkIPoint16 atlasLocation;
             if (atlas && !atlas->addRect(w, h, &atlasLocation)) {
                 // The atlas is out of room and can't grow any bigger.
-                auto atlasOp = atlasOpsBuilder.createIntermediateOp(atlas->drawBounds());
-                if (auto rtc = atlas->finalize(onFlushRP, std::move(atlasOp))) {
-                    results->push_back(std::move(rtc));
-                }
-                if (pathInstanceIdx > op->fBaseInstance) {
-                    op->addAtlasBatch(atlas, pathInstanceIdx);
+                atlasOpsBuilder.emitOp(atlas->drawBounds());
+                if (pathInstanceIdx > drawPathOp->fBaseInstance) {
+                    drawPathOp->addAtlasBatch(atlas, pathInstanceIdx);
                 }
                 atlas = nullptr;
             }
@@ -262,34 +276,54 @@
                 draw->fColor
             };
 
-            atlasOpsBuilder.saveParsedPath(clippedDevIBounds, offsetX, offsetY);
+            atlasOpsBuilder.saveParsedPath(draw->fScissorMode, clippedDevIBounds, offsetX, offsetY);
         }
 
-        SkASSERT(pathInstanceIdx == op->fBaseInstance + op->fDebugInstanceCount);
-        op->addAtlasBatch(atlas, pathInstanceIdx);
+        SkASSERT(pathInstanceIdx == drawPathOp->fBaseInstance + drawPathOp->fDebugInstanceCount -
+                                    drawPathOp->fDebugSkippedInstances);
+        if (pathInstanceIdx > drawPathOp->fBaseInstance) {
+            drawPathOp->addAtlasBatch(atlas, pathInstanceIdx);
+        }
 
         iter.next();
+        SkDEBUGCODE(skippedTotalPaths += drawPathOp->fDebugSkippedInstances;)
+    }
+    SkASSERT(pathInstanceIdx == maxTotalPaths - skippedTotalPaths);
+
+    if (atlas) {
+        atlasOpsBuilder.emitOp(atlas->drawBounds());
     }
 
-    SkASSERT(pathInstanceIdx == maxBufferItems.fMaxPaths - skippedPaths);
     fPerFlushInstanceBuffer->unmap();
 
-    std::unique_ptr<GrDrawOp> atlasOp = atlasOpsBuilder.finalize(atlas->drawBounds());
-    if (auto rtc = atlas->finalize(onFlushRP, std::move(atlasOp))) {
-        results->push_back(std::move(rtc));
+    // Draw the coverage ops into their respective atlases.
+    SkSTArray<4, std::unique_ptr<GrCCPRCoverageOp>> atlasOps(fPerFlushAtlases.count());
+    if (!atlasOpsBuilder.finalize(onFlushRP, &atlasOps)) {
+        SkDebugf("WARNING: failed to allocate ccpr atlas buffers. No paths will be drawn.\n");
+        return;
     }
+    SkASSERT(atlasOps.count() == fPerFlushAtlases.count());
 
-    // Erase these last, once we are done accessing data from the SingleDraw allocators.
-    for (int i = 0; i < numOpListIDs; ++i) {
-        fRTPendingOpsMap.erase(opListIDs[i]);
+    GrTAllocator<GrCCPRAtlas>::Iter atlasIter(&fPerFlushAtlases);
+    for (std::unique_ptr<GrCCPRCoverageOp>& atlasOp : atlasOps) {
+        SkAssertResult(atlasIter.next());
+        GrCCPRAtlas* atlas = atlasIter.get();
+        SkASSERT(atlasOp->bounds() == SkRect::MakeIWH(atlas->drawBounds().width(),
+                                                      atlas->drawBounds().height()));
+        if (auto rtc = atlas->finalize(onFlushRP, std::move(atlasOp))) {
+            results->push_back(std::move(rtc));
+        }
     }
+    SkASSERT(!atlasIter.next());
+
+    fPerFlushResourcesAreValid = true;
 }
 
 void DrawPathsOp::onExecute(GrOpFlushState* flushState) {
     SkASSERT(fCCPR->fFlushing);
     SkASSERT(flushState->rtCommandBuffer());
 
-    if (!fCCPR->fPerFlushInstanceBuffer) {
+    if (!fCCPR->fPerFlushResourcesAreValid) {
         return; // Setup failed.
     }
 
@@ -323,7 +357,7 @@
         flushState->rtCommandBuffer()->draw(pipeline, coverProc, &mesh, nullptr, 1, this->bounds());
     }
 
-    SkASSERT(baseInstance == fBaseInstance + fDebugInstanceCount);
+    SkASSERT(baseInstance == fBaseInstance + fDebugInstanceCount - fDebugSkippedInstances);
 }
 
 void GrCoverageCountingPathRenderer::postFlush() {
diff --git a/src/gpu/ccpr/GrCoverageCountingPathRenderer.h b/src/gpu/ccpr/GrCoverageCountingPathRenderer.h
index f55d0e1..e1e28a4 100644
--- a/src/gpu/ccpr/GrCoverageCountingPathRenderer.h
+++ b/src/gpu/ccpr/GrCoverageCountingPathRenderer.h
@@ -13,7 +13,7 @@
 #include "GrPathRenderer.h"
 #include "SkTInternalLList.h"
 #include "ccpr/GrCCPRAtlas.h"
-#include "ccpr/GrCCPRCoverageOpsBuilder.h"
+#include "ccpr/GrCCPRCoverageOp.h"
 #include "ops/GrDrawOp.h"
 #include <map>
 
@@ -106,6 +106,7 @@
         RTPendingOps*                           fOwningRTPendingOps;
         int                                     fBaseInstance;
         SkDEBUGCODE(int                         fDebugInstanceCount;)
+        SkDEBUGCODE(int                         fDebugSkippedInstances;)
         SkSTArray<1, AtlasBatch, true>          fAtlasBatches;
 
         friend class GrCoverageCountingPathRenderer;
@@ -116,9 +117,14 @@
 private:
     GrCoverageCountingPathRenderer() = default;
 
+    void setupPerFlushResources(GrOnFlushResourceProvider*, const uint32_t* opListIDs,
+                                int numOpListIDs, SkTArray<sk_sp<GrRenderTargetContext>>* results);
+
     struct RTPendingOps {
         SkTInternalLList<DrawPathsOp>                 fOpList;
-        GrCCPRCoverageOpsBuilder::MaxBufferItems      fMaxBufferItems;
+        int                                           fNumTotalPaths = 0;
+        int                                           fNumSkPoints = 0;
+        int                                           fNumSkVerbs = 0;
         GrSTAllocator<256, DrawPathsOp::SingleDraw>   fDrawsAllocator;
     };
 
@@ -129,6 +135,7 @@
     sk_sp<GrBuffer>                    fPerFlushVertexBuffer;
     sk_sp<GrBuffer>                    fPerFlushInstanceBuffer;
     GrSTAllocator<4, GrCCPRAtlas>      fPerFlushAtlases;
+    bool                               fPerFlushResourcesAreValid;
     SkDEBUGCODE(bool                   fFlushing = false;)
 };