Chop tessellated curves that don't fit in a patch

Previously we would completely disable hardware tessellation for a path
if there was any chance of a curve requiring more segments than
supported by the hardware. This CL updates the tessellators to simply
chop curves until each piece fits in a patch, allowing us to finally draw
any path using hardware tessellation.

Bug: skia:10419
Change-Id: I5c9f78cda3e30b8810aff3cb908235965706f2d8
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/410977
Commit-Queue: Chris Dalton <csmartdalton@google.com>
Reviewed-by: Jim Van Verth <jvanverth@google.com>
diff --git a/bench/TessellateBench.cpp b/bench/TessellateBench.cpp
index 2206e7e..5ba4a57 100644
--- a/bench/TessellateBench.cpp
+++ b/bench/TessellateBench.cpp
@@ -8,6 +8,7 @@
 #include "bench/Benchmark.h"
 #include "include/gpu/GrDirectContext.h"
 #include "src/core/SkPathPriv.h"
+#include "src/core/SkRectPriv.h"
 #include "src/gpu/GrDirectContextPriv.h"
 #include "src/gpu/GrOpFlushState.h"
 #include "src/gpu/geometry/GrWangsFormula.h"
@@ -42,11 +43,11 @@
     return GrDirectContext::MakeMock(&mockOptions, ctxOptions);
 }
 
-static SkPath make_cubic_path() {
+static SkPath make_cubic_path(int maxPow2) {
     SkRandom rand;
     SkPath path;
     for (int i = 0; i < kNumCubicsInChalkboard/2; ++i) {
-        float x = std::ldexp(rand.nextF(), (i % 18)) / 1e3f;
+        float x = std::ldexp(rand.nextF(), (i % maxPow2)) / 1e3f;
         path.cubicTo(111.625f*x, 308.188f*x, 764.62f*x, -435.688f*x, 742.63f*x, 85.187f*x);
         path.cubicTo(764.62f*x, -435.688f*x, 111.625f*x, 308.188f*x, 0, 0);
     }
@@ -110,19 +111,19 @@
     DEF_BENCH( return new PathTessellateBenchmark_##NAME(); ); \
     void PathTessellateBenchmark_##NAME::runBench()
 
-DEF_PATH_TESS_BENCH(GrPathIndirectTessellator, make_cubic_path(), SkMatrix::I()) {
+DEF_PATH_TESS_BENCH(GrPathIndirectTessellator, make_cubic_path(18), SkMatrix::I()) {
     GrPathIndirectTessellator tess(fMatrix, fPath, GrPathIndirectTessellator::DrawInnerFan::kNo);
-    tess.prepare(fTarget.get(), fMatrix, fPath, nullptr);
+    tess.prepare(fTarget.get(), SkRectPriv::MakeLargest(), fMatrix, fPath, nullptr);
 }
 
-DEF_PATH_TESS_BENCH(GrPathOuterCurveTessellator, make_cubic_path(), SkMatrix::I()) {
+DEF_PATH_TESS_BENCH(GrPathOuterCurveTessellator, make_cubic_path(8), SkMatrix::I()) {
     GrPathOuterCurveTessellator tess(GrPathTessellator::DrawInnerFan::kNo);
-    tess.prepare(fTarget.get(), fMatrix, fPath, nullptr);
+    tess.prepare(fTarget.get(), SkRectPriv::MakeLargest(), fMatrix, fPath, nullptr);
 }
 
-DEF_PATH_TESS_BENCH(GrPathWedgeTessellator, make_cubic_path(), SkMatrix::I()) {
+DEF_PATH_TESS_BENCH(GrPathWedgeTessellator, make_cubic_path(8), SkMatrix::I()) {
     GrPathWedgeTessellator tess;
-    tess.prepare(fTarget.get(), fMatrix, fPath, nullptr);
+    tess.prepare(fTarget.get(), SkRectPriv::MakeLargest(), fMatrix, fPath, nullptr);
 }
 
 static void benchmark_wangs_formula_cubic_log2(const SkMatrix& matrix, const SkPath& path) {
@@ -139,16 +140,16 @@
     }
 }
 
-DEF_PATH_TESS_BENCH(wangs_formula_cubic_log2, make_cubic_path(), SkMatrix::I()) {
+DEF_PATH_TESS_BENCH(wangs_formula_cubic_log2, make_cubic_path(18), SkMatrix::I()) {
     benchmark_wangs_formula_cubic_log2(fMatrix, fPath);
 }
 
-DEF_PATH_TESS_BENCH(wangs_formula_cubic_log2_scale, make_cubic_path(),
+DEF_PATH_TESS_BENCH(wangs_formula_cubic_log2_scale, make_cubic_path(18),
                     SkMatrix::Scale(1.1f, 0.9f)) {
     benchmark_wangs_formula_cubic_log2(fMatrix, fPath);
 }
 
-DEF_PATH_TESS_BENCH(wangs_formula_cubic_log2_affine, make_cubic_path(),
+DEF_PATH_TESS_BENCH(wangs_formula_cubic_log2_affine, make_cubic_path(18),
                     SkMatrix::MakeAll(.9f,0.9f,0,  1.1f,1.1f,0, 0,0,1)) {
     benchmark_wangs_formula_cubic_log2(fMatrix, fPath);
 }
@@ -323,7 +324,7 @@
 
         fTessellator = fMakeTessellatorFn(fShaderFlags, SkMatrix::Scale(fMatrixScale, fMatrixScale),
                                           fPathStrokes.data(), {fMatrixScale, fMatrixScale},
-                                          {-1e9f, -1e9f, 1e9f, 1e9f});
+                                          SkRectPriv::MakeLargest());
     }
 
     void onDraw(int loops, SkCanvas*) final {
diff --git a/include/private/SkTemplates.h b/include/private/SkTemplates.h
index 25333f9..4221ee1 100644
--- a/include/private/SkTemplates.h
+++ b/include/private/SkTemplates.h
@@ -42,7 +42,7 @@
 /**
  *  Returns a pointer to a D which comes byteOffset bytes after S.
  */
-template <typename D, typename S> static D* SkTAddOffset(S* ptr, size_t byteOffset) {
+template <typename D, typename S> static D* SkTAddOffset(S* ptr, ptrdiff_t byteOffset) {
     // The intermediate char* has the same cv-ness as D as this produces better error messages.
     // This relies on the fact that reinterpret_cast can add constness, but cannot remove it.
     return reinterpret_cast<D*>(reinterpret_cast<sknonstd::same_cv_t<char, D>*>(ptr) + byteOffset);
diff --git a/samplecode/SamplePathTessellators.cpp b/samplecode/SamplePathTessellators.cpp
index d3cf120..afb49e4 100644
--- a/samplecode/SamplePathTessellators.cpp
+++ b/samplecode/SamplePathTessellators.cpp
@@ -84,7 +84,7 @@
                 shader = alloc->make<GrCurveTessellateShader>(fMatrix);
                 break;
         }
-        fTessellator->prepare(flushState, fMatrix, fPath);
+        fTessellator->prepare(flushState, this->bounds(), fMatrix, fPath);
         auto pipeline = GrSimpleMeshDrawOpHelper::CreatePipeline(flushState, std::move(fProcessors),
                                                                  fPipelineFlags);
         fProgram = GrPathShader::MakeProgram({alloc, flushState->writeView(),
diff --git a/src/gpu/GrVertexChunkArray.h b/src/gpu/GrVertexChunkArray.h
index c27e06a..b559bfc 100644
--- a/src/gpu/GrVertexChunkArray.h
+++ b/src/gpu/GrVertexChunkArray.h
@@ -54,16 +54,29 @@
     SK_ALWAYS_INLINE GrVertexWriter appendVertices(int count) {
         SkASSERT(count > 0);
         if (fCurrChunkVertexCount + count > fCurrChunkVertexCapacity && !this->allocChunk(count)) {
+            SkDEBUGCODE(fLastAppendAmount = 0;)
             return {nullptr};
         }
         SkASSERT(fCurrChunkVertexCount + count <= fCurrChunkVertexCapacity);
         fCurrChunkVertexCount += count;
+        SkDEBUGCODE(fLastAppendAmount = count;)
         return std::exchange(fCurrChunkVertexWriter,
                              fCurrChunkVertexWriter.makeOffset(fStride * count));
     }
 
     SK_ALWAYS_INLINE GrVertexWriter appendVertex() { return this->appendVertices(1); }
 
+    // Pops the most recent 'count' contiguous vertices. Since there is no guarantee of contiguity
+    // between appends, 'count' may not exceed the count passed to the latest appendVertices() call.
+    void popVertices(int count) {
+        SkASSERT(count <= fLastAppendAmount);  // Only the latest append may be popped.
+        SkASSERT(fLastAppendAmount <= fCurrChunkVertexCount);
+        SkASSERT(count >= 0);
+        fCurrChunkVertexCount -= count;
+        fCurrChunkVertexWriter = fCurrChunkVertexWriter.makeOffset(fStride * -count);  // Step back.
+        SkDEBUGCODE(fLastAppendAmount -= count;)  // Debug-only: track what is still poppable.
+    }
+
 private:
     bool allocChunk(int minCount) {
         if (!fChunks->empty()) {
@@ -96,6 +109,8 @@
     GrVertexWriter fCurrChunkVertexWriter;
     int fCurrChunkVertexCount = 0;
     int fCurrChunkVertexCapacity = 0;
+
+    SkDEBUGCODE(int fLastAppendAmount = 0;)
 };
 
 #endif
diff --git a/src/gpu/GrVertexWriter.h b/src/gpu/GrVertexWriter.h
index 110b351..a4c13a3 100644
--- a/src/gpu/GrVertexWriter.h
+++ b/src/gpu/GrVertexWriter.h
@@ -42,7 +42,7 @@
     bool operator==(const GrVertexWriter& that) const { return fPtr == that.fPtr; }
     operator bool() const { return fPtr != nullptr; }
 
-    GrVertexWriter makeOffset(size_t offsetInBytes) const {
+    GrVertexWriter makeOffset(ptrdiff_t offsetInBytes) const {
         return {SkTAddOffset<void>(fPtr, offsetInBytes)};
     }
 
diff --git a/src/gpu/geometry/GrPathUtils.h b/src/gpu/geometry/GrPathUtils.h
index 5c54b19..1c749cb 100644
--- a/src/gpu/geometry/GrPathUtils.h
+++ b/src/gpu/geometry/GrPathUtils.h
@@ -136,15 +136,12 @@
 // Converts the given line to a cubic bezier.
 // NOTE: This method interpolates at 1/3 and 2/3, but if suitable in context, the cubic
 // {p0, p0, p1, p1} may also work.
-inline void convertLineToCubic(SkPoint startPt, SkPoint endPt, SkPoint out[4]) {
+inline void writeLineAsCubic(SkPoint startPt, SkPoint endPt, GrVertexWriter* writer) {
     using grvx::float2, skvx::bit_pun;
     float2 p0 = bit_pun<float2>(startPt);
     float2 p1 = bit_pun<float2>(endPt);
     float2 v = (p1 - p0) * (1/3.f);
-    out[0] = bit_pun<SkPoint>(p0);
-    out[1] = bit_pun<SkPoint>(p0 + v);
-    out[2] = bit_pun<SkPoint>(p1 - v);
-    out[3] = bit_pun<SkPoint>(p1);
+    writer->write(p0, p0 + v, p1 - v, p1);
 }
 
 // Converts the given quadratic bezier to a cubic.
diff --git a/src/gpu/tessellate/GrPathInnerTriangulateOp.cpp b/src/gpu/tessellate/GrPathInnerTriangulateOp.cpp
index 5716593..8f4a600 100644
--- a/src/gpu/tessellate/GrPathInnerTriangulateOp.cpp
+++ b/src/gpu/tessellate/GrPathInnerTriangulateOp.cpp
@@ -248,7 +248,7 @@
 
     if (fTessellator) {
         // Must be called after polysToTriangles() in order for fFanBreadcrumbs to be complete.
-        fTessellator->prepare(flushState, fViewMatrix, fPath, &fFanBreadcrumbs);
+        fTessellator->prepare(flushState, this->bounds(), fViewMatrix, fPath, &fFanBreadcrumbs);
     }
 }
 
diff --git a/src/gpu/tessellate/GrPathStencilFillOp.cpp b/src/gpu/tessellate/GrPathStencilFillOp.cpp
index f38ee3b..6cd4c89 100644
--- a/src/gpu/tessellate/GrPathStencilFillOp.cpp
+++ b/src/gpu/tessellate/GrPathStencilFillOp.cpp
@@ -152,7 +152,7 @@
         vertexAlloc.unlock(fFanVertexCount);
     }
 
-    fTessellator->prepare(flushState, fViewMatrix, fPath);
+    fTessellator->prepare(flushState, this->bounds(), fViewMatrix, fPath);
 }
 
 void GrPathStencilFillOp::onExecute(GrOpFlushState* flushState, const SkRect& chainBounds) {
diff --git a/src/gpu/tessellate/GrPathTessellator.cpp b/src/gpu/tessellate/GrPathTessellator.cpp
index afcb6e2..f39daef 100644
--- a/src/gpu/tessellate/GrPathTessellator.cpp
+++ b/src/gpu/tessellate/GrPathTessellator.cpp
@@ -11,10 +11,13 @@
 #include "src/gpu/GrGpu.h"
 #include "src/gpu/geometry/GrPathUtils.h"
 #include "src/gpu/geometry/GrWangsFormula.h"
+#include "src/gpu/tessellate/GrCullTest.h"
 #include "src/gpu/tessellate/GrMiddleOutPolygonTriangulator.h"
 #include "src/gpu/tessellate/GrMidpointContourParser.h"
 #include "src/gpu/tessellate/GrStencilPathShader.h"
 
+constexpr static float kPrecision = GrTessellationPathRenderer::kLinearizationPrecision;
+
 GrPathIndirectTessellator::GrPathIndirectTessellator(const SkMatrix& viewMatrix, const SkPath& path,
                                                      DrawInnerFan drawInnerFan)
         : fDrawInnerFan(drawInnerFan != DrawInnerFan::kNo) {
@@ -24,13 +27,13 @@
         int level;
         switch (verb) {
             case SkPathVerb::kConic:
-                level = GrWangsFormula::conic_log2(1.f / kLinearizationPrecision, pts, *w, xform);
+                level = GrWangsFormula::conic_log2(1/kPrecision, pts, *w, xform);
                 break;
             case SkPathVerb::kQuad:
-                level = GrWangsFormula::quadratic_log2(kLinearizationPrecision, pts, xform);
+                level = GrWangsFormula::quadratic_log2(kPrecision, pts, xform);
                 break;
             case SkPathVerb::kCubic:
-                level = GrWangsFormula::cubic_log2(kLinearizationPrecision, pts, xform);
+                level = GrWangsFormula::cubic_log2(kPrecision, pts, xform);
                 break;
             default:
                 continue;
@@ -84,8 +87,8 @@
     return numWritten;
 }
 
-void GrPathIndirectTessellator::prepare(GrMeshDrawOp::Target* target, const SkMatrix& viewMatrix,
-                                        const SkPath& path,
+void GrPathIndirectTessellator::prepare(GrMeshDrawOp::Target* target, const SkRect& /*cullBounds*/,
+                                        const SkMatrix& viewMatrix, const SkPath& path,
                                         const BreadcrumbTriangleList* breadcrumbTriangleList) {
     SkASSERT(fTotalInstanceCount == 0);
     SkASSERT(fIndirectDrawCount == 0);
@@ -189,14 +192,13 @@
                 default:
                     continue;
                 case SkPathVerb::kConic:
-                    level = GrWangsFormula::conic_log2(1.f / kLinearizationPrecision, pts, *w,
-                                                       xform);
+                    level = GrWangsFormula::conic_log2(1/kPrecision, pts, *w, xform);
                     break;
                 case SkPathVerb::kQuad:
-                    level = GrWangsFormula::quadratic_log2(kLinearizationPrecision, pts, xform);
+                    level = GrWangsFormula::quadratic_log2(kPrecision, pts, xform);
                     break;
                 case SkPathVerb::kCubic:
-                    level = GrWangsFormula::cubic_log2(kLinearizationPrecision, pts, xform);
+                    level = GrWangsFormula::cubic_log2(kPrecision, pts, xform);
                     break;
             }
             if (level == 0) {
@@ -245,85 +247,291 @@
     }
 }
 
-void GrPathOuterCurveTessellator::prepare(GrMeshDrawOp::Target* target, const SkMatrix& matrix,
-                                          const SkPath& path,
+void GrPathOuterCurveTessellator::prepare(GrMeshDrawOp::Target* target, const SkRect& cullBounds,
+                                          const SkMatrix& viewMatrix, const SkPath& path,
                                           const BreadcrumbTriangleList* breadcrumbTriangleList) {
     SkASSERT(target->caps().shaderCaps()->tessellationSupport());
-    SkASSERT(!fPatchBuffer);
-    SkASSERT(fPatchVertexCount == 0);
+    SkASSERT(fVertexChunkArray.empty());
 
-    int vertexLockCount = path.countVerbs() * 4;
+    // Determine how many triangles to allocate.
+    int maxTriangles = 0;
     if (fDrawInnerFan) {
-        vertexLockCount += max_triangles_in_inner_fan(path) * 4;
+        maxTriangles += max_triangles_in_inner_fan(path);
     }
     if (breadcrumbTriangleList) {
-        vertexLockCount += breadcrumbTriangleList->count() * 4;
+        maxTriangles += breadcrumbTriangleList->count();
     }
-    GrEagerDynamicVertexAllocator vertexAlloc(target, &fPatchBuffer, &fBasePatchVertex);
-    GrVertexWriter vertexWriter = vertexAlloc.lock<SkPoint>(vertexLockCount);
-    if (!vertexWriter) {
+    // Over-allocate enough curves for 1 in 4 to chop.
+    int curveAllocCount = (path.countVerbs() * 5 + 3) / 4;  // i.e., ceil(numVerbs * 5/4)
+    int patchAllocCount = maxTriangles + curveAllocCount;
+    if (!patchAllocCount) {
         return;
     }
+    GrVertexChunkBuilder chunker(target, &fVertexChunkArray, sizeof(SkPoint) * 4, patchAllocCount);
 
-    GrMiddleOutPolygonTriangulator middleOut(
-            &vertexWriter, GrMiddleOutPolygonTriangulator::OutputType::kConicsWithInfiniteWeight,
-            path.countVerbs());
+    // Write out the triangles.
+    if (maxTriangles) {
+        GrVertexWriter vertexWriter = chunker.appendVertices(maxTriangles);
+        if (!vertexWriter) {
+            return;
+        }
+        int numRemainingTriangles = maxTriangles;
+        if (fDrawInnerFan) {
+            int numWritten = GrMiddleOutPolygonTriangulator::WritePathInnerFan(
+                    &vertexWriter,
+                    GrMiddleOutPolygonTriangulator::OutputType::kConicsWithInfiniteWeight, path);
+            numRemainingTriangles -= numWritten;
+        }
+        if (breadcrumbTriangleList) {
+            int numWritten = write_breadcrumb_triangles(&vertexWriter, breadcrumbTriangleList);
+            numRemainingTriangles -= numWritten;
+        }
+        chunker.popVertices(numRemainingTriangles);
+    }
+
+    // Writes out curve patches, chopping as necessary so none require more segments than are
+    // supported by the hardware. Chopped pieces that fail the cull test are not written at all.
+    class CurveWriter {
+    public:
+        CurveWriter(const SkRect& cullBounds, const SkMatrix& viewMatrix,
+                    const GrShaderCaps& shaderCaps)
+                : fCullTest(cullBounds, viewMatrix)
+                , fVectorXform(viewMatrix) {
+            // GrCurveTessellateShader tessellates T=0..(1/2) on the first side of the triangle and
+            // T=(1/2)..1 on the second side. This means we get double the max tessellation segments
+            // for the range T=0..1.
+            float maxSegments = shaderCaps.maxTessellationSegments() * 2;
+            fMaxSegments_pow2 = maxSegments * maxSegments;  // Limit for conic_pow2().
+            fMaxSegments_pow4 = fMaxSegments_pow2 * fMaxSegments_pow2;  // Limit for *_pow4().
+        }
+
+        SK_ALWAYS_INLINE void writeQuadratic(GrVertexChunkBuilder* chunker, const SkPoint p[3]) {
+            if (GrWangsFormula::quadratic_pow4(kPrecision, p, fVectorXform) > fMaxSegments_pow4) {
+                this->chopAndWriteQuadratic(chunker, p);  // Over the limit: split and recurse.
+                return;
+            }
+            if (GrVertexWriter vertexWriter = chunker->appendVertex()) {  // Null: write nothing.
+                GrPathUtils::writeQuadAsCubic(p, &vertexWriter);
+            }
+        }
+
+        SK_ALWAYS_INLINE void writeConic(GrVertexChunkBuilder* chunker, const SkPoint p[3],
+                                         float w) {
+            if (GrWangsFormula::conic_pow2(1/kPrecision, p, w, fVectorXform) > fMaxSegments_pow2) {
+                this->chopAndWriteConic(chunker, {p, w});
+                return;
+            }
+            if (GrVertexWriter vertexWriter = chunker->appendVertex()) {
+                GrPathShader::WriteConicPatch(p, w, &vertexWriter);
+            }
+        }
+
+        SK_ALWAYS_INLINE void writeCubic(GrVertexChunkBuilder* chunker, const SkPoint p[4]) {
+            if (GrWangsFormula::cubic_pow4(kPrecision, p, fVectorXform) > fMaxSegments_pow4) {
+                this->chopAndWriteCubic(chunker, p);
+                return;
+            }
+            if (GrVertexWriter vertexWriter = chunker->appendVertex()) {
+                vertexWriter.writeArray(p, 4);
+            }
+        }
+
+    private:
+        void chopAndWriteQuadratic(GrVertexChunkBuilder* chunker, const SkPoint p[3]) {
+            SkPoint chops[5];  // Two quads: chops[0..2] and chops[2..4].
+            SkChopQuadAtHalf(p, chops);
+            for (int i = 0; i < 2; ++i) {
+                const SkPoint* q = chops + i*2;
+                if (fCullTest.areVisible3(q)) {
+                    this->writeQuadratic(chunker, q);
+                }
+            }
+            // Connect the two halves with a triangle: start point, chop point, end point.
+            this->writeTriangle(chunker, chops[0], chops[2], chops[4]);
+        }
+
+        void chopAndWriteConic(GrVertexChunkBuilder* chunker, const SkConic& conic) {
+            SkConic chops[2];
+            if (!conic.chopAt(.5, chops)) {
+                return;  // The chop failed; nothing is written for this conic.
+            }
+            for (int i = 0; i < 2; ++i) {
+                if (fCullTest.areVisible3(chops[i].fPts)) {
+                    this->writeConic(chunker, chops[i].fPts, chops[i].fW);
+                }
+            }
+            // Connect the two halves with a triangle: start point, chop point, end point.
+            this->writeTriangle(chunker, conic.fPts[0], chops[0].fPts[2], chops[1].fPts[2]);
+        }
+
+        void chopAndWriteCubic(GrVertexChunkBuilder* chunker, const SkPoint p[4]) {
+            SkPoint chops[7];  // Two cubics: chops[0..3] and chops[3..6].
+            SkChopCubicAtHalf(p, chops);
+            for (int i = 0; i < 2; ++i) {
+                const SkPoint* c = chops + i*3;
+                if (fCullTest.areVisible4(c)) {
+                    this->writeCubic(chunker, c);
+                }
+            }
+            // Connect the two halves with a triangle: start point, chop point, end point.
+            this->writeTriangle(chunker, chops[0], chops[3], chops[6]);
+        }
+
+        void writeTriangle(GrVertexChunkBuilder* chunker, SkPoint p0, SkPoint p1, SkPoint p2) {
+            if (GrVertexWriter vertexWriter = chunker->appendVertex()) {
+                vertexWriter.write(p0, p1, p2);
+                // Mark this instance as a triangle by setting it to a conic with w=Inf.
+                vertexWriter.fill(GrVertexWriter::kIEEE_32_infinity, 2);
+            }
+        }
+
+        GrCullTest fCullTest;  // Decides whether a chopped piece is written.
+        GrVectorXform fVectorXform;  // Handed to GrWangsFormula with every curve.
+        float fMaxSegments_pow2;
+        float fMaxSegments_pow4;
+    };
+
+    CurveWriter curveWriter(cullBounds, viewMatrix, *target->caps().shaderCaps());
     for (auto [verb, pts, w] : SkPathPriv::Iterate(path)) {
         switch (verb) {
-            case SkPathVerb::kMove:
-                if (fDrawInnerFan) {
-                    middleOut.closeAndMove(pts[0]);
-                }
-                continue;
-            case SkPathVerb::kClose:
-                continue;
-            case SkPathVerb::kLine:
-                break;
             case SkPathVerb::kQuad:
-                GrPathUtils::writeQuadAsCubic(pts, &vertexWriter);
-                fPatchVertexCount += 4;
-                break;
-            case SkPathVerb::kCubic:
-                vertexWriter.writeArray(pts, 4);
-                fPatchVertexCount += 4;
+                curveWriter.writeQuadratic(&chunker, pts);
                 break;
             case SkPathVerb::kConic:
-                GrPathShader::WriteConicPatch(pts, *w, &vertexWriter);
-                fPatchVertexCount += 4;
+                curveWriter.writeConic(&chunker, pts, *w);
+                break;
+            case SkPathVerb::kCubic:
+                curveWriter.writeCubic(&chunker, pts);
+                break;
+            default:
                 break;
         }
-        if (fDrawInnerFan) {
-            middleOut.pushVertex(pts[SkPathPriv::PtsInIter((unsigned)verb) - 1]);
-        }
     }
-    if (fDrawInnerFan) {
-        fPatchVertexCount += middleOut.close() * 4;
-    }
-    if (breadcrumbTriangleList) {
-        fPatchVertexCount += write_breadcrumb_triangles(&vertexWriter, breadcrumbTriangleList) * 4;
-    }
-    SkASSERT(fPatchVertexCount <= vertexLockCount);
-
-    vertexAlloc.unlock(fPatchVertexCount);
 }
 
-void GrPathWedgeTessellator::prepare(GrMeshDrawOp::Target* target, const SkMatrix& matrix,
-                                     const SkPath& path,
+void GrPathWedgeTessellator::prepare(GrMeshDrawOp::Target* target, const SkRect& cullBounds,
+                                     const SkMatrix& viewMatrix, const SkPath& path,
                                      const BreadcrumbTriangleList* breadcrumbTriangleList) {
     SkASSERT(target->caps().shaderCaps()->tessellationSupport());
     SkASSERT(!breadcrumbTriangleList);
-    SkASSERT(!fPatchBuffer);
-    SkASSERT(fPatchVertexCount == 0);
+    SkASSERT(fVertexChunkArray.empty());
 
-    // We emit one wedge per path segment. Each wedge has 5 vertices.
-    int maxVertices = max_segments_in_path(path) * 5;
-
-    GrEagerDynamicVertexAllocator vertexAlloc(target, &fPatchBuffer, &fBasePatchVertex);
-    auto* vertexData = vertexAlloc.lock<SkPoint>(maxVertices);
-    if (!vertexData) {
+    // Over-allocate enough wedges for 1 in 4 to chop.
+    int maxWedges = max_segments_in_path(path);
+    int wedgeAllocCount = (maxWedges * 5 + 3) / 4;  // i.e., ceil(maxWedges * 5/4)
+    if (!wedgeAllocCount) {
         return;
     }
+    GrVertexChunkBuilder chunker(target, &fVertexChunkArray, sizeof(SkPoint) * 5, wedgeAllocCount);
 
+    // Writes out wedge patches, chopping as necessary so none require more segments than are
+    // supported by the hardware. A chopped piece that fails the cull test becomes a flat wedge.
+    class WedgeWriter {
+    public:
+        WedgeWriter(const SkRect& cullBounds, const SkMatrix& viewMatrix,
+                    const GrShaderCaps& shaderCaps)
+                : fCullTest(cullBounds, viewMatrix)
+                , fVectorXform(viewMatrix) {
+            float maxSegments = shaderCaps.maxTessellationSegments();
+            fMaxSegments_pow2 = maxSegments * maxSegments;  // Limit for conic_pow2().
+            fMaxSegments_pow4 = fMaxSegments_pow2 * fMaxSegments_pow2;  // Limit for *_pow4().
+        }
+
+        SK_ALWAYS_INLINE void writeFlatWedge(GrVertexChunkBuilder* chunker, SkPoint p0, SkPoint p1,
+                                             SkPoint midpoint) {
+            if (GrVertexWriter vertexWriter = chunker->appendVertex()) {  // Null: write nothing.
+                GrPathUtils::writeLineAsCubic(p0, p1, &vertexWriter);  // First 4 points.
+                vertexWriter.write(midpoint);  // 5th point of the wedge.
+            }
+        }
+
+        SK_ALWAYS_INLINE void writeQuadraticWedge(GrVertexChunkBuilder* chunker, const SkPoint p[3],
+                                                  SkPoint midpoint) {
+            if (GrWangsFormula::quadratic_pow4(kPrecision, p, fVectorXform) > fMaxSegments_pow4) {
+                this->chopAndWriteQuadraticWedges(chunker, p, midpoint);  // Split and recurse.
+                return;
+            }
+            if (GrVertexWriter vertexWriter = chunker->appendVertex()) {
+                GrPathUtils::writeQuadAsCubic(p, &vertexWriter);
+                vertexWriter.write(midpoint);
+            }
+        }
+
+        SK_ALWAYS_INLINE void writeConicWedge(GrVertexChunkBuilder* chunker, const SkPoint p[3],
+                                              float w, SkPoint midpoint) {
+            if (GrWangsFormula::conic_pow2(1/kPrecision, p, w, fVectorXform) > fMaxSegments_pow2) {
+                this->chopAndWriteConicWedges(chunker, {p, w}, midpoint);
+                return;
+            }
+            if (GrVertexWriter vertexWriter = chunker->appendVertex()) {
+                GrPathShader::WriteConicPatch(p, w, &vertexWriter);
+                vertexWriter.write(midpoint);
+            }
+        }
+
+        SK_ALWAYS_INLINE void writeCubicWedge(GrVertexChunkBuilder* chunker, const SkPoint p[4],
+                                              SkPoint midpoint) {
+            if (GrWangsFormula::cubic_pow4(kPrecision, p, fVectorXform) > fMaxSegments_pow4) {
+                this->chopAndWriteCubicWedges(chunker, p, midpoint);
+                return;
+            }
+            if (GrVertexWriter vertexWriter = chunker->appendVertex()) {
+                vertexWriter.writeArray(p, 4);
+                vertexWriter.write(midpoint);
+            }
+        }
+
+    private:
+        void chopAndWriteQuadraticWedges(GrVertexChunkBuilder* chunker, const SkPoint p[3],
+                                         SkPoint midpoint) {
+            SkPoint chops[5];  // Two quads: chops[0..2] and chops[2..4].
+            SkChopQuadAtHalf(p, chops);
+            for (int i = 0; i < 2; ++i) {
+                const SkPoint* q = chops + i*2;
+                if (fCullTest.areVisible3(q)) {
+                    this->writeQuadraticWedge(chunker, q, midpoint);
+                } else {  // Culled: emit a flat wedge between the piece's endpoints instead.
+                    this->writeFlatWedge(chunker, q[0], q[2], midpoint);
+                }
+            }
+        }
+
+        void chopAndWriteConicWedges(GrVertexChunkBuilder* chunker, const SkConic& conic,
+                                     SkPoint midpoint) {
+            SkConic chops[2];
+            if (!conic.chopAt(.5, chops)) {
+                return;  // NOTE(review): no wedge at all is emitted for this conic in that case.
+            }
+            for (int i = 0; i < 2; ++i) {
+                if (fCullTest.areVisible3(chops[i].fPts)) {
+                    this->writeConicWedge(chunker, chops[i].fPts, chops[i].fW, midpoint);
+                } else {  // Culled: emit a flat wedge between the piece's endpoints instead.
+                    this->writeFlatWedge(chunker, chops[i].fPts[0], chops[i].fPts[2], midpoint);
+                }
+            }
+        }
+
+        void chopAndWriteCubicWedges(GrVertexChunkBuilder* chunker, const SkPoint p[4],
+                                     SkPoint midpoint) {
+            SkPoint chops[7];  // Two cubics: chops[0..3] and chops[3..6].
+            SkChopCubicAtHalf(p, chops);
+            for (int i = 0; i < 2; ++i) {
+                const SkPoint* c = chops + i*3;
+                if (fCullTest.areVisible4(c)) {
+                    this->writeCubicWedge(chunker, c, midpoint);
+                } else {  // Culled: emit a flat wedge between the piece's endpoints instead.
+                    this->writeFlatWedge(chunker, c[0], c[3], midpoint);
+                }
+            }
+        }
+
+        GrCullTest fCullTest;  // Decides whether a chopped piece keeps its curve.
+        GrVectorXform fVectorXform;  // Handed to GrWangsFormula with every curve.
+        float fMaxSegments_pow2;
+        float fMaxSegments_pow4;
+    };
+
+    WedgeWriter wedgeWriter(cullBounds, viewMatrix, *target->caps().shaderCaps());
     GrMidpointContourParser parser(path);
     while (parser.parseNextContour()) {
         SkPoint midpoint = parser.currentMidpoint();
@@ -333,43 +541,37 @@
             switch (verb) {
                 case SkPathVerb::kMove:
                     startPoint = lastPoint = pts[0];
-                    continue;
+                    break;
                 case SkPathVerb::kClose:
-                    continue;  // Ignore. We can assume an implicit close at the end.
+                    break;  // Ignore. We can assume an implicit close at the end.
                 case SkPathVerb::kLine:
-                    GrPathUtils::convertLineToCubic(pts[0], pts[1], vertexData + fPatchVertexCount);
+                    wedgeWriter.writeFlatWedge(&chunker, pts[0], pts[1], midpoint);
                     lastPoint = pts[1];
                     break;
                 case SkPathVerb::kQuad:
-                    GrPathUtils::convertQuadToCubic(pts, vertexData + fPatchVertexCount);
+                    wedgeWriter.writeQuadraticWedge(&chunker, pts, midpoint);
+                    lastPoint = pts[2];
+                    break;
+                case SkPathVerb::kConic:
+                    wedgeWriter.writeConicWedge(&chunker, pts, *w, midpoint);
                     lastPoint = pts[2];
                     break;
                 case SkPathVerb::kCubic:
-                    memcpy(vertexData + fPatchVertexCount, pts, sizeof(SkPoint) * 4);
+                    wedgeWriter.writeCubicWedge(&chunker, pts, midpoint);
                     lastPoint = pts[3];
                     break;
-                case SkPathVerb::kConic:
-                    GrPathShader::WriteConicPatch(pts, *w, vertexData + fPatchVertexCount);
-                    lastPoint = pts[2];
-                    break;
             }
-            vertexData[fPatchVertexCount + 4] = midpoint;
-            fPatchVertexCount += 5;
         }
         if (lastPoint != startPoint) {
-            GrPathUtils::convertLineToCubic(lastPoint, startPoint, vertexData + fPatchVertexCount);
-            vertexData[fPatchVertexCount + 4] = midpoint;
-            fPatchVertexCount += 5;
+            wedgeWriter.writeFlatWedge(&chunker, lastPoint, startPoint, midpoint);
         }
     }
-
-    vertexAlloc.unlock(fPatchVertexCount);
 }
 
 void GrPathHardwareTessellator::draw(GrOpFlushState* flushState) const {
-    if (fPatchVertexCount) {
-        flushState->bindBuffers(nullptr, nullptr, fPatchBuffer);
-        flushState->draw(fPatchVertexCount, fBasePatchVertex);
+    for (const GrVertexChunk& chunk : fVertexChunkArray) {
+        flushState->bindBuffers(nullptr, nullptr, chunk.fBuffer);
+        flushState->draw(chunk.fCount * fNumVerticesPerPatch, chunk.fBase * fNumVerticesPerPatch);
         if (flushState->caps().requiresManualFBBarrierAfterTessellatedStencilDraw()) {
             flushState->gpu()->insertManualFramebufferBarrier();  // http://skbug.com/9739
         }
diff --git a/src/gpu/tessellate/GrPathTessellator.h b/src/gpu/tessellate/GrPathTessellator.h
index 76a9781..c5e66c6 100644
--- a/src/gpu/tessellate/GrPathTessellator.h
+++ b/src/gpu/tessellate/GrPathTessellator.h
@@ -9,6 +9,7 @@
 #define GrPathTessellator_DEFINED
 
 #include "src/gpu/GrInnerFanTriangulator.h"
+#include "src/gpu/GrVertexChunkArray.h"
 #include "src/gpu/ops/GrMeshDrawOp.h"
 #include "src/gpu/tessellate/GrTessellationPathRenderer.h"
 
@@ -30,8 +31,8 @@
     // Called before draw(). Prepares GPU buffers containing the geometry to tessellate. If the
     // given BreadcrumbTriangleList is non-null, then this class will also include the breadcrumb
     // triangles in its draw.
-    virtual void prepare(GrMeshDrawOp::Target*, const SkMatrix&, const SkPath&,
-                         const BreadcrumbTriangleList* = nullptr) = 0;
+    virtual void prepare(GrMeshDrawOp::Target*, const SkRect& cullBounds, const SkMatrix&,
+                         const SkPath&, const BreadcrumbTriangleList* = nullptr) = 0;
 
     // Issues draw calls for the tessellated geometry. The caller is responsible for binding its
     // desired pipeline ahead of time.
@@ -53,14 +54,12 @@
 public:
     GrPathIndirectTessellator(const SkMatrix&, const SkPath&, DrawInnerFan);
 
-    void prepare(GrMeshDrawOp::Target*, const SkMatrix&, const SkPath&,
+    void prepare(GrMeshDrawOp::Target*, const SkRect& cullBounds, const SkMatrix&, const SkPath&,
                  const BreadcrumbTriangleList*) override;
     void draw(GrOpFlushState*) const override;
     void drawHullInstances(GrOpFlushState*) const override;
 
 private:
-    constexpr static float kLinearizationPrecision =
-            GrTessellationPathRenderer::kLinearizationPrecision;
     constexpr static int kMaxResolveLevel = GrTessellationPathRenderer::kMaxResolveLevel;
 
     const bool fDrawInnerFan;
@@ -79,14 +78,14 @@
 // Base class for GrPathTessellators that draw actual hardware tessellation patches.
 class GrPathHardwareTessellator : public GrPathTessellator {
 public:
-    GrPathHardwareTessellator() = default;
+    GrPathHardwareTessellator(int numVerticesPerPatch)
+            : fNumVerticesPerPatch(numVerticesPerPatch) {}
 
     void draw(GrOpFlushState*) const final;
 
 protected:
-    sk_sp<const GrBuffer> fPatchBuffer;
-    int fBasePatchVertex = 0;
-    int fPatchVertexCount = 0;
+    GrVertexChunkArray fVertexChunkArray;
+    int fNumVerticesPerPatch;
 };
 
 // Draws an array of "outer curve" patches and, optionally, inner fan triangles for
@@ -95,9 +94,9 @@
 class GrPathOuterCurveTessellator final : public GrPathHardwareTessellator {
 public:
     GrPathOuterCurveTessellator(DrawInnerFan drawInnerFan)
-            : fDrawInnerFan(drawInnerFan == DrawInnerFan::kYes) {}
+            : GrPathHardwareTessellator(4), fDrawInnerFan(drawInnerFan == DrawInnerFan::kYes) {}
 
-    void prepare(GrMeshDrawOp::Target*, const SkMatrix&, const SkPath&,
+    void prepare(GrMeshDrawOp::Target*, const SkRect& cullBounds, const SkMatrix&, const SkPath&,
                  const BreadcrumbTriangleList*) override;
 
 private:
@@ -110,9 +109,9 @@
 // converted to cubics. Once stencilled, these wedges alone define the complete path.
 class GrPathWedgeTessellator final : public GrPathHardwareTessellator {
 public:
-    GrPathWedgeTessellator() = default;
+    GrPathWedgeTessellator() : GrPathHardwareTessellator(5) {}
 
-    void prepare(GrMeshDrawOp::Target*, const SkMatrix&, const SkPath&,
+    void prepare(GrMeshDrawOp::Target*, const SkRect& cullBounds, const SkMatrix&, const SkPath&,
                  const BreadcrumbTriangleList*) override;
 };
 
diff --git a/tests/GrPathUtilsTest.cpp b/tests/GrPathUtilsTest.cpp
index 070f3ec..96a6e0d 100644
--- a/tests/GrPathUtilsTest.cpp
+++ b/tests/GrPathUtilsTest.cpp
@@ -122,7 +122,8 @@
 
 DEF_TEST(GrPathUtils_convertToCubic, r) {
     SkPoint cubic[4];
-    GrPathUtils::convertLineToCubic({0,0}, {3,6}, cubic);
+    GrVertexWriter cubicWriter(cubic);
+    GrPathUtils::writeLineAsCubic({0,0}, {3,6}, &cubicWriter);
     REPORTER_ASSERT(r, cubic[0] == SkPoint::Make(0,0));
     REPORTER_ASSERT(r, SkScalarNearlyEqual(cubic[1].fX, 1));
     REPORTER_ASSERT(r, SkScalarNearlyEqual(cubic[1].fY, 2));