Handle too many (or too large) paths in GrDefaultPathRenderer

PathGeoBuilder constructs the geometry with the same basic
technique as before, but allows interrupting the process
to emit multiple draws.

Original test case was 2000 non-AA stroked circles, which
created ~66000 vertices. That now renders, as do various
tests with a single large path (as well as filled paths).

Added a new set of 'AtLeast' allocators for vertex and index
data. These take a minimum size and a fallback size. If the
minimum size can be satisfied by an existing block, then
the caller gets *all* memory in that block, otherwise they
get a new block sized for the fallback amount. The previous
allocation scheme wasn't a good fit for the new use-case,
and because we don't usually need many verts, the flexible
approach seems appropriate.

TODO: I think that this could be extracted and re-used for
the MSAA path renderer without too much work? I need to read
that code more carefully to make sure it lines up.

Re-land of: https://skia-review.googlesource.com/18360
Re-land of: https://skia-review.googlesource.com/18983

Bug: skia:6695
Change-Id: I09ac1273e5af67ed0e3e886de90e2970c3d0b239
Reviewed-on: https://skia-review.googlesource.com/19480
Commit-Queue: Brian Osman <brianosman@google.com>
Reviewed-by: Brian Salomon <bsalomon@google.com>
diff --git a/src/gpu/GrBufferAllocPool.cpp b/src/gpu/GrBufferAllocPool.cpp
index 38bde0d..3096ae5 100644
--- a/src/gpu/GrBufferAllocPool.cpp
+++ b/src/gpu/GrBufferAllocPool.cpp
@@ -185,6 +185,64 @@
     return fBufferPtr;
 }
 
+void* GrBufferAllocPool::makeSpaceAtLeast(size_t minSize,
+                                          size_t fallbackSize,
+                                          size_t alignment,
+                                          const GrBuffer** buffer,
+                                          size_t* offset,
+                                          size_t* actualSize) {
+    VALIDATE();
+
+    SkASSERT(buffer);
+    SkASSERT(offset);
+    SkASSERT(actualSize);
+
+    if (fBufferPtr) {
+        BufferBlock& back = fBlocks.back();
+        size_t usedBytes = back.fBuffer->gpuMemorySize() - back.fBytesFree;
+        size_t pad = GrSizeAlignUpPad(usedBytes, alignment);
+        if ((minSize + pad) <= back.fBytesFree) {
+            // Consume padding first, to make subsequent alignment math easier
+            memset((void*)(reinterpret_cast<intptr_t>(fBufferPtr) + usedBytes), 0, pad);
+            usedBytes += pad;
+            back.fBytesFree -= pad;
+            fBytesInUse += pad;
+
+            // Give caller all remaining space in this block (but aligned correctly)
+            size_t size = GrSizeAlignDown(back.fBytesFree, alignment);
+            *offset = usedBytes;
+            *buffer = back.fBuffer;
+            *actualSize = size;
+            back.fBytesFree -= size;
+            fBytesInUse += size;
+            VALIDATE();
+            return (void*)(reinterpret_cast<intptr_t>(fBufferPtr) + usedBytes);
+        }
+    }
+
+    // We could honor the space request by using a partial update of the current
+    // VB (if there is room). But we don't currently use draw calls to GL that
+    // allow the driver to know that previously issued draws won't read from
+    // the part of the buffer we update. Also, the GL buffer implementation
+    // may be cheating on the actual buffer size by shrinking the buffer on
+    // updateData() if the amount of data passed is less than the full buffer
+    // size.
+
+    if (!this->createBlock(fallbackSize)) {
+        return nullptr;
+    }
+    SkASSERT(fBufferPtr);
+
+    *offset = 0;
+    BufferBlock& back = fBlocks.back();
+    *buffer = back.fBuffer;
+    *actualSize = fallbackSize;
+    back.fBytesFree -= fallbackSize;
+    fBytesInUse += fallbackSize;
+    VALIDATE();
+    return fBufferPtr;
+}
+
 void GrBufferAllocPool::putBack(size_t bytes) {
     VALIDATE();
 
@@ -345,6 +403,35 @@
     return ptr;
 }
 
+void* GrVertexBufferAllocPool::makeSpaceAtLeast(size_t vertexSize, int minVertexCount,
+                                                int fallbackVertexCount, const GrBuffer** buffer,
+                                                int* startVertex, int* actualVertexCount) {
+
+    SkASSERT(minVertexCount >= 0);
+    SkASSERT(fallbackVertexCount >= minVertexCount);
+    SkASSERT(buffer);
+    SkASSERT(startVertex);
+    SkASSERT(actualVertexCount);
+
+    size_t offset SK_INIT_TO_AVOID_WARNING;
+    size_t actualSize SK_INIT_TO_AVOID_WARNING;
+    void* ptr = INHERITED::makeSpaceAtLeast(vertexSize * minVertexCount,
+                                            vertexSize * fallbackVertexCount,
+                                            vertexSize,
+                                            buffer,
+                                            &offset,
+                                            &actualSize);
+
+    SkASSERT(0 == offset % vertexSize);
+    *startVertex = static_cast<int>(offset / vertexSize);
+
+    SkASSERT(0 == actualSize % vertexSize);
+    SkASSERT(actualSize >= vertexSize * minVertexCount);
+    *actualVertexCount = static_cast<int>(actualSize / vertexSize);
+
+    return ptr;
+}
+
 ////////////////////////////////////////////////////////////////////////////////
 
 GrIndexBufferAllocPool::GrIndexBufferAllocPool(GrGpu* gpu)
@@ -369,3 +456,30 @@
     *startIndex = static_cast<int>(offset / sizeof(uint16_t));
     return ptr;
 }
+
+void* GrIndexBufferAllocPool::makeSpaceAtLeast(int minIndexCount, int fallbackIndexCount,
+                                               const GrBuffer** buffer, int* startIndex,
+                                               int* actualIndexCount) {
+    SkASSERT(minIndexCount >= 0);
+    SkASSERT(fallbackIndexCount >= minIndexCount);
+    SkASSERT(buffer);
+    SkASSERT(startIndex);
+    SkASSERT(actualIndexCount);
+
+    size_t offset SK_INIT_TO_AVOID_WARNING;
+    size_t actualSize SK_INIT_TO_AVOID_WARNING;
+    void* ptr = INHERITED::makeSpaceAtLeast(minIndexCount * sizeof(uint16_t),
+                                            fallbackIndexCount * sizeof(uint16_t),
+                                            sizeof(uint16_t),
+                                            buffer,
+                                            &offset,
+                                            &actualSize);
+
+    SkASSERT(0 == offset % sizeof(uint16_t));
+    *startIndex = static_cast<int>(offset / sizeof(uint16_t));
+
+    SkASSERT(0 == actualSize % sizeof(uint16_t));
+    SkASSERT(actualSize >= minIndexCount * sizeof(uint16_t));
+    *actualIndexCount = static_cast<int>(actualSize / sizeof(uint16_t));
+    return ptr;
+}
diff --git a/src/gpu/GrBufferAllocPool.h b/src/gpu/GrBufferAllocPool.h
index 071b00b..1b58ef4 100644
--- a/src/gpu/GrBufferAllocPool.h
+++ b/src/gpu/GrBufferAllocPool.h
@@ -86,6 +86,38 @@
                     const GrBuffer** buffer,
                     size_t* offset);
 
+    /**
+     * Returns a block of memory to hold data. A buffer designated to hold the
+     * data is given to the caller. The buffer may or may not be locked. The
+     * returned ptr remains valid until any of the following:
+     *      *makeSpace is called again.
+     *      *unmap is called.
+     *      *reset is called.
+     *      *this object is destroyed.
+     *
+     * Once unmap on the pool is called the data is guaranteed to be in the
+     * buffer at the offset indicated by offset. Until that time it may be
+     * in temporary storage and/or the buffer may be locked.
+     *
+     * The caller requests a minimum number of bytes, but the block may be (much)
+     * larger. Assuming that a new block must be allocated, it will be fallbackSize bytes.
+     * The actual block size is returned in actualSize.
+     *
+     * @param minSize        the minimum amount of data to make space for
+     * @param fallbackSize   the amount of data to make space for if a new block is needed
+     * @param alignment      alignment constraint from start of buffer
+     * @param buffer         returns the buffer that will hold the data.
+     * @param offset         returns the offset into buffer of the data.
+     * @param actualSize     returns the capacity of the block (in bytes)
+     * @return pointer to where the client should write the data.
+     */
+    void* makeSpaceAtLeast(size_t minSize,
+                           size_t fallbackSize,
+                           size_t alignment,
+                           const GrBuffer** buffer,
+                           size_t* offset,
+                           size_t* actualSize);
+
     GrBuffer* getBuffer(size_t size);
 
 private:
@@ -152,6 +184,40 @@
                     const GrBuffer** buffer,
                     int* startVertex);
 
+    /**
+     * Returns a block of memory to hold vertices. A buffer designated to hold
+     * the vertices is given to the caller. The buffer may or may not be locked.
+     * The returned ptr remains valid until any of the following:
+     *      *makeSpace is called again.
+     *      *unmap is called.
+     *      *reset is called.
+     *      *this object is destroyed.
+     *
+     * Once unmap on the pool is called the vertices are guaranteed to be in
+     * the buffer at the offset indicated by startVertex. Until that time they
+     * may be in temporary storage and/or the buffer may be locked.
+     *
+     * The caller requests a minimum number of vertices, but the block may be (much)
+     * larger. Assuming that a new block must be allocated, it will be sized to hold
+     * fallbackVertexCount vertices. The actual block size (in vertices) is returned in
+     * actualVertexCount.
+     *
+     * @param vertexSize           specifies size of a vertex to allocate space for
+     * @param minVertexCount       minimum number of vertices to allocate space for
+     * @param fallbackVertexCount  number of vertices to allocate space for if a new block is needed
+     * @param buffer               returns the vertex buffer that will hold the vertices.
+     * @param startVertex          returns the offset into buffer of the first vertex.
+     *                             In units of vertices (each vertexSize bytes).
+     * @param actualVertexCount    returns the capacity of the block (in vertices)
+     * @return pointer to first vertex.
+     */
+    void* makeSpaceAtLeast(size_t vertexSize,
+                           int minVertexCount,
+                           int fallbackVertexCount,
+                           const GrBuffer** buffer,
+                           int* startVertex,
+                           int* actualVertexCount);
+
 private:
     typedef GrBufferAllocPool INHERITED;
 };
@@ -190,6 +256,37 @@
                     const GrBuffer** buffer,
                     int* startIndex);
 
+    /**
+     * Returns a block of memory to hold indices. A buffer designated to hold
+     * the indices is given to the caller. The buffer may or may not be locked.
+     * The returned ptr remains valid until any of the following:
+     *      *makeSpace is called again.
+     *      *unmap is called.
+     *      *reset is called.
+     *      *this object is destroyed.
+     *
+     * Once unmap on the pool is called the indices are guaranteed to be in the
+     * buffer at the offset indicated by startIndex. Until that time they may be
+     * in temporary storage and/or the buffer may be locked.
+     *
+     * The caller requests a minimum number of indices, but the block may be (much)
+     * larger. Assuming that a new block must be allocated, it will be sized to hold
+     * fallbackIndexCount indices. The actual block size (in indices) is returned in
+     * actualIndexCount.
+     *
+     * @param minIndexCount        minimum number of indices to allocate space for
+     * @param fallbackIndexCount   number of indices to allocate space for if a new block is needed
+     * @param buffer               returns the index buffer that will hold the indices.
+     * @param startIndex           returns the offset into buffer of the first index.
+     * @param actualIndexCount     returns the capacity of the block (in indices)
+     * @return pointer to first index.
+     */
+    void* makeSpaceAtLeast(int minIndexCount,
+                           int fallbackIndexCount,
+                           const GrBuffer** buffer,
+                           int* startIndex,
+                           int* actualIndexCount);
+
 private:
     typedef GrBufferAllocPool INHERITED;
 };
diff --git a/src/gpu/GrOpFlushState.cpp b/src/gpu/GrOpFlushState.cpp
index b8166f0..a7129d1 100644
--- a/src/gpu/GrOpFlushState.cpp
+++ b/src/gpu/GrOpFlushState.cpp
@@ -36,6 +36,20 @@
     return reinterpret_cast<uint16_t*>(fIndexPool.makeSpace(indexCount, buffer, startIndex));
 }
 
+void* GrOpFlushState::makeVertexSpaceAtLeast(size_t vertexSize, int minVertexCount,
+                                             int fallbackVertexCount, const GrBuffer** buffer,
+                                             int* startVertex, int* actualVertexCount) {
+    return fVertexPool.makeSpaceAtLeast(vertexSize, minVertexCount, fallbackVertexCount, buffer,
+                                        startVertex, actualVertexCount);
+}
+
+uint16_t* GrOpFlushState::makeIndexSpaceAtLeast(int minIndexCount, int fallbackIndexCount,
+                                                const GrBuffer** buffer, int* startIndex,
+                                                int* actualIndexCount) {
+    return reinterpret_cast<uint16_t*>(fIndexPool.makeSpaceAtLeast(
+        minIndexCount, fallbackIndexCount, buffer, startIndex, actualIndexCount));
+}
+
 void GrOpFlushState::doUpload(GrDrawOp::DeferredUploadFn& upload) {
     GrDrawOp::WritePixelsFn wp = [this](GrSurface* surface, int left, int top, int width,
                                         int height, GrPixelConfig config, const void* buffer,
diff --git a/src/gpu/GrOpFlushState.h b/src/gpu/GrOpFlushState.h
index 3a2fe32..b5cacfa 100644
--- a/src/gpu/GrOpFlushState.h
+++ b/src/gpu/GrOpFlushState.h
@@ -59,6 +59,12 @@
                           const GrBuffer** buffer, int* startVertex);
     uint16_t* makeIndexSpace(int indexCount, const GrBuffer** buffer, int* startIndex);
 
+    void* makeVertexSpaceAtLeast(size_t vertexSize, int minVertexCount, int fallbackVertexCount,
+                                 const GrBuffer** buffer, int* startVertex, int* actualVertexCount);
+    uint16_t* makeIndexSpaceAtLeast(int minIndexCount, int fallbackIndexCount,
+                                    const GrBuffer** buffer, int* startIndex,
+                                    int* actualIndexCount);
+
     /** This is called after each op has a chance to prepare its draws and before the draws are
         issued. */
     void preIssueDraws() {
@@ -205,6 +211,21 @@
         return this->state()->makeIndexSpace(indexCount, buffer, startIndex);
     }
 
+    void* makeVertexSpaceAtLeast(size_t vertexSize, int minVertexCount, int fallbackVertexCount,
+                                 const GrBuffer** buffer, int* startVertex,
+                                 int* actualVertexCount) {
+        return this->state()->makeVertexSpaceAtLeast(vertexSize, minVertexCount,
+                                                     fallbackVertexCount, buffer, startVertex,
+                                                     actualVertexCount);
+    }
+
+    uint16_t* makeIndexSpaceAtLeast(int minIndexCount, int fallbackIndexCount,
+                                    const GrBuffer** buffer, int* startIndex,
+                                    int* actualIndexCount) {
+        return this->state()->makeIndexSpaceAtLeast(minIndexCount, fallbackIndexCount, buffer,
+                                                    startIndex, actualIndexCount);
+    }
+
     /** Helpers for ops which over-allocate and then return data to the pool. */
     void putBackIndices(int indices) { this->state()->putBackIndices(indices); }
     void putBackVertices(int vertices, size_t vertexStride) {
diff --git a/src/gpu/GrPathUtils.cpp b/src/gpu/GrPathUtils.cpp
index 6047e60..b6711a0 100644
--- a/src/gpu/GrPathUtils.cpp
+++ b/src/gpu/GrPathUtils.cpp
@@ -10,7 +10,6 @@
 #include "GrTypes.h"
 #include "SkMathPriv.h"
 
-static const int MAX_POINTS_PER_CURVE = 1 << 10;
 static const SkScalar gMinCurveTol = 0.0001f;
 
 SkScalar GrPathUtils::scaleToleranceToSrc(SkScalar devTol,
@@ -44,7 +43,7 @@
 
     SkScalar d = points[1].distanceToLineSegmentBetween(points[0], points[2]);
     if (!SkScalarIsFinite(d)) {
-        return MAX_POINTS_PER_CURVE;
+        return kMaxPointsPerCurve;
     } else if (d <= tol) {
         return 1;
     } else {
@@ -54,7 +53,7 @@
         // 2^(log4(x)) = sqrt(x);
         SkScalar divSqrt = SkScalarSqrt(d / tol);
         if (((SkScalar)SK_MaxS32) <= divSqrt) {
-            return MAX_POINTS_PER_CURVE;
+            return kMaxPointsPerCurve;
         } else {
             int temp = SkScalarCeilToInt(divSqrt);
             int pow2 = GrNextPow2(temp);
@@ -64,7 +63,7 @@
             if (pow2 < 1) {
                 pow2 = 1;
             }
-            return SkTMin(pow2, MAX_POINTS_PER_CURVE);
+            return SkTMin(pow2, kMaxPointsPerCurve);
         }
     }
 }
@@ -104,13 +103,13 @@
         points[2].distanceToLineSegmentBetweenSqd(points[0], points[3]));
     d = SkScalarSqrt(d);
     if (!SkScalarIsFinite(d)) {
-        return MAX_POINTS_PER_CURVE;
+        return kMaxPointsPerCurve;
     } else if (d <= tol) {
         return 1;
     } else {
         SkScalar divSqrt = SkScalarSqrt(d / tol);
         if (((SkScalar)SK_MaxS32) <= divSqrt) {
-            return MAX_POINTS_PER_CURVE;
+            return kMaxPointsPerCurve;
         } else {
             int temp = SkScalarCeilToInt(SkScalarSqrt(d / tol));
             int pow2 = GrNextPow2(temp);
@@ -120,7 +119,7 @@
             if (pow2 < 1) {
                 pow2 = 1;
             }
-            return SkTMin(pow2, MAX_POINTS_PER_CURVE);
+            return SkTMin(pow2, kMaxPointsPerCurve);
         }
     }
 }
diff --git a/src/gpu/GrPathUtils.h b/src/gpu/GrPathUtils.h
index 49c4ee5..e9dee73 100644
--- a/src/gpu/GrPathUtils.h
+++ b/src/gpu/GrPathUtils.h
@@ -178,5 +178,8 @@
     // This value was chosen to approximate the supersampling accuracy of the raster path (16
     // samples, or one quarter pixel).
     static const SkScalar kDefaultTolerance = SkDoubleToScalar(0.25);
+
+    // We guarantee that no quad or cubic will ever produce more than this many points
+    static const int kMaxPointsPerCurve = 1 << 10;
 };
 #endif
diff --git a/src/gpu/ops/GrDefaultPathRenderer.cpp b/src/gpu/ops/GrDefaultPathRenderer.cpp
index c712a9c..55a6f01 100644
--- a/src/gpu/ops/GrDefaultPathRenderer.cpp
+++ b/src/gpu/ops/GrDefaultPathRenderer.cpp
@@ -57,37 +57,294 @@
     }
 }
 
-static inline void append_countour_edge_indices(bool hairLine,
-                                                uint16_t fanCenterIdx,
-                                                uint16_t edgeV0Idx,
-                                                uint16_t** indices) {
-    // when drawing lines we're appending line segments along
-    // the contour. When applying the other fill rules we're
-    // drawing triangle fans around fanCenterIdx.
-    if (!hairLine) {
-        *((*indices)++) = fanCenterIdx;
+class PathGeoBuilder {
+public:
+    PathGeoBuilder(GrPrimitiveType primitiveType, GrMeshDrawOp::Target* target,
+                   GrGeometryProcessor* geometryProcessor, const GrPipeline* pipeline)
+            : fMesh(primitiveType)
+            , fTarget(target)
+            , fVertexStride(sizeof(SkPoint))
+            , fGeometryProcessor(geometryProcessor)
+            , fPipeline(pipeline)
+            , fIndexBuffer(nullptr)
+            , fFirstIndex(0)
+            , fIndicesInChunk(0)
+            , fIndices(nullptr) {
+        this->allocNewBuffers();
     }
-    *((*indices)++) = edgeV0Idx;
-    *((*indices)++) = edgeV0Idx + 1;
-}
 
-static inline void add_quad(SkPoint** vert, const SkPoint* base, const SkPoint pts[],
-                            SkScalar srcSpaceTolSqd, SkScalar srcSpaceTol, bool indexed,
-                            bool isHairline, uint16_t subpathIdxStart, int offset, uint16_t** idx) {
-    // first pt of quad is the pt we ended on in previous step
-    uint16_t firstQPtIdx = (uint16_t)(*vert - base) - 1 + offset;
-    uint16_t numPts =  (uint16_t)
-        GrPathUtils::generateQuadraticPoints(
-            pts[0], pts[1], pts[2],
-            srcSpaceTolSqd, vert,
-            GrPathUtils::quadraticPointCount(pts, srcSpaceTol));
-    if (indexed) {
-        for (uint16_t i = 0; i < numPts; ++i) {
-            append_countour_edge_indices(isHairline, subpathIdxStart,
-                                         firstQPtIdx + i, idx);
+    ~PathGeoBuilder() {
+        this->emitMesh();
+        this->putBackReserve();
+    }
+
+    // Called before we start each path
+    void beginInstance() {
+        fSubpathIndexStart = fVertexOffset;
+        fCurIdx = fIndices + fIndexOffset;
+        fCurVert = fVertices + fVertexOffset;
+    }
+
+    // Called after we end each path
+    void endInstance() {
+        fVertexOffset = fCurVert - fVertices;
+        fIndexOffset = fCurIdx - fIndices;
+        SkASSERT(fVertexOffset <= fVerticesInChunk);
+        SkASSERT(fIndexOffset <= fIndicesInChunk);
+    }
+
+    /**
+     *  Path verbs
+     */
+    void moveTo(const SkPoint& p) {
+        needSpace(1);
+
+        fSubpathIndexStart = this->currentIndex();
+        *(fCurVert++) = p;
+    }
+
+    void addLine(const SkPoint& p) {
+        needSpace(1, this->indexScale());
+
+        if (this->isIndexed()) {
+            uint16_t prevIdx = this->currentIndex() - 1;
+            appendCountourEdgeIndices(prevIdx);
+        }
+        *(fCurVert++) = p;
+    }
+
+    void addQuad(const SkPoint pts[], SkScalar srcSpaceTolSqd, SkScalar srcSpaceTol) {
+        this->needSpace(GrPathUtils::kMaxPointsPerCurve,
+                        GrPathUtils::kMaxPointsPerCurve * this->indexScale());
+
+        // First pt of quad is the pt we ended on in previous step
+        uint16_t firstQPtIdx = this->currentIndex() - 1;
+        uint16_t numPts = (uint16_t)GrPathUtils::generateQuadraticPoints(
+                pts[0], pts[1], pts[2], srcSpaceTolSqd, &fCurVert,
+                GrPathUtils::quadraticPointCount(pts, srcSpaceTol));
+        if (this->isIndexed()) {
+            for (uint16_t i = 0; i < numPts; ++i) {
+                appendCountourEdgeIndices(firstQPtIdx + i);
+            }
         }
     }
-}
+
+    void addConic(SkScalar weight, const SkPoint pts[], SkScalar srcSpaceTolSqd,
+                  SkScalar srcSpaceTol) {
+        SkAutoConicToQuads converter;
+        const SkPoint* quadPts = converter.computeQuads(pts, weight, srcSpaceTol);
+        for (int i = 0; i < converter.countQuads(); ++i) {
+            this->addQuad(quadPts + i * 2, srcSpaceTolSqd, srcSpaceTol);
+        }
+    }
+
+    void addCubic(const SkPoint pts[], SkScalar srcSpaceTolSqd, SkScalar srcSpaceTol) {
+        this->needSpace(GrPathUtils::kMaxPointsPerCurve,
+                        GrPathUtils::kMaxPointsPerCurve * this->indexScale());
+
+        // First pt of cubic is the pt we ended on in previous step
+        uint16_t firstCPtIdx = this->currentIndex() - 1;
+        uint16_t numPts = (uint16_t) GrPathUtils::generateCubicPoints(
+                pts[0], pts[1], pts[2], pts[3], srcSpaceTolSqd, &fCurVert,
+                GrPathUtils::cubicPointCount(pts, srcSpaceTol));
+        if (this->isIndexed()) {
+            for (uint16_t i = 0; i < numPts; ++i) {
+                appendCountourEdgeIndices(firstCPtIdx + i);
+            }
+        }
+    }
+
+    void addPath(const SkPath& path, SkScalar srcSpaceTol) {
+        SkScalar srcSpaceTolSqd = srcSpaceTol * srcSpaceTol;
+
+        SkPath::Iter iter(path, false);
+        SkPoint pts[4];
+
+        bool done = false;
+        while (!done) {
+            SkPath::Verb verb = iter.next(pts);
+            switch (verb) {
+                case SkPath::kMove_Verb:
+                    this->moveTo(pts[0]);
+                    break;
+                case SkPath::kLine_Verb:
+                    this->addLine(pts[1]);
+                    break;
+                case SkPath::kConic_Verb:
+                    this->addConic(iter.conicWeight(), pts, srcSpaceTolSqd, srcSpaceTol);
+                    break;
+                case SkPath::kQuad_Verb:
+                    this->addQuad(pts, srcSpaceTolSqd, srcSpaceTol);
+                    break;
+                case SkPath::kCubic_Verb:
+                    this->addCubic(pts, srcSpaceTolSqd, srcSpaceTol);
+                    break;
+                case SkPath::kClose_Verb:
+                    break;
+                case SkPath::kDone_Verb:
+                    done = true;
+            }
+        }
+    }
+
+    static bool PathHasMultipleSubpaths(const SkPath& path) {
+        bool first = true;
+
+        SkPath::Iter iter(path, false);
+        SkPath::Verb verb;
+
+        SkPoint pts[4];
+        while ((verb = iter.next(pts)) != SkPath::kDone_Verb) {
+            if (SkPath::kMove_Verb == verb && !first) {
+                return true;
+            }
+            first = false;
+        }
+        return false;
+    }
+
+private:
+    /**
+     *  Derived properties
+     *  TODO: Cache some of these for better performance, rather than re-computing?
+     */
+    bool isIndexed() const {
+        return GrPrimitiveType::kLines == fMesh.primitiveType() ||
+               GrPrimitiveType::kTriangles == fMesh.primitiveType();
+    }
+    bool isHairline() const {
+        return GrPrimitiveType::kLines == fMesh.primitiveType() ||
+               GrPrimitiveType::kLineStrip == fMesh.primitiveType();
+    }
+    int indexScale() const {
+        switch (fMesh.primitiveType()) {
+            case GrPrimitiveType::kLines:
+                return 2;
+            case GrPrimitiveType::kTriangles:
+                return 3;
+            default:
+                return 0;
+        }
+    }
+
+    uint16_t currentIndex() const { return fCurVert - fVertices; }
+
+    void putBackReserve() {
+        fTarget->putBackIndices((size_t)(fIndicesInChunk - fIndexOffset));
+        fTarget->putBackVertices((size_t)(fVerticesInChunk - fVertexOffset), fVertexStride);
+    }
+
+    // Allocate vertex and (possibly) index buffers
+    void allocNewBuffers() {
+        // Ensure that we always get enough verts for a worst-case quad/cubic, plus leftover points
+        // from previous mesh piece (up to two verts to continue fanning). If we can't get that
+        // many, ask for a much larger number. This needs to be fairly big to handle quads/cubics,
+        // which have a worst-case of 1k points.
+        static const int kMinVerticesPerChunk = GrPathUtils::kMaxPointsPerCurve + 2;
+        static const int kFallbackVerticesPerChunk = 16384;
+
+        fVertices = static_cast<SkPoint*>(fTarget->makeVertexSpaceAtLeast(fVertexStride,
+                                                                          kMinVerticesPerChunk,
+                                                                          kFallbackVerticesPerChunk,
+                                                                          &fVertexBuffer,
+                                                                          &fFirstVertex,
+                                                                          &fVerticesInChunk));
+
+        if (this->isIndexed()) {
+            // Similar to above: Ensure we get enough indices for one worst-case quad/cubic.
+            // No extra indices are needed for stitching, though. If we can't get that many, ask
+            // for enough to match our large vertex request.
+            const int kMinIndicesPerChunk = GrPathUtils::kMaxPointsPerCurve * this->indexScale();
+            const int kFallbackIndicesPerChunk = kFallbackVerticesPerChunk * this->indexScale();
+
+            fIndices = fTarget->makeIndexSpaceAtLeast(kMinIndicesPerChunk, kFallbackIndicesPerChunk,
+                                                      &fIndexBuffer, &fFirstIndex,
+                                                      &fIndicesInChunk);
+        }
+        fVertexOffset = 0;
+        fIndexOffset = 0;
+    }
+
+    void appendCountourEdgeIndices(uint16_t edgeV0Idx) {
+        // When drawing lines we're appending line segments along the contour. When applying the
+        // other fill rules we're drawing triangle fans around the start of the current (sub)path.
+        if (!this->isHairline()) {
+            *(fCurIdx++) = fSubpathIndexStart;
+        }
+        *(fCurIdx++) = edgeV0Idx;
+        *(fCurIdx++) = edgeV0Idx + 1;
+    }
+
+    // Emits a single draw with all accumulated vertex/index data
+    void emitMesh() {
+        if (fVertexOffset > 0) {
+            if (!this->isIndexed()) {
+                fMesh.setNonIndexedNonInstanced(fVertexOffset);
+            } else {
+                fMesh.setIndexed(fIndexBuffer, fIndexOffset, fFirstIndex, 0, fVertexOffset - 1);
+            }
+            fMesh.setVertexData(fVertexBuffer, fFirstVertex);
+            fTarget->draw(fGeometryProcessor, fPipeline, fMesh);
+        }
+    }
+
+    void needSpace(int vertsNeeded, int indicesNeeded = 0) {
+        if (fCurVert + vertsNeeded > fVertices + fVerticesInChunk ||
+            fCurIdx + indicesNeeded > fIndices + fIndicesInChunk) {
+            // We are about to run out of space (possibly)
+
+            // To maintain continuity, we need to remember one or two points from the current mesh.
+            // Lines only need the last point, fills need the first point from the current contour.
+            // We always grab both here, and append the ones we need at the end of this process.
+            SkPoint lastPt = *(fCurVert - 1);
+            // It's possible for fSubpathIndexStart to be past the end of the vertex buffer, if we
+            // fill up the vertex buffer exactly at the end of a path. To keep things simple, we
+            // still inject a point, but it's just (0,0) because it won't actually be used.
+            SkPoint subpathStartPt = fSubpathIndexStart >= fVerticesInChunk
+                                     ? SkPoint::Make(0, 0) : fVertices[fSubpathIndexStart];
+
+            // Pretend that we've reached the end of an entire path, so our offsets are correct
+            this->endInstance();
+
+            // Draw the mesh we've accumulated
+            this->emitMesh();
+
+            // Put back any unused space, get new buffers
+            this->putBackReserve();
+            this->allocNewBuffers();
+
+            // Start a "new" path, which is really just a continuation of the in-progress one
+            this->beginInstance();
+
+            // Append copies of the points we saved so the two meshes will weld properly
+            if (!this->isHairline()) {
+                *(fCurVert++) = subpathStartPt;
+            }
+            *(fCurVert++) = lastPt;
+        }
+    }
+
+    GrMesh fMesh;
+    GrMeshDrawOp::Target* fTarget;
+    size_t fVertexStride;
+    GrGeometryProcessor* fGeometryProcessor;
+    const GrPipeline* fPipeline;
+
+    const GrBuffer* fVertexBuffer;
+    int fFirstVertex;
+    int fVerticesInChunk;
+    SkPoint* fVertices;
+    SkPoint* fCurVert;
+    int fVertexOffset;
+
+    const GrBuffer* fIndexBuffer;
+    int fFirstIndex;
+    int fIndicesInChunk;
+    uint16_t* fIndices;
+    uint16_t* fCurIdx;
+    int fIndexOffset;
+    uint16_t fSubpathIndexStart;
+};
 
 class DefaultPathOp final : public GrLegacyMeshDrawOp {
 public:
@@ -151,112 +408,35 @@
             gp = GrDefaultGeoProcFactory::Make(color, coverage, localCoords, this->viewMatrix());
         }
 
-        size_t vertexStride = gp->getVertexStride();
-        SkASSERT(vertexStride == sizeof(SkPoint));
+        SkASSERT(gp->getVertexStride() == sizeof(SkPoint));
 
         int instanceCount = fPaths.count();
 
-        // compute number of vertices
-        int maxVertices = 0;
-
         // We will use index buffers if we have multiple paths or one path with multiple contours
         bool isIndexed = instanceCount > 1;
-        for (int i = 0; i < instanceCount; i++) {
+        for (int i = 0; !isIndexed && i < instanceCount; i++) {
             const PathData& args = fPaths[i];
-
-            int contourCount;
-            maxVertices += GrPathUtils::worstCasePointCount(args.fPath, &contourCount,
-                                                            args.fTolerance);
-
-            isIndexed = isIndexed || contourCount > 1;
-        }
-
-        if (maxVertices == 0 || maxVertices > ((int)SK_MaxU16 + 1)) {
-            //SkDebugf("Cannot render path (%d)\n", maxVertices);
-            return;
+            isIndexed = isIndexed || PathGeoBuilder::PathHasMultipleSubpaths(args.fPath);
         }
 
         // determine primitiveType
-        int maxIndices = 0;
         GrPrimitiveType primitiveType;
         if (this->isHairline()) {
-            if (isIndexed) {
-                maxIndices = 2 * maxVertices;
-                primitiveType = GrPrimitiveType::kLines;
-            } else {
-                primitiveType = GrPrimitiveType::kLineStrip;
-            }
+            primitiveType = isIndexed ? GrPrimitiveType::kLines : GrPrimitiveType::kLineStrip;
         } else {
-            if (isIndexed) {
-                maxIndices = 3 * maxVertices;
-                primitiveType = GrPrimitiveType::kTriangles;
-            } else {
-                primitiveType = GrPrimitiveType::kTriangleFan;
-            }
+            primitiveType = isIndexed ? GrPrimitiveType::kTriangles : GrPrimitiveType::kTriangleFan;
         }
 
-        // allocate vertex / index buffers
-        const GrBuffer* vertexBuffer;
-        int firstVertex;
-
-        void* verts = target->makeVertexSpace(vertexStride, maxVertices,
-                                              &vertexBuffer, &firstVertex);
-
-        if (!verts) {
-            SkDebugf("Could not allocate vertices\n");
-            return;
-        }
-
-        const GrBuffer* indexBuffer = nullptr;
-        int firstIndex = 0;
-
-        void* indices = nullptr;
-        if (isIndexed) {
-            indices = target->makeIndexSpace(maxIndices, &indexBuffer, &firstIndex);
-
-            if (!indices) {
-                SkDebugf("Could not allocate indices\n");
-                return;
-            }
-        }
+        PathGeoBuilder pathGeoBuilder(primitiveType, target, gp.get(), this->pipeline());
 
         // fill buffers
-        int vertexOffset = 0;
-        int indexOffset = 0;
         for (int i = 0; i < instanceCount; i++) {
             const PathData& args = fPaths[i];
 
-            int vertexCnt = 0;
-            int indexCnt = 0;
-            if (!this->createGeom(verts,
-                                  vertexOffset,
-                                  indices,
-                                  indexOffset,
-                                  &vertexCnt,
-                                  &indexCnt,
-                                  args.fPath,
-                                  args.fTolerance,
-                                  isIndexed)) {
-                return;
-            }
-
-            vertexOffset += vertexCnt;
-            indexOffset += indexCnt;
-            SkASSERT(vertexOffset <= maxVertices && indexOffset <= maxIndices);
+            pathGeoBuilder.beginInstance();
+            pathGeoBuilder.addPath(args.fPath, args.fTolerance);
+            pathGeoBuilder.endInstance();
         }
-
-        GrMesh mesh(primitiveType);
-        if (!isIndexed) {
-            mesh.setNonIndexedNonInstanced(vertexOffset);
-        } else {
-            mesh.setIndexed(indexBuffer, indexOffset, firstIndex, 0, vertexOffset - 1);
-        }
-        mesh.setVertexData(vertexBuffer, firstVertex);
-        target->draw(gp.get(), this->pipeline(), mesh);
-
-        // put back reserves
-        target->putBackIndices((size_t)(maxIndices - indexOffset));
-        target->putBackVertices((size_t)(maxVertices - vertexOffset), (size_t)vertexStride);
     }
 
     bool onCombineIfPossible(GrOp* t, const GrCaps& caps) override {
@@ -287,98 +467,6 @@
         return true;
     }
 
-    bool createGeom(void* vertices,
-                    size_t vertexOffset,
-                    void* indices,
-                    size_t indexOffset,
-                    int* vertexCnt,
-                    int* indexCnt,
-                    const SkPath& path,
-                    SkScalar srcSpaceTol,
-                    bool isIndexed) const {
-            SkScalar srcSpaceTolSqd = srcSpaceTol * srcSpaceTol;
-
-            uint16_t indexOffsetU16 = (uint16_t)indexOffset;
-            uint16_t vertexOffsetU16 = (uint16_t)vertexOffset;
-
-            uint16_t* idxBase = reinterpret_cast<uint16_t*>(indices) + indexOffsetU16;
-            uint16_t* idx = idxBase;
-            uint16_t subpathIdxStart = vertexOffsetU16;
-
-            SkPoint* base = reinterpret_cast<SkPoint*>(vertices) + vertexOffset;
-            SkPoint* vert = base;
-
-            SkPoint pts[4];
-
-            bool first = true;
-            int subpath = 0;
-
-            SkPath::Iter iter(path, false);
-
-            bool done = false;
-            while (!done) {
-                SkPath::Verb verb = iter.next(pts);
-                switch (verb) {
-                    case SkPath::kMove_Verb:
-                        if (!first) {
-                            uint16_t currIdx = (uint16_t) (vert - base) + vertexOffsetU16;
-                            subpathIdxStart = currIdx;
-                            ++subpath;
-                        }
-                        *vert = pts[0];
-                        vert++;
-                        break;
-                    case SkPath::kLine_Verb:
-                        if (isIndexed) {
-                            uint16_t prevIdx = (uint16_t)(vert - base) - 1 + vertexOffsetU16;
-                            append_countour_edge_indices(this->isHairline(), subpathIdxStart,
-                                                         prevIdx, &idx);
-                        }
-                        *(vert++) = pts[1];
-                        break;
-                    case SkPath::kConic_Verb: {
-                        SkScalar weight = iter.conicWeight();
-                        SkAutoConicToQuads converter;
-                        const SkPoint* quadPts = converter.computeQuads(pts, weight, srcSpaceTol);
-                        for (int i = 0; i < converter.countQuads(); ++i) {
-                            add_quad(&vert, base, quadPts + i*2, srcSpaceTolSqd, srcSpaceTol,
-                                     isIndexed, this->isHairline(), subpathIdxStart,
-                                     (int)vertexOffset, &idx);
-                        }
-                        break;
-                    }
-                    case SkPath::kQuad_Verb:
-                        add_quad(&vert, base, pts, srcSpaceTolSqd, srcSpaceTol, isIndexed,
-                                 this->isHairline(), subpathIdxStart, (int)vertexOffset, &idx);
-                        break;
-                    case SkPath::kCubic_Verb: {
-                        // first pt of cubic is the pt we ended on in previous step
-                        uint16_t firstCPtIdx = (uint16_t)(vert - base) - 1 + vertexOffsetU16;
-                        uint16_t numPts = (uint16_t) GrPathUtils::generateCubicPoints(
-                                        pts[0], pts[1], pts[2], pts[3],
-                                        srcSpaceTolSqd, &vert,
-                                        GrPathUtils::cubicPointCount(pts, srcSpaceTol));
-                        if (isIndexed) {
-                            for (uint16_t i = 0; i < numPts; ++i) {
-                                append_countour_edge_indices(this->isHairline(), subpathIdxStart,
-                                                             firstCPtIdx + i, &idx);
-                            }
-                        }
-                        break;
-                    }
-                    case SkPath::kClose_Verb:
-                        break;
-                    case SkPath::kDone_Verb:
-                        done = true;
-                }
-                first = false;
-            }
-
-            *vertexCnt = static_cast<int>(vert - base);
-            *indexCnt = static_cast<int>(idx - idxBase);
-        return true;
-    }
-
     GrColor color() const { return fColor; }
     uint8_t coverage() const { return fCoverage; }
     bool usesLocalCoords() const { return fUsesLocalCoords; }