Reland "ccpr: Implement stroking with fine triangle strips"

This is a reland of 2f2757fa6ba8134330e05694d08907f6e37abb41

Original change's description:
> ccpr: Implement stroking with fine triangle strips
>
> Implements strokes by linearizing the curve into fine triangle strips
> and interpolating a coverage ramp for edge AA. Each triangle in the
> strip emits either positive or negative coverage, depending on its
> winding direction. Joins and caps are drawn with the existing CCPR
> shaders for triangles and conics.
>
> Conic strokes and non-rigid-body transforms are not yet supported.
>
> Bug: skia:
> Change-Id: I45a819abd64e91c2b62e992587eb85c703e09e77
> Reviewed-on: https://skia-review.googlesource.com/148243
> Commit-Queue: Chris Dalton <csmartdalton@google.com>
> Reviewed-by: Brian Salomon <bsalomon@google.com>
> Reviewed-by: Allan MacKinnon <allanmac@google.com>

TBR=robertphillips@google.com

Bug: skia:
Change-Id: I3f0065e80975ee8334300bc5e934231b66b49178
Reviewed-on: https://skia-review.googlesource.com/151188
Commit-Queue: Chris Dalton <csmartdalton@google.com>
Reviewed-by: Chris Dalton <csmartdalton@google.com>
diff --git a/src/core/SkStrokeRec.cpp b/src/core/SkStrokeRec.cpp
index 41a3c30..bbea1e2 100644
--- a/src/core/SkStrokeRec.cpp
+++ b/src/core/SkStrokeRec.cpp
@@ -135,10 +135,19 @@
     paint->setStrokeJoin((SkPaint::Join)fJoin);
 }
 
-static inline SkScalar get_inflation_bounds(SkPaint::Join join,
-                                            SkScalar miterLimit,
-                                            SkPaint::Cap cap,
-                                            SkScalar strokeWidth) {
+SkScalar SkStrokeRec::getInflationRadius() const {
+    return GetInflationRadius((SkPaint::Join)fJoin, fMiterLimit, (SkPaint::Cap)fCap, fWidth);
+}
+
+SkScalar SkStrokeRec::GetInflationRadius(const SkPaint& paint, SkPaint::Style style) {
+    SkScalar width = SkPaint::kFill_Style == style ? -SK_Scalar1 : paint.getStrokeWidth();
+    return GetInflationRadius(paint.getStrokeJoin(), paint.getStrokeMiter(), paint.getStrokeCap(),
+                              width);
+
+}
+
+SkScalar SkStrokeRec::GetInflationRadius(SkPaint::Join join, SkScalar miterLimit, SkPaint::Cap cap,
+                                         SkScalar strokeWidth) {
     if (strokeWidth < 0) {  // fill
         return 0;
     } else if (0 == strokeWidth) {
@@ -159,13 +168,3 @@
     return strokeWidth/2 * multiplier;
 }
 
-SkScalar SkStrokeRec::getInflationRadius() const {
-    return get_inflation_bounds((SkPaint::Join)fJoin, fMiterLimit, (SkPaint::Cap)fCap, fWidth);
-}
-
-SkScalar SkStrokeRec::GetInflationRadius(const SkPaint& paint, SkPaint::Style style) {
-    SkScalar width = SkPaint::kFill_Style == style ? -SK_Scalar1 : paint.getStrokeWidth();
-    return get_inflation_bounds(paint.getStrokeJoin(), paint.getStrokeMiter(), paint.getStrokeCap(),
-                                width);
-
-}
diff --git a/src/gpu/GrGeometryProcessor.h b/src/gpu/GrGeometryProcessor.h
index 5035ad0..5306a0f 100644
--- a/src/gpu/GrGeometryProcessor.h
+++ b/src/gpu/GrGeometryProcessor.h
@@ -70,13 +70,6 @@
     static const Attribute& IthInitializedAttribute(int i) { return IthAttribute(i); }
 
 private:
-    // Since most subclasses don't use instancing provide a default implementation for that case.
-    const Attribute& onInstanceAttribute(int i) const override {
-        SK_ABORT("No instanced attributes");
-        static constexpr Attribute kBogus;
-        return kBogus;
-    }
-
     bool fWillUseGeoShader;
     float fSampleShading;
 
diff --git a/src/gpu/GrPrimitiveProcessor.h b/src/gpu/GrPrimitiveProcessor.h
index 4becb54..bd92f82 100644
--- a/src/gpu/GrPrimitiveProcessor.h
+++ b/src/gpu/GrPrimitiveProcessor.h
@@ -166,8 +166,18 @@
     inline static const TextureSampler& IthTextureSampler(int i);
 
 private:
-    virtual const Attribute& onVertexAttribute(int) const = 0;
-    virtual const Attribute& onInstanceAttribute(int) const = 0;
+    virtual const Attribute& onVertexAttribute(int) const {
+        SK_ABORT("No vertex attributes");
+        static constexpr Attribute kBogus;
+        return kBogus;
+    }
+
+    virtual const Attribute& onInstanceAttribute(int i) const {
+        SK_ABORT("No instanced attributes");
+        static constexpr Attribute kBogus;
+        return kBogus;
+    }
+
     virtual const TextureSampler& onTextureSampler(int) const { return IthTextureSampler(0); }
 
     int fVertexAttributeCnt = 0;
diff --git a/src/gpu/GrProcessor.h b/src/gpu/GrProcessor.h
index 3c6662c..de42bc9 100644
--- a/src/gpu/GrProcessor.h
+++ b/src/gpu/GrProcessor.h
@@ -72,6 +72,7 @@
         kComposeOneFragmentProcessor_ClassID,
         kComposeTwoFragmentProcessor_ClassID,
         kCoverageSetOpXP_ClassID,
+        kCubicStrokeProcessor_ClassID,
         kCustomXP_ClassID,
         kDashingCircleEffect_ClassID,
         kDashingLineEffect_ClassID,
@@ -154,6 +155,7 @@
         kFlatNormalsFP_ClassID,
         kMappedNormalsFP_ClassID,
         kLightingFP_ClassID,
+        kLinearStrokeProcessor_ClassID,
     };
 
     virtual ~GrProcessor() = default;
diff --git a/src/gpu/ccpr/GrCCAtlas.cpp b/src/gpu/ccpr/GrCCAtlas.cpp
index 988d5fa..4ef632a 100644
--- a/src/gpu/ccpr/GrCCAtlas.cpp
+++ b/src/gpu/ccpr/GrCCAtlas.cpp
@@ -134,10 +134,16 @@
     return true;
 }
 
-void GrCCAtlas::setUserBatchID(int id) {
+void GrCCAtlas::setFillBatchID(int id) {
     // This can't be called anymore once makeRenderTargetContext() has been called.
     SkASSERT(!fTextureProxy->isInstantiated());
-    fUserBatchID = id;
+    fFillBatchID = id;
+}
+
+void GrCCAtlas::setStrokeBatchID(int id) {
+    // This can't be called anymore once makeRenderTargetContext() has been called.
+    SkASSERT(!fTextureProxy->isInstantiated());
+    fStrokeBatchID = id;
 }
 
 static uint32_t next_atlas_unique_id() {
diff --git a/src/gpu/ccpr/GrCCAtlas.h b/src/gpu/ccpr/GrCCAtlas.h
index 1a7ba1f..6412895 100644
--- a/src/gpu/ccpr/GrCCAtlas.h
+++ b/src/gpu/ccpr/GrCCAtlas.h
@@ -57,10 +57,12 @@
     bool addRect(const SkIRect& devIBounds, SkIVector* atlasOffset);
     const SkISize& drawBounds() { return fDrawBounds; }
 
-    // This is an optional space for the caller to jot down which user-defined batch to use when
+    // This is an optional space for the caller to jot down which user-defined batches to use when
     // they render the content of this atlas.
-    void setUserBatchID(int id);
-    int getUserBatchID() const { return fUserBatchID; }
+    void setFillBatchID(int id);
+    int getFillBatchID() const { return fFillBatchID; }
+    void setStrokeBatchID(int id);
+    int getStrokeBatchID() const { return fStrokeBatchID; }
 
     // Manages a unique resource cache key that gets assigned to the atlas texture. The unique key
     // does not get assigned to the texture proxy until it is instantiated.
@@ -98,7 +100,8 @@
     std::unique_ptr<Node> fTopNode;
     SkISize fDrawBounds = {0, 0};
 
-    int fUserBatchID;
+    int fFillBatchID;
+    int fStrokeBatchID;
 
     // Not every atlas will have a unique key -- a mainline CCPR one won't if we don't stash any
     // paths, and only the first atlas in the stack is eligible to be stashed.
diff --git a/src/gpu/ccpr/GrCCClipPath.cpp b/src/gpu/ccpr/GrCCClipPath.cpp
index 5702a96..d70b4f7 100644
--- a/src/gpu/ccpr/GrCCClipPath.cpp
+++ b/src/gpu/ccpr/GrCCClipPath.cpp
@@ -52,7 +52,7 @@
     SkASSERT(this->isInitialized());
 
     ++specs->fNumClipPaths;
-    specs->fRenderedPathStats.statPath(fDeviceSpacePath);
+    specs->fRenderedPathStats[GrCCPerFlushResourceSpecs::kFillIdx].statPath(fDeviceSpacePath);
 
     SkIRect ibounds;
     if (ibounds.intersect(fAccessRect, fPathDevIBounds)) {
diff --git a/src/gpu/ccpr/GrCCCoverageProcessor.h b/src/gpu/ccpr/GrCCCoverageProcessor.h
index e5b6bc1..c51f743 100644
--- a/src/gpu/ccpr/GrCCCoverageProcessor.h
+++ b/src/gpu/ccpr/GrCCCoverageProcessor.h
@@ -52,6 +52,7 @@
 
         void set(const SkPoint[3], const Sk2f& trans);
         void set(const SkPoint&, const SkPoint&, const SkPoint&, const Sk2f& trans);
+        void set(const Sk2f& P0, const Sk2f& P1, const Sk2f& P2, const Sk2f& trans);
     };
 
     // Defines a single primitive shape with 4 input points, or 3 input points plus a "weight"
@@ -64,6 +65,7 @@
         void set(const SkPoint[4], float dx, float dy);
         void setW(const SkPoint[3], const Sk2f& trans, float w);
         void setW(const SkPoint&, const SkPoint&, const SkPoint&, const Sk2f& trans, float w);
+        void setW(const Sk2f& P0, const Sk2f& P1, const Sk2f& P2, const Sk2f& trans, float w);
     };
 
     GrCCCoverageProcessor(GrResourceProvider* rp, PrimitiveType type)
@@ -298,10 +300,15 @@
 
 inline void GrCCCoverageProcessor::TriPointInstance::set(const SkPoint& p0, const SkPoint& p1,
                                                          const SkPoint& p2, const Sk2f& trans) {
-    Sk2f P0 = Sk2f::Load(&p0) + trans;
-    Sk2f P1 = Sk2f::Load(&p1) + trans;
-    Sk2f P2 = Sk2f::Load(&p2) + trans;
-    Sk2f::Store3(this, P0, P1, P2);
+    Sk2f P0 = Sk2f::Load(&p0);
+    Sk2f P1 = Sk2f::Load(&p1);
+    Sk2f P2 = Sk2f::Load(&p2);
+    this->set(P0, P1, P2, trans);
+}
+
+inline void GrCCCoverageProcessor::TriPointInstance::set(const Sk2f& P0, const Sk2f& P1,
+                                                         const Sk2f& P2, const Sk2f& trans) {
+    Sk2f::Store3(this, P0 + trans, P1 + trans, P2 + trans);
 }
 
 inline void GrCCCoverageProcessor::QuadPointInstance::set(const SkPoint p[4], float dx, float dy) {
@@ -319,11 +326,17 @@
 inline void GrCCCoverageProcessor::QuadPointInstance::setW(const SkPoint& p0, const SkPoint& p1,
                                                            const SkPoint& p2, const Sk2f& trans,
                                                            float w) {
-    Sk2f P0 = Sk2f::Load(&p0) + trans;
-    Sk2f P1 = Sk2f::Load(&p1) + trans;
-    Sk2f P2 = Sk2f::Load(&p2) + trans;
+    Sk2f P0 = Sk2f::Load(&p0);
+    Sk2f P1 = Sk2f::Load(&p1);
+    Sk2f P2 = Sk2f::Load(&p2);
+    this->setW(P0, P1, P2, trans, w);
+}
+
+inline void GrCCCoverageProcessor::QuadPointInstance::setW(const Sk2f& P0, const Sk2f& P1,
+                                                           const Sk2f& P2, const Sk2f& trans,
+                                                           float w) {
     Sk2f W = Sk2f(w);
-    Sk2f::Store4(this, P0, P1, P2, W);
+    Sk2f::Store4(this, P0 + trans, P1 + trans, P2 + trans, W);
 }
 
 #endif
diff --git a/src/gpu/ccpr/GrCCDrawPathsOp.cpp b/src/gpu/ccpr/GrCCDrawPathsOp.cpp
index aed0672..2ec8379 100644
--- a/src/gpu/ccpr/GrCCDrawPathsOp.cpp
+++ b/src/gpu/ccpr/GrCCDrawPathsOp.cpp
@@ -28,25 +28,82 @@
     return sk_64_mul(r.height(), r.width());
 }
 
-std::unique_ptr<GrCCDrawPathsOp> GrCCDrawPathsOp::Make(GrContext* context,
-                                                       const SkIRect& clipIBounds,
-                                                       const SkMatrix& m,
-                                                       const GrShape& shape,
-                                                       const SkRect& devBounds,
-                                                       GrPaint&& paint) {
-    SkIRect shapeDevIBounds;
-    devBounds.roundOut(&shapeDevIBounds);  // GrCCPathParser might find slightly tighter bounds.
+std::unique_ptr<GrCCDrawPathsOp> GrCCDrawPathsOp::Make(
+        GrContext* context, const SkIRect& clipIBounds, const SkMatrix& m, const GrShape& shape,
+        GrPaint&& paint) {
+    static constexpr int kPathCropThreshold = GrCoverageCountingPathRenderer::kPathCropThreshold;
+
+    SkRect conservativeDevBounds;
+    m.mapRect(&conservativeDevBounds, shape.bounds());
+
+    const SkStrokeRec& stroke = shape.style().strokeRec();
+    float strokeDevWidth = 0;
+    float conservativeInflationRadius = 0;
+    if (!stroke.isFillStyle()) {
+        if (stroke.isHairlineStyle()) {
+            strokeDevWidth = 1;
+        } else {
+            SkASSERT(m.isSimilarity());  // Otherwise matrixScaleFactor = m.getMaxScale().
+            float matrixScaleFactor = SkVector::Length(m.getScaleX(), m.getSkewY());
+            strokeDevWidth = stroke.getWidth() * matrixScaleFactor;
+        }
+        // Inflate for a minimum stroke width of 1. In some cases when the stroke is less than 1px
+        // wide, we may inflate it to 1px and instead reduce the opacity.
+        conservativeInflationRadius = SkStrokeRec::GetInflationRadius(
+                stroke.getJoin(), stroke.getMiter(), stroke.getCap(), SkTMax(strokeDevWidth, 1.f));
+        conservativeDevBounds.outset(conservativeInflationRadius, conservativeInflationRadius);
+    }
+
+    std::unique_ptr<GrCCDrawPathsOp> op;
+    float conservativeSize = SkTMax(conservativeDevBounds.height(), conservativeDevBounds.width());
+    if (conservativeSize > kPathCropThreshold) {
+        // The path is too large. Crop it or analytic AA can run out of fp32 precision.
+        SkPath croppedDevPath;
+        shape.asPath(&croppedDevPath);
+        croppedDevPath.transform(m, &croppedDevPath);
+
+        SkIRect cropBox = clipIBounds;
+        GrShape croppedDevShape;
+        if (stroke.isFillStyle()) {
+            GrCoverageCountingPathRenderer::CropPath(croppedDevPath, cropBox, &croppedDevPath);
+            croppedDevShape = GrShape(croppedDevPath);
+            conservativeDevBounds = croppedDevShape.bounds();
+        } else {
+            int r = SkScalarCeilToInt(conservativeInflationRadius);
+            cropBox.outset(r, r);
+            GrCoverageCountingPathRenderer::CropPath(croppedDevPath, cropBox, &croppedDevPath);
+            SkStrokeRec devStroke = stroke;
+            devStroke.setStrokeStyle(strokeDevWidth);
+            croppedDevShape = GrShape(croppedDevPath, GrStyle(devStroke, nullptr));
+            conservativeDevBounds = croppedDevPath.getBounds();
+            conservativeDevBounds.outset(conservativeInflationRadius, conservativeInflationRadius);
+        }
+
+        // FIXME: This breaks local coords: http://skbug.com/8003
+        return InternalMake(context, clipIBounds, SkMatrix::I(), croppedDevShape, strokeDevWidth,
+                            conservativeDevBounds, std::move(paint));
+    }
+
+    return InternalMake(context, clipIBounds, m, shape, strokeDevWidth, conservativeDevBounds,
+                        std::move(paint));
+}
+
+std::unique_ptr<GrCCDrawPathsOp> GrCCDrawPathsOp::InternalMake(
+        GrContext* context, const SkIRect& clipIBounds, const SkMatrix& m, const GrShape& shape,
+        float strokeDevWidth, const SkRect& conservativeDevBounds, GrPaint&& paint) {
+    SkIRect shapeConservativeIBounds;
+    conservativeDevBounds.roundOut(&shapeConservativeIBounds);
 
     SkIRect maskDevIBounds;
     Visibility maskVisibility;
-    if (clipIBounds.contains(shapeDevIBounds)) {
-        maskDevIBounds = shapeDevIBounds;
+    if (clipIBounds.contains(shapeConservativeIBounds)) {
+        maskDevIBounds = shapeConservativeIBounds;
         maskVisibility = Visibility::kComplete;
     } else {
-        if (!maskDevIBounds.intersect(clipIBounds, shapeDevIBounds)) {
+        if (!maskDevIBounds.intersect(clipIBounds, shapeConservativeIBounds)) {
             return nullptr;
         }
-        int64_t unclippedArea = area(shapeDevIBounds);
+        int64_t unclippedArea = area(shapeConservativeIBounds);
         int64_t clippedArea = area(maskDevIBounds);
         maskVisibility = (clippedArea >= unclippedArea/2 || unclippedArea < 100*100)
                 ? Visibility::kMostlyComplete  // i.e., visible enough to justify rendering the
@@ -56,22 +113,24 @@
 
     GrOpMemoryPool* pool = context->contextPriv().opMemoryPool();
 
-    return pool->allocate<GrCCDrawPathsOp>(m, shape, shapeDevIBounds, maskDevIBounds,
-                                           maskVisibility, devBounds, std::move(paint));
+    return pool->allocate<GrCCDrawPathsOp>(m, shape, strokeDevWidth, shapeConservativeIBounds,
+                                           maskDevIBounds, maskVisibility, conservativeDevBounds,
+                                           std::move(paint));
 }
 
-GrCCDrawPathsOp::GrCCDrawPathsOp(const SkMatrix& m, const GrShape& shape,
-                                 const SkIRect& shapeDevIBounds, const SkIRect& maskDevIBounds,
-                                 Visibility maskVisibility, const SkRect& devBounds,
-                                 GrPaint&& paint)
+GrCCDrawPathsOp::GrCCDrawPathsOp(const SkMatrix& m, const GrShape& shape, float strokeDevWidth,
+                                 const SkIRect& shapeConservativeIBounds,
+                                 const SkIRect& maskDevIBounds, Visibility maskVisibility,
+                                 const SkRect& conservativeDevBounds, GrPaint&& paint)
         : GrDrawOp(ClassID())
         , fViewMatrixIfUsingLocalCoords(has_coord_transforms(paint) ? m : SkMatrix::I())
-        , fDraws(m, shape, shapeDevIBounds, maskDevIBounds, maskVisibility, paint.getColor())
+        , fDraws(m, shape, strokeDevWidth, shapeConservativeIBounds, maskDevIBounds, maskVisibility,
+                 paint.getColor())
         , fProcessors(std::move(paint)) {  // Paint must be moved after fetching its color above.
     SkDEBUGCODE(fBaseInstance = -1);
     // FIXME: intersect with clip bounds to (hopefully) improve batching.
     // (This is nontrivial due to assumptions in generating the octagon cover geometry.)
-    this->setBounds(devBounds, GrOp::HasAABloat::kYes, GrOp::IsZeroArea::kNo);
+    this->setBounds(conservativeDevBounds, GrOp::HasAABloat::kYes, GrOp::IsZeroArea::kNo);
 }
 
 GrCCDrawPathsOp::~GrCCDrawPathsOp() {
@@ -82,12 +141,14 @@
 }
 
 GrCCDrawPathsOp::SingleDraw::SingleDraw(const SkMatrix& m, const GrShape& shape,
-                                        const SkIRect& shapeDevIBounds,
+                                        float strokeDevWidth,
+                                        const SkIRect& shapeConservativeIBounds,
                                         const SkIRect& maskDevIBounds, Visibility maskVisibility,
                                         GrColor color)
         : fMatrix(m)
         , fShape(shape)
-        , fShapeDevIBounds(shapeDevIBounds)
+        , fStrokeDevWidth(strokeDevWidth)
+        , fShapeConservativeIBounds(shapeConservativeIBounds)
         , fMaskDevIBounds(maskDevIBounds)
         , fMaskVisibility(maskVisibility)
         , fColor(color) {
@@ -111,9 +172,39 @@
 GrDrawOp::RequiresDstTexture GrCCDrawPathsOp::finalize(const GrCaps& caps,
                                                        const GrAppliedClip* clip) {
     SkASSERT(1 == fNumDraws);  // There should only be one single path draw in this Op right now.
-    GrProcessorSet::Analysis analysis =
-            fProcessors.finalize(fDraws.head().fColor, GrProcessorAnalysisCoverage::kSingleChannel,
-                                 clip, false, caps, &fDraws.head().fColor);
+    SingleDraw* draw = &fDraws.head();
+
+    const GrProcessorSet::Analysis& analysis = fProcessors.finalize(
+            draw->fColor, GrProcessorAnalysisCoverage::kSingleChannel, clip, false, caps,
+            &draw->fColor);
+
+    // Lines start looking jagged when they get thinner than 1px. For thin strokes it looks better
+    // if we can convert them to hairline (i.e., inflate the stroke width to 1px), and instead
+    // reduce the opacity to create the illusion of thinness. This strategy also helps reduce
+    // artifacts from coverage dilation when there are self intersections.
+    if (analysis.isCompatibleWithCoverageAsAlpha() &&
+            !draw->fShape.style().strokeRec().isFillStyle() && draw->fStrokeDevWidth < 1) {
+        // Modifying the shape affects its cache key. The draw can't have a cache entry yet or else
+        // our next step would invalidate it.
+        SkASSERT(!draw->fCacheEntry);
+        SkASSERT(SkStrokeRec::kStroke_Style == draw->fShape.style().strokeRec().getStyle());
+
+        SkPath path;
+        draw->fShape.asPath(&path);
+
+        // Create a hairline version of our stroke.
+        SkStrokeRec hairlineStroke = draw->fShape.style().strokeRec();
+        hairlineStroke.setStrokeStyle(0);
+
+        // How transparent does a 1px stroke have to be in order to appear as thin as the real one?
+        GrColor coverageAsAlpha = GrColorPackA4(SkScalarFloorToInt(draw->fStrokeDevWidth * 255));
+
+        draw->fShape = GrShape(path, GrStyle(hairlineStroke, nullptr));
+        draw->fStrokeDevWidth = 1;
+        // fShapeConservativeIBounds already accounted for this possibility of inflating the stroke.
+        draw->fColor = GrColorMul(draw->fColor, coverageAsAlpha);
+    }
+
     return RequiresDstTexture(analysis.requiresDstTexture());
 }
 
@@ -180,8 +271,11 @@
                 // can copy it into a new 8-bit atlas and keep it in the resource cache.
                 if (stashedAtlasKey.isValid() && stashedAtlasKey == cacheEntry->atlasKey()) {
                     SkASSERT(!cacheEntry->hasCachedAtlas());
-                    ++specs->fNumCopiedPaths;
-                    specs->fCopyPathStats.statPath(path);
+                    int idx = (draw.fShape.style().strokeRec().isFillStyle())
+                            ? GrCCPerFlushResourceSpecs::kFillIdx
+                            : GrCCPerFlushResourceSpecs::kStrokeIdx;
+                    ++specs->fNumCopiedPaths[idx];
+                    specs->fCopyPathStats[idx].statPath(path);
                     specs->fCopyAtlasSpecs.accountForSpace(cacheEntry->width(),
                                                            cacheEntry->height());
                     continue;
@@ -191,18 +285,23 @@
                 cacheEntry->resetAtlasKeyAndInfo();
             }
 
-            if (Visibility::kMostlyComplete == draw.fMaskVisibility && cacheEntry->hitCount() > 1 &&
-                SkTMax(draw.fShapeDevIBounds.height(),
-                       draw.fShapeDevIBounds.width()) <= onFlushRP->caps()->maxRenderTargetSize()) {
-                // We've seen this path before with a compatible matrix, and it's mostly visible.
-                // Just render the whole mask so we can try to cache it.
-                draw.fMaskDevIBounds = draw.fShapeDevIBounds;
-                draw.fMaskVisibility = Visibility::kComplete;
+            if (Visibility::kMostlyComplete == draw.fMaskVisibility && cacheEntry->hitCount() > 1) {
+                int shapeSize = SkTMax(draw.fShapeConservativeIBounds.height(),
+                                       draw.fShapeConservativeIBounds.width());
+                if (shapeSize <= onFlushRP->caps()->maxRenderTargetSize()) {
+                    // We've seen this path before with a compatible matrix, and it's mostly
+                    // visible. Just render the whole mask so we can try to cache it.
+                    draw.fMaskDevIBounds = draw.fShapeConservativeIBounds;
+                    draw.fMaskVisibility = Visibility::kComplete;
+                }
             }
         }
 
-        ++specs->fNumRenderedPaths;
-        specs->fRenderedPathStats.statPath(path);
+        int idx = (draw.fShape.style().strokeRec().isFillStyle())
+                ? GrCCPerFlushResourceSpecs::kFillIdx
+                : GrCCPerFlushResourceSpecs::kStrokeIdx;
+        ++specs->fNumRenderedPaths[idx];
+        specs->fRenderedPathStats[idx].statPath(path);
         specs->fRenderedAtlasSpecs.accountForSpace(draw.fMaskDevIBounds.width(),
                                                    draw.fMaskDevIBounds.height());
     }
@@ -219,7 +318,8 @@
         SkPath path;
         draw.fShape.asPath(&path);
 
-        auto doEvenOddFill = DoEvenOddFill(SkPath::kEvenOdd_FillType == path.getFillType());
+        auto doEvenOddFill = DoEvenOddFill(draw.fShape.style().strokeRec().isFillStyle() &&
+                                           SkPath::kEvenOdd_FillType == path.getFillType());
         SkASSERT(SkPath::kEvenOdd_FillType == path.getFillType() ||
                  SkPath::kWinding_FillType == path.getFillType());
 
@@ -270,9 +370,9 @@
         SkRect devBounds, devBounds45;
         SkIRect devIBounds;
         SkIVector devToAtlasOffset;
-        if (auto atlas = resources->renderPathInAtlas(draw.fMaskDevIBounds, draw.fMatrix, path,
-                                                      &devBounds, &devBounds45, &devIBounds,
-                                                      &devToAtlasOffset)) {
+        if (auto atlas = resources->renderShapeInAtlas(
+                    draw.fMaskDevIBounds, draw.fMatrix, draw.fShape, draw.fStrokeDevWidth,
+                    &devBounds, &devBounds45, &devIBounds, &devToAtlasOffset)) {
             this->recordInstance(atlas->textureProxy(), resources->nextPathInstanceIdx());
             resources->appendDrawPathInstance().set(devBounds, devBounds45, devToAtlasOffset,
                                                     draw.fColor, doEvenOddFill);
diff --git a/src/gpu/ccpr/GrCCDrawPathsOp.h b/src/gpu/ccpr/GrCCDrawPathsOp.h
index 40d9df4..2716d59 100644
--- a/src/gpu/ccpr/GrCCDrawPathsOp.h
+++ b/src/gpu/ccpr/GrCCDrawPathsOp.h
@@ -30,8 +30,7 @@
     SK_DECLARE_INTERNAL_LLIST_INTERFACE(GrCCDrawPathsOp);
 
     static std::unique_ptr<GrCCDrawPathsOp> Make(GrContext*, const SkIRect& clipIBounds,
-                                                 const SkMatrix&, const GrShape&,
-                                                 const SkRect& devBounds, GrPaint&&);
+                                                 const SkMatrix&, const GrShape&, GrPaint&&);
     ~GrCCDrawPathsOp() override;
 
     const char* name() const override { return "GrCCDrawPathsOp"; }
@@ -70,28 +69,35 @@
 private:
     friend class GrOpMemoryPool;
 
+    static std::unique_ptr<GrCCDrawPathsOp> InternalMake(GrContext*, const SkIRect& clipIBounds,
+                                                         const SkMatrix&, const GrShape&,
+                                                         float strokeDevWidth,
+                                                         const SkRect& conservativeDevBounds,
+                                                         GrPaint&&);
     enum class Visibility {
         kPartial,
         kMostlyComplete,  // (i.e., can we cache the whole path mask if we think it will be reused?)
         kComplete
     };
 
-    GrCCDrawPathsOp(const SkMatrix&, const GrShape&, const SkIRect& shapeDevIBounds,
-                    const SkIRect& maskDevIBounds, Visibility maskVisibility,
-                    const SkRect& devBounds, GrPaint&&);
+    GrCCDrawPathsOp(const SkMatrix&, const GrShape&, float strokeDevWidth,
+                    const SkIRect& shapeConservativeIBounds, const SkIRect& maskDevIBounds,
+                    Visibility maskVisibility, const SkRect& conservativeDevBounds, GrPaint&&);
 
     void recordInstance(GrTextureProxy* atlasProxy, int instanceIdx);
 
     const SkMatrix fViewMatrixIfUsingLocalCoords;
 
     struct SingleDraw {
-        SingleDraw(const SkMatrix&, const GrShape&, const SkIRect& shapeDevIBounds,
-                   const SkIRect& maskDevIBounds, Visibility maskVisibility, GrColor);
+        SingleDraw(const SkMatrix&, const GrShape&, float strokeDevWidth,
+                   const SkIRect& shapeConservativeIBounds, const SkIRect& maskDevIBounds,
+                   Visibility maskVisibility, GrColor);
         ~SingleDraw();
 
         SkMatrix fMatrix;
-        const GrShape fShape;
-        const SkIRect fShapeDevIBounds;
+        GrShape fShape;
+        float fStrokeDevWidth;
+        const SkIRect fShapeConservativeIBounds;
         SkIRect fMaskDevIBounds;
         Visibility fMaskVisibility;
         GrColor fColor;
diff --git a/src/gpu/ccpr/GrCCFiller.cpp b/src/gpu/ccpr/GrCCFiller.cpp
index cdace98..1460077 100644
--- a/src/gpu/ccpr/GrCCFiller.cpp
+++ b/src/gpu/ccpr/GrCCFiller.cpp
@@ -20,9 +20,8 @@
 using TriPointInstance = GrCCCoverageProcessor::TriPointInstance;
 using QuadPointInstance = GrCCCoverageProcessor::QuadPointInstance;
 
-GrCCFiller::GrCCFiller(int numPaths, const PathStats& pathStats)
-        : fGeometry(pathStats.fNumTotalSkPoints, pathStats.fNumTotalSkVerbs,
-                    pathStats.fNumTotalConicWeights)
+GrCCFiller::GrCCFiller(int numPaths, int numSkPoints, int numSkVerbs, int numConicWeights)
+        : fGeometry(numSkPoints, numSkVerbs, numConicWeights)
         , fPathInfos(numPaths)
         , fScissorSubBatches(numPaths)
         , fTotalPrimitiveCounts{PrimitiveTallies(), PrimitiveTallies()} {
diff --git a/src/gpu/ccpr/GrCCFiller.h b/src/gpu/ccpr/GrCCFiller.h
index 40dc657..45a03a4 100644
--- a/src/gpu/ccpr/GrCCFiller.h
+++ b/src/gpu/ccpr/GrCCFiller.h
@@ -9,7 +9,6 @@
 #define GrCCPathParser_DEFINED
 
 #include "GrMesh.h"
-#include "SkPath.h"
 #include "SkPathPriv.h"
 #include "SkRect.h"
 #include "SkRefCnt.h"
@@ -28,16 +27,7 @@
  */
 class GrCCFiller {
 public:
-    struct PathStats {
-        int fMaxPointsPerPath = 0;
-        int fNumTotalSkPoints = 0;
-        int fNumTotalSkVerbs = 0;
-        int fNumTotalConicWeights = 0;
-
-        void statPath(const SkPath&);
-    };
-
-    GrCCFiller(int numPaths, const PathStats&);
+    GrCCFiller(int numPaths, int numSkPoints, int numSkVerbs, int numConicWeights);
 
     // Parses a device-space SkPath into the current batch, using the SkPath's original verbs and
     // 'deviceSpacePts'. Accepts an optional post-device-space translate for placement in an atlas.
@@ -122,11 +112,4 @@
     mutable SkSTArray<32, SkIRect> fScissorRectScratchBuffer;
 };
 
-inline void GrCCFiller::PathStats::statPath(const SkPath& path) {
-    fMaxPointsPerPath = SkTMax(fMaxPointsPerPath, path.countPoints());
-    fNumTotalSkPoints += path.countPoints();
-    fNumTotalSkVerbs += path.countVerbs();
-    fNumTotalConicWeights += SkPathPriv::ConicWeightCnt(path);
-}
-
 #endif
diff --git a/src/gpu/ccpr/GrCCPathCache.cpp b/src/gpu/ccpr/GrCCPathCache.cpp
index 01781b0..7d3fe2a 100644
--- a/src/gpu/ccpr/GrCCPathCache.cpp
+++ b/src/gpu/ccpr/GrCCPathCache.cpp
@@ -45,27 +45,47 @@
 // Produces a key that accounts both for a shape's path geometry, as well as any stroke/style.
 class WriteStyledKey {
 public:
-    WriteStyledKey(const GrShape& shape)
-        : fShapeUnstyledKeyCount(shape.unstyledKeySize())
-        , fStyleKeyCount(
-                GrStyle::KeySize(shape.style(), GrStyle::Apply::kPathEffectAndStrokeRec)) {}
+    static constexpr int kStyledKeySizeInBytesIdx = 0;
+    static constexpr int kStrokeWidthIdx = 1;
+    static constexpr int kStrokeMiterIdx = 2;
+    static constexpr int kStrokeCapJoinIdx = 3;
+    static constexpr int kShapeUnstyledKeyIdx = 4;
+
+    static constexpr int kStrokeKeyCount = 3;  // [width, miterLimit, cap|join].
+
+    WriteStyledKey(const GrShape& shape) : fShapeUnstyledKeyCount(shape.unstyledKeySize()) {}
 
     // Returns the total number of uint32_t's to allocate for the key.
-    int allocCountU32() const { return 2 + fShapeUnstyledKeyCount + fStyleKeyCount; }
+    int allocCountU32() const { return kShapeUnstyledKeyIdx + fShapeUnstyledKeyCount; }
 
     // Writes the key to out[].
     void write(const GrShape& shape, uint32_t* out) {
-        // How many bytes remain in the key, beginning on out[1]?
-        out[0] = (1 + fShapeUnstyledKeyCount + fStyleKeyCount)  * sizeof(uint32_t);
-        out[1] = fStyleKeyCount;
-        shape.writeUnstyledKey(&out[2]);
-        GrStyle::WriteKey(&out[2 + fShapeUnstyledKeyCount], shape.style(),
-                          GrStyle::Apply::kPathEffectAndStrokeRec, 1);
+        out[kStyledKeySizeInBytesIdx] =
+                (kStrokeKeyCount + fShapeUnstyledKeyCount) * sizeof(uint32_t);
+
+        // Stroke key.
+        // We don't use GrStyle::WriteKey() because it does not account for hairlines.
+        // http://skbug.com/8273
+        SkASSERT(!shape.style().hasPathEffect());
+        const SkStrokeRec& stroke = shape.style().strokeRec();
+        if (stroke.isFillStyle()) {
+            // Use a value for width that won't collide with a valid fp32 value >= 0.
+            out[kStrokeWidthIdx] = ~0;
+            out[kStrokeMiterIdx] = out[kStrokeCapJoinIdx] = 0;
+        } else {
+            float width = stroke.getWidth(), miterLimit = stroke.getMiter();
+            memcpy(&out[kStrokeWidthIdx], &width, sizeof(float));
+            memcpy(&out[kStrokeMiterIdx], &miterLimit, sizeof(float));
+            out[kStrokeCapJoinIdx] = (stroke.getCap() << 16) | stroke.getJoin();
+            GR_STATIC_ASSERT(sizeof(out[kStrokeWidthIdx]) == sizeof(float));
+        }
+
+        // Shape unstyled key.
+        shape.writeUnstyledKey(&out[kShapeUnstyledKeyIdx]);
     }
 
 private:
     int fShapeUnstyledKeyCount;
-    int fStyleKeyCount;
 };
 
 }
diff --git a/src/gpu/ccpr/GrCCPerFlushResources.cpp b/src/gpu/ccpr/GrCCPerFlushResources.cpp
index 0ab4e5d..e761141 100644
--- a/src/gpu/ccpr/GrCCPerFlushResources.cpp
+++ b/src/gpu/ccpr/GrCCPerFlushResources.cpp
@@ -12,12 +12,17 @@
 #include "GrOnFlushResourceProvider.h"
 #include "GrSurfaceContextPriv.h"
 #include "GrRenderTargetContext.h"
+#include "GrShape.h"
 #include "SkMakeUnique.h"
 #include "ccpr/GrCCPathCache.h"
 
 using FillBatchID = GrCCFiller::BatchID;
+using StrokeBatchID = GrCCStroker::BatchID;
 using PathInstance = GrCCPathProcessor::Instance;
 
+static constexpr int kFillIdx = GrCCPerFlushResourceSpecs::kFillIdx;
+static constexpr int kStrokeIdx = GrCCPerFlushResourceSpecs::kStrokeIdx;
+
 namespace {
 
 // Base class for an Op that renders a CCPR atlas.
@@ -101,30 +106,35 @@
 
     static std::unique_ptr<GrDrawOp> Make(GrContext* context,
                                           sk_sp<const GrCCPerFlushResources> resources,
-                                          FillBatchID batchID, const SkISize& drawBounds) {
+                                          FillBatchID fillBatchID, StrokeBatchID strokeBatchID,
+                                          const SkISize& drawBounds) {
         GrOpMemoryPool* pool = context->contextPriv().opMemoryPool();
 
-        return pool->allocate<RenderAtlasOp>(std::move(resources), batchID, drawBounds);
+        return pool->allocate<RenderAtlasOp>(std::move(resources), fillBatchID, strokeBatchID,
+                                             drawBounds);
     }
 
     // GrDrawOp interface.
     const char* name() const override { return "RenderAtlasOp (CCPR)"; }
 
     void onExecute(GrOpFlushState* flushState) override {
-        fResources->filler().drawFills(flushState, fBatchID, fDrawBounds);
+        fResources->filler().drawFills(flushState, fFillBatchID, fDrawBounds);
+        fResources->stroker().drawStrokes(flushState, fStrokeBatchID, fDrawBounds);
     }
 
 private:
     friend class ::GrOpMemoryPool; // for ctor
 
-    RenderAtlasOp(sk_sp<const GrCCPerFlushResources> resources, FillBatchID batchID,
-                  const SkISize& drawBounds)
+    RenderAtlasOp(sk_sp<const GrCCPerFlushResources> resources, FillBatchID fillBatchID,
+                  StrokeBatchID strokeBatchID, const SkISize& drawBounds)
             : AtlasOp(ClassID(), std::move(resources), drawBounds)
-            , fBatchID(batchID)
+            , fFillBatchID(fillBatchID)
+            , fStrokeBatchID(strokeBatchID)
             , fDrawBounds(SkIRect::MakeWH(drawBounds.width(), drawBounds.height())) {
     }
 
-    const FillBatchID fBatchID;
+    const FillBatchID fFillBatchID;
+    const StrokeBatchID fStrokeBatchID;
     const SkIRect fDrawBounds;
 };
 
@@ -132,8 +142,10 @@
 
 static int inst_buffer_count(const GrCCPerFlushResourceSpecs& specs) {
     return specs.fNumCachedPaths +
-           specs.fNumCopiedPaths*2 +  // 1 copy + 1 draw.
-           specs.fNumRenderedPaths;
+           // Each copied path (fill or stroke) needs two instances: 1 copy + 1 draw.
+           (specs.fNumCopiedPaths[kFillIdx] + specs.fNumCopiedPaths[kStrokeIdx]) * 2 +
+           specs.fNumRenderedPaths[kFillIdx] + specs.fNumRenderedPaths[kStrokeIdx];
+           // fNumClipPaths is not counted: there are no clips in instance buffers.
 }
 
 GrCCPerFlushResources::GrCCPerFlushResources(GrOnFlushResourceProvider* onFlushRP,
@@ -141,8 +153,15 @@
           // Overallocate by one point so we can call Sk4f::Store at the final SkPoint in the array.
           // (See transform_path_pts below.)
           // FIXME: instead use built-in instructions to write only the first two lanes of an Sk4f.
-        : fLocalDevPtsBuffer(specs.fRenderedPathStats.fMaxPointsPerPath + 1)
-        , fFiller(specs.fNumRenderedPaths + specs.fNumClipPaths, specs.fRenderedPathStats)
+        : fLocalDevPtsBuffer(SkTMax(specs.fRenderedPathStats[kFillIdx].fMaxPointsPerPath,
+                                    specs.fRenderedPathStats[kStrokeIdx].fMaxPointsPerPath) + 1)
+        , fFiller(specs.fNumRenderedPaths[kFillIdx] + specs.fNumClipPaths,
+                  specs.fRenderedPathStats[kFillIdx].fNumTotalSkPoints,
+                  specs.fRenderedPathStats[kFillIdx].fNumTotalSkVerbs,
+                  specs.fRenderedPathStats[kFillIdx].fNumTotalConicWeights)
+        , fStroker(specs.fNumRenderedPaths[kStrokeIdx],
+                   specs.fRenderedPathStats[kStrokeIdx].fNumTotalSkPoints,
+                   specs.fRenderedPathStats[kStrokeIdx].fNumTotalSkVerbs)
         , fCopyAtlasStack(kAlpha_8_GrPixelConfig, specs.fCopyAtlasSpecs, onFlushRP->caps())
         , fRenderedAtlasStack(kAlpha_half_GrPixelConfig, specs.fRenderedAtlasSpecs,
                               onFlushRP->caps())
@@ -151,7 +170,8 @@
         , fInstanceBuffer(onFlushRP->makeBuffer(kVertex_GrBufferType,
                                                 inst_buffer_count(specs) * sizeof(PathInstance)))
         , fNextCopyInstanceIdx(0)
-        , fNextPathInstanceIdx(specs.fNumCopiedPaths) {
+        , fNextPathInstanceIdx(specs.fNumCopiedPaths[kFillIdx] +
+                               specs.fNumCopiedPaths[kStrokeIdx]) {
     if (!fIndexBuffer) {
         SkDebugf("WARNING: failed to allocate CCPR index buffer. No paths will be drawn.\n");
         return;
@@ -166,7 +186,8 @@
     }
     fPathInstanceData = static_cast<PathInstance*>(fInstanceBuffer->map());
     SkASSERT(fPathInstanceData);
-    SkDEBUGCODE(fEndCopyInstance = specs.fNumCopiedPaths);
+    SkDEBUGCODE(fEndCopyInstance =
+                        specs.fNumCopiedPaths[kFillIdx] + specs.fNumCopiedPaths[kStrokeIdx]);
     SkDEBUGCODE(fEndPathInstance = inst_buffer_count(specs));
 }
 
@@ -180,7 +201,7 @@
     if (GrCCAtlas* retiredAtlas = fCopyAtlasStack.addRect(entry.devIBounds(), newAtlasOffset)) {
         // We did not fit in the previous copy atlas and it was retired. We will render the copies
         // up until fNextCopyInstanceIdx into the retired atlas during finalize().
-        retiredAtlas->setUserBatchID(fNextCopyInstanceIdx);
+        retiredAtlas->setFillBatchID(fNextCopyInstanceIdx);
     }
 
     fPathInstanceData[fNextCopyInstanceIdx++].set(entry, *newAtlasOffset, GrColor_WHITE, evenOdd);
@@ -237,20 +258,29 @@
                          bottomRightPts[1].y());
 }
 
-const GrCCAtlas* GrCCPerFlushResources::renderPathInAtlas(const SkIRect& clipIBounds,
-                                                          const SkMatrix& m, const SkPath& path,
-                                                          SkRect* devBounds, SkRect* devBounds45,
-                                                          SkIRect* devIBounds,
-                                                          SkIVector* devToAtlasOffset) {
+const GrCCAtlas* GrCCPerFlushResources::renderShapeInAtlas(
+        const SkIRect& clipIBounds, const SkMatrix& m, const GrShape& shape, float strokeDevWidth,
+        SkRect* devBounds, SkRect* devBounds45, SkIRect* devIBounds, SkIVector* devToAtlasOffset) {
     SkASSERT(this->isMapped());
     SkASSERT(fNextPathInstanceIdx < fEndPathInstance);
 
+    SkPath path;
+    shape.asPath(&path);
     if (path.isEmpty()) {
         SkDEBUGCODE(--fEndPathInstance);
         return nullptr;
     }
-
     transform_path_pts(m, path, fLocalDevPtsBuffer, devBounds, devBounds45);
+
+    const SkStrokeRec& stroke = shape.style().strokeRec();
+    if (!stroke.isFillStyle()) {
+        float r = SkStrokeRec::GetInflationRadius(stroke.getJoin(), stroke.getMiter(),
+                                                  stroke.getCap(), strokeDevWidth);
+        devBounds->outset(r, r);
+        // devBounds45 is in (| 1 -1 | * devCoords) space.
+        //                    | 1  1 |
+        devBounds45->outset(r*SK_ScalarSqrt2, r*SK_ScalarSqrt2);
+    }
     devBounds->roundOut(devIBounds);
 
     GrScissorTest scissorTest;
@@ -261,8 +291,17 @@
         return nullptr;  // Path was degenerate or clipped away.
     }
 
-    fFiller.parseDeviceSpaceFill(path, fLocalDevPtsBuffer.begin(), scissorTest, clippedPathIBounds,
-                                 *devToAtlasOffset);
+    if (stroke.isFillStyle()) {
+        SkASSERT(0 == strokeDevWidth);
+        fFiller.parseDeviceSpaceFill(path, fLocalDevPtsBuffer.begin(), scissorTest,
+                                     clippedPathIBounds, *devToAtlasOffset);
+    } else {
+        // Stroke-and-fill is not yet supported.
+        SkASSERT(SkStrokeRec::kStroke_Style == stroke.getStyle() || stroke.isHairlineStyle());
+        SkASSERT(!stroke.isHairlineStyle() || 1 == strokeDevWidth);
+        fStroker.parseDeviceSpaceStroke(path, fLocalDevPtsBuffer.begin(), stroke, strokeDevWidth,
+                                        scissorTest, clippedPathIBounds, *devToAtlasOffset);
+    }
     return &fRenderedAtlasStack.current();
 }
 
@@ -306,8 +345,8 @@
         // We did not fit in the previous coverage count atlas and it was retired. Close the path
         // parser's current batch (which does not yet include the path we just parsed). We will
         // render this batch into the retired atlas during finalize().
-        FillBatchID batchID = fFiller.closeCurrentBatch();
-        retiredAtlas->setUserBatchID(batchID);
+        retiredAtlas->setFillBatchID(fFiller.closeCurrentBatch());
+        retiredAtlas->setStrokeBatchID(fStroker.closeCurrentBatch());
     }
     return true;
 }
@@ -323,23 +362,26 @@
     fPathInstanceData = nullptr;
 
     if (!fCopyAtlasStack.empty()) {
-        fCopyAtlasStack.current().setUserBatchID(fNextCopyInstanceIdx);
+        fCopyAtlasStack.current().setFillBatchID(fNextCopyInstanceIdx);
     }
     if (!fRenderedAtlasStack.empty()) {
-        FillBatchID batchID = fFiller.closeCurrentBatch();
-        fRenderedAtlasStack.current().setUserBatchID(batchID);
+        fRenderedAtlasStack.current().setFillBatchID(fFiller.closeCurrentBatch());
+        fRenderedAtlasStack.current().setStrokeBatchID(fStroker.closeCurrentBatch());
     }
 
     // Build the GPU buffers to render path coverage counts. (This must not happen until after the
-    // final call to fPathParser.closeCurrentBatch().)
+    // final calls to fFiller/fStroker.closeCurrentBatch().)
     if (!fFiller.prepareToDraw(onFlushRP)) {
         return false;
     }
+    if (!fStroker.prepareToDraw(onFlushRP)) {
+        return false;
+    }
 
     // Draw the copies from the stashed atlas into 8-bit cached atlas(es).
     int baseCopyInstance = 0;
     for (GrCCAtlasStack::Iter atlas(fCopyAtlasStack); atlas.next();) {
-        int endCopyInstance = atlas->getUserBatchID();
+        int endCopyInstance = atlas->getFillBatchID();
         if (endCopyInstance <= baseCopyInstance) {
             SkASSERT(endCopyInstance == baseCopyInstance);
             continue;
@@ -367,7 +409,8 @@
 
         if (auto rtc = atlas->makeRenderTargetContext(onFlushRP, std::move(backingTexture))) {
             auto op = RenderAtlasOp::Make(rtc->surfPriv().getContext(), sk_ref_sp(this),
-                                          atlas->getUserBatchID(), atlas->drawBounds());
+                                          atlas->getFillBatchID(), atlas->getStrokeBatchID(),
+                                          atlas->drawBounds());
             rtc->addDrawOp(GrNoClip(), std::move(op));
             out->push_back(std::move(rtc));
         }
@@ -377,8 +420,17 @@
 }
 
 void GrCCPerFlushResourceSpecs::convertCopiesToRenders() {
-    fNumRenderedPaths += fNumCopiedPaths;
-    fNumCopiedPaths = 0;
+    for (int i = 0; i < 2; ++i) {
+        fNumRenderedPaths[i] += fNumCopiedPaths[i];
+        fNumCopiedPaths[i] = 0;
+
+        fRenderedPathStats[i].fMaxPointsPerPath =
+               SkTMax(fRenderedPathStats[i].fMaxPointsPerPath, fCopyPathStats[i].fMaxPointsPerPath);
+        fRenderedPathStats[i].fNumTotalSkPoints += fCopyPathStats[i].fNumTotalSkPoints;
+        fRenderedPathStats[i].fNumTotalSkVerbs += fCopyPathStats[i].fNumTotalSkVerbs;
+        fRenderedPathStats[i].fNumTotalConicWeights += fCopyPathStats[i].fNumTotalConicWeights;
+        fCopyPathStats[i] = GrCCRenderedPathStats();
+    }
 
     fRenderedAtlasSpecs.fApproxNumPixels += fCopyAtlasSpecs.fApproxNumPixels;
     fRenderedAtlasSpecs.fMinWidth =
@@ -386,11 +438,4 @@
     fRenderedAtlasSpecs.fMinHeight =
             SkTMax(fRenderedAtlasSpecs.fMinHeight, fCopyAtlasSpecs.fMinHeight);
     fCopyAtlasSpecs = GrCCAtlas::Specs();
-
-    fRenderedPathStats.fMaxPointsPerPath =
-            SkTMax(fRenderedPathStats.fMaxPointsPerPath, fCopyPathStats.fMaxPointsPerPath);
-    fRenderedPathStats.fNumTotalSkPoints += fCopyPathStats.fNumTotalSkPoints;
-    fRenderedPathStats.fNumTotalSkVerbs += fCopyPathStats.fNumTotalSkVerbs;
-    fRenderedPathStats.fNumTotalConicWeights += fCopyPathStats.fNumTotalConicWeights;
-    fCopyPathStats = GrCCFiller::PathStats();
 }
diff --git a/src/gpu/ccpr/GrCCPerFlushResources.h b/src/gpu/ccpr/GrCCPerFlushResources.h
index 3fa392e..132068f 100644
--- a/src/gpu/ccpr/GrCCPerFlushResources.h
+++ b/src/gpu/ccpr/GrCCPerFlushResources.h
@@ -11,29 +11,47 @@
 #include "GrNonAtomicRef.h"
 #include "ccpr/GrCCAtlas.h"
 #include "ccpr/GrCCFiller.h"
+#include "ccpr/GrCCStroker.h"
 #include "ccpr/GrCCPathProcessor.h"
 
 class GrCCPathCacheEntry;
 class GrOnFlushResourceProvider;
+class GrShape;
+
+/**
+ * This struct counts values that help us preallocate buffers for rendered path geometry.
+ */
+struct GrCCRenderedPathStats {
+    int fMaxPointsPerPath = 0;  // Largest countPoints() of any single path given to statPath().
+    int fNumTotalSkPoints = 0;  // Sum of countPoints() over every path given to statPath().
+    int fNumTotalSkVerbs = 0;  // Sum of countVerbs() over every path given to statPath().
+    int fNumTotalConicWeights = 0;  // Sum of SkPathPriv::ConicWeightCnt() over those paths.
+
+    void statPath(const SkPath&);  // Folds one path's counts into the fields above.
+};
 
 /**
  * This struct encapsulates the minimum and desired requirements for the GPU resources required by
  * CCPR in a given flush.
  */
 struct GrCCPerFlushResourceSpecs {
+    static constexpr int kFillIdx = 0;  // Slot used for fills in the 2-element members below.
+    static constexpr int kStrokeIdx = 1;  // Slot used for strokes in the 2-element members below.
+
     int fNumCachedPaths = 0;
 
-    int fNumCopiedPaths = 0;
-    GrCCFiller::PathStats fCopyPathStats;
+    int fNumCopiedPaths[2] = {0, 0};  // Indexed by kFillIdx / kStrokeIdx.
+    GrCCRenderedPathStats fCopyPathStats[2];  // Indexed by kFillIdx / kStrokeIdx.
     GrCCAtlas::Specs fCopyAtlasSpecs;
 
-    int fNumRenderedPaths = 0;
+    int fNumRenderedPaths[2] = {0, 0};  // Indexed by kFillIdx / kStrokeIdx.
     int fNumClipPaths = 0;
-    GrCCFiller::PathStats fRenderedPathStats;
+    GrCCRenderedPathStats fRenderedPathStats[2];  // Indexed by kFillIdx / kStrokeIdx.
     GrCCAtlas::Specs fRenderedAtlasSpecs;
 
     bool isEmpty() const {
-        return 0 == fNumCachedPaths + fNumCopiedPaths + fNumRenderedPaths + fNumClipPaths;
+        return 0 == fNumCachedPaths + fNumCopiedPaths[kFillIdx] + fNumCopiedPaths[kStrokeIdx] +
+                    fNumRenderedPaths[kFillIdx] + fNumRenderedPaths[kStrokeIdx] + fNumClipPaths;
     }
     void convertCopiesToRenders();
 };
@@ -55,12 +73,16 @@
     GrCCAtlas* copyPathToCachedAtlas(const GrCCPathCacheEntry&, GrCCPathProcessor::DoEvenOddFill,
                                      SkIVector* newAtlasOffset);
 
-    // These two methods render a path into a temporary coverage count atlas. See GrCCPathParser for
-    // a description of the arguments. The returned atlases are "const" to prevent the caller from
-    // assigning a unique key.
-    const GrCCAtlas* renderPathInAtlas(const SkIRect& clipIBounds, const SkMatrix&, const SkPath&,
-                                       SkRect* devBounds, SkRect* devBounds45, SkIRect* devIBounds,
-                                       SkIVector* devToAtlasOffset);
+    // These two methods render a path into a temporary coverage count atlas. See
+    // GrCCPathProcessor::Instance for a description of the outputs. The returned atlases are
+    // "const" to prevent the caller from assigning a unique key.
+    //
+    // strokeDevWidth must be 0 for fills, 1 for hairlines, or the stroke width in device-space
+    // pixels for non-hairline strokes (implicitly requiring a rigid-body transform).
+    const GrCCAtlas* renderShapeInAtlas(const SkIRect& clipIBounds, const SkMatrix&, const GrShape&,
+                                        float strokeDevWidth, SkRect* devBounds,
+                                        SkRect* devBounds45, SkIRect* devIBounds,
+                                        SkIVector* devToAtlasOffset);
     const GrCCAtlas* renderDeviceSpacePathInAtlas(const SkIRect& clipIBounds, const SkPath& devPath,
                                                   const SkIRect& devPathIBounds,
                                                   SkIVector* devToAtlasOffset);
@@ -86,6 +108,7 @@
 
     // Accessors used by draw calls, once the resources have been finalized.
     const GrCCFiller& filler() const { SkASSERT(!this->isMapped()); return fFiller; }
+    const GrCCStroker& stroker() const { SkASSERT(!this->isMapped()); return fStroker; }
     const GrBuffer* indexBuffer() const { SkASSERT(!this->isMapped()); return fIndexBuffer.get(); }
     const GrBuffer* vertexBuffer() const { SkASSERT(!this->isMapped()); return fVertexBuffer.get();}
     GrBuffer* instanceBuffer() const { SkASSERT(!this->isMapped()); return fInstanceBuffer.get(); }
@@ -113,6 +136,7 @@
 
     const SkAutoSTArray<32, SkPoint> fLocalDevPtsBuffer;
     GrCCFiller fFiller;
+    GrCCStroker fStroker;
     GrCCAtlasStack fCopyAtlasStack;
     GrCCAtlasStack fRenderedAtlasStack;
 
@@ -127,4 +151,11 @@
     SkDEBUGCODE(int fEndPathInstance);
 };
 
+inline void GrCCRenderedPathStats::statPath(const SkPath& path) {
+    fNumTotalConicWeights += SkPathPriv::ConicWeightCnt(path);  // Running totals first...
+    fNumTotalSkVerbs += path.countVerbs();
+    fNumTotalSkPoints += path.countPoints();
+    fMaxPointsPerPath = SkTMax(path.countPoints(), fMaxPointsPerPath);  // ...then the high mark.
+}
+
 #endif
diff --git a/src/gpu/ccpr/GrCCStrokeGeometry.cpp b/src/gpu/ccpr/GrCCStrokeGeometry.cpp
new file mode 100644
index 0000000..3fcafec
--- /dev/null
+++ b/src/gpu/ccpr/GrCCStrokeGeometry.cpp
@@ -0,0 +1,582 @@
+/*
+ * Copyright 2018 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#include "GrCCStrokeGeometry.h"
+
+#include "SkGeometry.h"
+#include "SkMathPriv.h"
+#include "SkNx.h"
+#include "SkStrokeRec.h"
+
+// This is the maximum distance in pixels that we can stray from the edge of a stroke when
+// converting it to flat line segments.
+static constexpr float kMaxErrorFromLinearization = 1/8.f;
+
+static inline float length(const Sk2f& n) {
+    Sk2f squared = n*n;  // (x*x, y*y)
+    return SkScalarSqrt(squared[0] + squared[1]);
+}
+
+static inline Sk2f normalize(const Sk2f& v) {
+    Sk2f squared = v*v;
+    Sk2f lengthPow2 = squared + SkNx_shuffle<1,0>(squared);  // x*x + y*y in both lanes.
+    return v * lengthPow2.rsqrt();
+}
+
+static inline void transpose(const Sk2f& a, const Sk2f& b, Sk2f* X, Sk2f* Y) {
+    float interleaved[4];  // Filled as {a[0], a[1], b[0], b[1]}.
+    a.store(&interleaved[0]);
+    b.store(&interleaved[2]);
+    Sk2f::Load2(interleaved, X, Y);  // Reload the four floats split across *X and *Y.
+}
+
+static inline void normalize2(const Sk2f& v0, const Sk2f& v1, SkPoint out[2]) {
+    Sk2f X, Y;
+    transpose(v0, v1, &X, &Y);  // Presumably X = both x components, Y = both y components.
+    Sk2f invlength = (X*X + Y*Y).rsqrt();  // Reciprocal lengths of v0 and v1, one per lane.
+    Sk2f::Store2(out, Y * invlength, -X * invlength);  // Writes (y, -x) scaled by 1/length.
+}
+
+static inline float calc_curvature_costheta(const Sk2f& leftTan, const Sk2f& rightTan) {
+    Sk2f X, Y;
+    transpose(leftTan, rightTan, &X, &Y);
+    Sk2f invlength = (X*X + Y*Y).rsqrt();  // Reciprocal lengths of leftTan and rightTan.
+    Sk2f dotprod = leftTan * rightTan;  // Per-component products; summed on the next line.
+    return (dotprod[0] + dotprod[1]) * invlength[0] * invlength[1];  // dot / (|left|*|right|).
+}
+
+static GrCCStrokeGeometry::Verb join_verb_from_join(SkPaint::Join join) {
+    using Verb = GrCCStrokeGeometry::Verb;  // Maps an SkPaint join onto the matching join Verb.
+    switch (join) {
+        case SkPaint::kBevel_Join:
+            return Verb::kBevelJoin;
+        case SkPaint::kMiter_Join:
+            return Verb::kMiterJoin;
+        case SkPaint::kRound_Join:
+            return Verb::kRoundJoin;
+    }
+    SK_ABORT("Invalid SkPaint::Join.");  // Reached only if 'join' matched none of the cases.
+    return Verb::kBevelJoin;
+}
+
+void GrCCStrokeGeometry::beginPath(const SkStrokeRec& stroke, float strokeDevWidth,
+                                   InstanceTallies* tallies) {
+    SkASSERT(!fInsideContour);
+    // Client should have already converted the stroke to device space (i.e. width=1 for hairline).
+    SkASSERT(strokeDevWidth > 0);
+
+    fCurrStrokeRadius = strokeDevWidth/2;
+    fCurrStrokeJoinVerb = join_verb_from_join(stroke.getJoin());
+    fCurrStrokeCapType = stroke.getCap();
+    fCurrStrokeTallies = tallies;  // recordStroke() increments counters through this pointer.
+
+    if (Verb::kMiterJoin == fCurrStrokeJoinVerb) {
+        // We implement miters by placing a triangle-shaped cap on top of a bevel join. Convert the
+        // "miter limit" to how tall that triangle cap can be.
+        float m = stroke.getMiter();
+        fMiterMaxCapHeightOverWidth = .5f * SkScalarSqrt(m*m - 1);  // NOTE(review): assumes m >= 1.
+    }
+
+    // Find the angle of curvature where the arc height above a simple line from point A to point B
+    // is equal to kMaxErrorFromLinearization.
+    float r = SkTMax(1 - kMaxErrorFromLinearization / fCurrStrokeRadius, 0.f);
+    fMaxCurvatureCosTheta = 2*r*r - 1;  // Equals cos(2a) when r == cos(a).
+
+    fCurrContourFirstPtIdx = -1;  // No contour yet; moveTo() assigns both indices.
+    fCurrContourFirstNormalIdx = -1;
+
+    fVerbs.push_back(Verb::kBeginPath);
+}
+
+void GrCCStrokeGeometry::moveTo(SkPoint pt) {
+    SkASSERT(!fInsideContour);
+    fCurrContourFirstPtIdx = fPoints.count();  // Index that 'pt' is about to occupy.
+    fCurrContourFirstNormalIdx = fNormals.count();  // Where this contour's normals will begin.
+    fPoints.push_back(pt);
+    SkDEBUGCODE(fInsideContour = true);
+}
+
+void GrCCStrokeGeometry::lineTo(SkPoint pt) {
+    SkASSERT(fInsideContour);
+    this->lineTo(fCurrStrokeJoinVerb, pt);  // Join to the previous segment with the stroke's join.
+}
+
+void GrCCStrokeGeometry::lineTo(Verb leftJoinVerb, SkPoint pt) {
+    Sk2f tan = Sk2f::Load(&pt) - Sk2f::Load(&fPoints.back());
+    if ((tan == 0).allTrue()) {
+        return;  // 'pt' equals the current point exactly: record nothing.
+    }
+
+    tan = normalize(tan);
+    SkVector n = SkVector::Make(tan[1], -tan[0]);  // Unit tangent rotated by 90 degrees.
+
+    this->recordLeftJoinIfNotEmpty(leftJoinVerb, n);
+    fNormals.push_back(n);
+
+    this->recordStroke(Verb::kLinearStroke, 0);  // Linear strokes always pass numSegmentsLog2 == 0.
+    fPoints.push_back(pt);
+}
+
+void GrCCStrokeGeometry::quadraticTo(const SkPoint P[3]) {
+    SkASSERT(fInsideContour);
+    this->quadraticTo(fCurrStrokeJoinVerb, P, SkFindQuadMaxCurvature(P));  // T of max curvature.
+}
+
+// Wang's formula for quadratics (1985) gives us the number of evenly spaced (in the parametric
+// sense) line segments that are guaranteed to be within a distance of "kMaxErrorFromLinearization"
+// from the actual curve. The count is rounded up to a whole number but returned as a float.
+static inline float wangs_formula_quadratic(const Sk2f& p0, const Sk2f& p1, const Sk2f& p2) {
+    static constexpr float k = 2 / (8 * kMaxErrorFromLinearization);
+    float f = SkScalarSqrt(k * length(p2 - p1*2 + p0));  // Uses the length of p0 - 2*p1 + p2.
+    return SkScalarCeilToInt(f);
+}
+
+// Records the stroke for quadratic P, using leftJoinVerb to join it to the previous segment.
+void GrCCStrokeGeometry::quadraticTo(Verb leftJoinVerb, const SkPoint P[3], float maxCurvatureT) {
+    Sk2f p0 = Sk2f::Load(P);
+    Sk2f p1 = Sk2f::Load(P+1);
+    Sk2f p2 = Sk2f::Load(P+2);
+    Sk2f tan0 = p1 - p0;  // Tangent direction at T=0.
+    Sk2f tan1 = p2 - p1;  // Tangent direction at T=1.
+
+    // Snap to a "lineTo" if the control point is so close to an endpoint that FP error will become
+    // an issue.
+    if ((tan0.abs() < SK_ScalarNearlyZero).allTrue() ||  // p0 ~= p1
+        (tan1.abs() < SK_ScalarNearlyZero).allTrue()) {  // p1 ~= p2
+        this->lineTo(leftJoinVerb, P[2]);
+        return;
+    }
+
+    SkPoint normals[2];
+    normalize2(tan0, tan1, normals);
+    // Decide how many flat line segments to chop the curve into.
+    int numSegments = wangs_formula_quadratic(p0, p1, p2);
+    numSegments = SkTMin(numSegments, 1 << kMaxNumLinearSegmentsLog2);  // recordStroke()'s limit.
+    if (numSegments <= 1) {
+        this->rotateTo(leftJoinVerb, normals[0]);
+        this->lineTo(Verb::kInternalRoundJoin, P[2]);
+        this->rotateTo(Verb::kInternalRoundJoin, normals[1]);
+        return;
+    }
+
+    // At + B gives a vector tangent to the quadratic.
+    Sk2f A = p0 - p1*2 + p2;
+    Sk2f B = p1 - p0;
+
+    // Find a line segment that crosses max curvature.
+    float segmentLength = SkScalarInvert(numSegments);
+    float leftT = maxCurvatureT - segmentLength/2;
+    float rightT = maxCurvatureT + segmentLength/2;
+    Sk2f leftTan, rightTan;
+    if (leftT <= 0) {
+        leftT = 0;
+        leftTan = tan0;
+        rightT = segmentLength;
+        rightTan = A*rightT + B;
+    } else if (rightT >= 1) {
+        leftT = 1 - segmentLength;
+        leftTan = A*leftT + B;
+        rightT = 1;
+        rightTan = tan1;
+    } else {
+        leftTan = A*leftT + B;
+        rightTan = A*rightT + B;
+    }
+
+    // Check if curvature is too strong for a triangle strip on the line segment that crosses max
+    // curvature. If it is, we will chop and convert the segment to a "lineTo" with round joins.
+    //
+    // FIXME: This is quite costly and the vast majority of curves only have moderate curvature. We
+    // would benefit significantly from a quick reject that detects curves that don't need special
+    // treatment for strong curvature.
+    bool isCurvatureTooStrong = calc_curvature_costheta(leftTan, rightTan) < fMaxCurvatureCosTheta;
+    if (isCurvatureTooStrong) {
+        SkPoint ptsBuffer[5];
+        const SkPoint* currQuadratic = P;
+
+        if (leftT > 0) {
+            SkChopQuadAt(currQuadratic, ptsBuffer, leftT);
+            this->quadraticTo(leftJoinVerb, ptsBuffer, /*maxCurvatureT=*/1);
+            if (rightT < 1) {
+                rightT = (rightT - leftT) / (1 - leftT);  // Re-express in the right half's T.
+            }
+            currQuadratic = ptsBuffer + 2;
+        } else {
+            this->rotateTo(leftJoinVerb, normals[0]);
+        }
+
+        if (rightT < 1) {
+            SkChopQuadAt(currQuadratic, ptsBuffer, rightT);
+            this->lineTo(Verb::kInternalRoundJoin, ptsBuffer[2]);
+            this->quadraticTo(Verb::kInternalRoundJoin, ptsBuffer + 2, /*maxCurvatureT=*/0);
+        } else {
+            this->lineTo(Verb::kInternalRoundJoin, currQuadratic[2]);
+            this->rotateTo(Verb::kInternalRoundJoin, normals[1]);
+        }
+        return;
+    }
+
+    this->recordLeftJoinIfNotEmpty(leftJoinVerb, normals[0]);
+    fNormals.push_back_n(2, normals);
+
+    this->recordStroke(Verb::kQuadraticStroke, SkNextLog2(numSegments));
+    p1.store(&fPoints.push_back());
+    p2.store(&fPoints.push_back());
+}
+
+void GrCCStrokeGeometry::cubicTo(const SkPoint P[4]) {
+    SkASSERT(fInsideContour);
+    float roots[3];
+    int numRoots = SkFindCubicMaxCurvature(P, roots);
+    this->cubicTo(fCurrStrokeJoinVerb, P,
+                  numRoots > 0 ? roots[numRoots/2] : 0,  // Index 0, 1, 1 for 1, 2, 3 roots.
+                  numRoots > 1 ? roots[0] : kLeftMaxCurvatureNone,
+                  numRoots > 2 ? roots[2] : kRightMaxCurvatureNone);
+}
+
+// Wang's formula for cubics (1985) gives us the number of evenly spaced (in the parametric sense)
+// line segments that are guaranteed to be within a distance of "kMaxErrorFromLinearization"
+// from the actual curve. The count is rounded up to a whole number but returned as a float.
+static inline float wangs_formula_cubic(const Sk2f& p0, const Sk2f& p1, const Sk2f& p2,
+                                        const Sk2f& p3) {
+    static constexpr float k = (3 * 2) / (8 * kMaxErrorFromLinearization);
+    float f = SkScalarSqrt(k * length(Sk2f::Max((p2 - p1*2 + p0).abs(),
+                                                (p3 - p2*2 + p1).abs())));
+    return SkScalarCeilToInt(f);
+}
+
+// Records the stroke for cubic P, using leftJoinVerb to join it to the previous segment.
+void GrCCStrokeGeometry::cubicTo(Verb leftJoinVerb, const SkPoint P[4], float maxCurvatureT,
+                                 float leftMaxCurvatureT, float rightMaxCurvatureT) {
+    Sk2f p0 = Sk2f::Load(P);
+    Sk2f p1 = Sk2f::Load(P+1);
+    Sk2f p2 = Sk2f::Load(P+2);
+    Sk2f p3 = Sk2f::Load(P+3);
+    Sk2f tan0 = p1 - p0;
+    Sk2f tan1 = p3 - p2;
+
+    // Snap control points to endpoints if they are so close that FP error will become an issue.
+    if ((tan0.abs() < SK_ScalarNearlyZero).allTrue()) {  // p0 ~= p1
+        p1 = p0;
+        tan0 = p2 - p0;
+        if ((tan0.abs() < SK_ScalarNearlyZero).allTrue()) {  // p0 ~= p1 ~= p2
+            this->lineTo(leftJoinVerb, P[3]);
+            return;
+        }
+    }
+    if ((tan1.abs() < SK_ScalarNearlyZero).allTrue()) {  // p2 ~= p3
+        p2 = p3;
+        tan1 = p3 - p1;
+        if ((tan1.abs() < SK_ScalarNearlyZero).allTrue() ||  // p1 ~= p2 ~= p3
+            (p0 == p1).allTrue()) {  // p0 ~= p1 AND p2 ~= p3
+            this->lineTo(leftJoinVerb, P[3]);
+            return;
+        }
+    }
+
+    SkPoint normals[2];
+    normalize2(tan0, tan1, normals);
+    // Decide how many flat line segments to chop the curve into.
+    int numSegments = wangs_formula_cubic(p0, p1, p2, p3);
+    numSegments = SkTMin(numSegments, 1 << kMaxNumLinearSegmentsLog2);  // recordStroke()'s limit.
+    if (numSegments <= 1) {
+        this->rotateTo(leftJoinVerb, normals[0]);
+        this->lineTo(Verb::kInternalRoundJoin, P[3]);  // Joins inside the curve are internal.
+        this->rotateTo(Verb::kInternalRoundJoin, normals[1]);
+        return;
+    }
+
+    // At^2 + Bt + C gives a vector tangent to the cubic. (More specifically, it's the derivative
+    // minus an irrelevant scale by 3, since all we care about is the direction.)
+    Sk2f A = p3 + (p1 - p2)*3 - p0;
+    Sk2f B = (p0 - p1*2 + p2)*2;
+    Sk2f C = p1 - p0;
+
+    // Find a line segment that crosses max curvature.
+    float segmentLength = SkScalarInvert(numSegments);
+    float leftT = maxCurvatureT - segmentLength/2;
+    float rightT = maxCurvatureT + segmentLength/2;
+    Sk2f leftTan, rightTan;
+    if (leftT <= 0) {
+        leftT = 0;
+        leftTan = tan0;
+        rightT = segmentLength;
+        rightTan = A*rightT*rightT + B*rightT + C;
+    } else if (rightT >= 1) {
+        leftT = 1 - segmentLength;
+        leftTan = A*leftT*leftT + B*leftT + C;
+        rightT = 1;
+        rightTan = tan1;
+    } else {
+        leftTan = A*leftT*leftT + B*leftT + C;
+        rightTan = A*rightT*rightT + B*rightT + C;
+    }
+
+    // Check if curvature is too strong for a triangle strip on the line segment that crosses max
+    // curvature. If it is, we will chop and convert the segment to a "lineTo" with round joins.
+    //
+    // FIXME: This is quite costly and the vast majority of curves only have moderate curvature. We
+    // would benefit significantly from a quick reject that detects curves that don't need special
+    // treatment for strong curvature.
+    bool isCurvatureTooStrong = calc_curvature_costheta(leftTan, rightTan) < fMaxCurvatureCosTheta;
+    if (isCurvatureTooStrong) {
+        SkPoint ptsBuffer[7];
+        p0.store(ptsBuffer);
+        p1.store(ptsBuffer + 1);
+        p2.store(ptsBuffer + 2);
+        p3.store(ptsBuffer + 3);
+        const SkPoint* currCubic = ptsBuffer;
+
+        if (leftT > 0) {
+            SkChopCubicAt(currCubic, ptsBuffer, leftT);
+            this->cubicTo(leftJoinVerb, ptsBuffer, /*maxCurvatureT=*/1,
+                          (kLeftMaxCurvatureNone != leftMaxCurvatureT)
+                                  ? leftMaxCurvatureT/leftT : kLeftMaxCurvatureNone,
+                          kRightMaxCurvatureNone);
+            if (rightT < 1) {
+                rightT = (rightT - leftT) / (1 - leftT);  // Re-express in the right half's T.
+            }
+            if (rightMaxCurvatureT < 1 && kRightMaxCurvatureNone != rightMaxCurvatureT) {
+                rightMaxCurvatureT = (rightMaxCurvatureT - leftT) / (1 - leftT);
+            }
+            currCubic = ptsBuffer + 3;
+        } else {
+            this->rotateTo(leftJoinVerb, normals[0]);
+        }
+
+        if (rightT < 1) {
+            SkChopCubicAt(currCubic, ptsBuffer, rightT);
+            this->lineTo(Verb::kInternalRoundJoin, ptsBuffer[3]);
+            currCubic = ptsBuffer + 3;
+            this->cubicTo(Verb::kInternalRoundJoin, currCubic, /*maxCurvatureT=*/0,
+                          kLeftMaxCurvatureNone, kRightMaxCurvatureNone);
+        } else {
+            this->lineTo(Verb::kInternalRoundJoin, currCubic[3]);
+            this->rotateTo(Verb::kInternalRoundJoin, normals[1]);
+        }
+        return;
+    }
+
+    // Recurse and check the other two points of max curvature, if any.
+    if (kRightMaxCurvatureNone != rightMaxCurvatureT) {
+        this->cubicTo(leftJoinVerb, P, rightMaxCurvatureT, leftMaxCurvatureT,
+                      kRightMaxCurvatureNone);
+        return;
+    }
+    if (kLeftMaxCurvatureNone != leftMaxCurvatureT) {
+        SkASSERT(kRightMaxCurvatureNone == rightMaxCurvatureT);
+        this->cubicTo(leftJoinVerb, P, leftMaxCurvatureT, kLeftMaxCurvatureNone,
+                      kRightMaxCurvatureNone);
+        return;
+    }
+
+    this->recordLeftJoinIfNotEmpty(leftJoinVerb, normals[0]);
+    fNormals.push_back_n(2, normals);
+
+    this->recordStroke(Verb::kCubicStroke, SkNextLog2(numSegments));
+    p1.store(&fPoints.push_back());
+    p2.store(&fPoints.push_back());
+    p3.store(&fPoints.push_back());
+}
+
+void GrCCStrokeGeometry::recordStroke(Verb verb, int numSegmentsLog2) {  // Appends one stroke verb.
+    SkASSERT(Verb::kLinearStroke != verb || 0 == numSegmentsLog2);  // Lines are always 2^0 segments.
+    SkASSERT(numSegmentsLog2 <= kMaxNumLinearSegmentsLog2);
+    fVerbs.push_back(verb);
+    if (Verb::kLinearStroke != verb) {
+        SkASSERT(numSegmentsLog2 > 0);
+        fParams.push_back().fNumLinearSegmentsLog2 = numSegmentsLog2;  // Only curves carry a param.
+    }
+    ++fCurrStrokeTallies->fStrokes[numSegmentsLog2];  // Tallies are bucketed by log2(segment count).
+}
+
+void GrCCStrokeGeometry::rotateTo(Verb leftJoinVerb, SkVector normal) {  // Join, then change normal.
+    this->recordLeftJoinIfNotEmpty(leftJoinVerb, normal);  // Reads fNormals.back(), so it goes first.
+    fNormals.push_back(normal);  // Only a normal is appended here; fPoints is left untouched.
+}
+
+void GrCCStrokeGeometry::recordLeftJoinIfNotEmpty(Verb joinVerb, SkVector nextNormal) {  // Records the join from the last normal to nextNormal.
+    if (fNormals.count() <= fCurrContourFirstNormalIdx) {
+        // The contour is empty, so there is no previous normal to join with.
+        SkASSERT(fNormals.count() == fCurrContourFirstNormalIdx);
+        return;
+    }
+    // Explicit bevels need no geometric analysis; record them right away.
+    if (Verb::kBevelJoin == joinVerb) {
+        this->recordBevelJoin(Verb::kBevelJoin);
+        return;
+    }
+    // n0 is the normal we are leaving, n1 the one we are turning to, and "base" is their difference.
+    Sk2f n0 = Sk2f::Load(&fNormals.back());
+    Sk2f n1 = Sk2f::Load(&nextNormal);
+    Sk2f base = n1 - n0;
+    if ((base.abs() * fCurrStrokeRadius < kMaxErrorFromLinearization).allTrue()) {
+        // Treat any join as a bevel when the outside corners of the two adjoining strokes are
+        // close enough to each other. This is important because "miterCapHeightOverWidth" becomes
+        // unstable when n0 and n1 are nearly equal. (Both x and y must be under the threshold.)
+        this->recordBevelJoin(joinVerb);
+        return;
+    }
+
+    // We implement miters and round joins by placing a triangle-shaped cap on top of a bevel join.
+    // (For round joins this triangle cap comprises the conic control points.) Find how tall to make
+    // this triangle cap, relative to its width.
+    //
+    // NOTE: This value would be infinite at 180 degrees, but we clamp miterCapHeightOverWidth at
+    // near-infinity. 180-degree round joins still look perfectly acceptable like this (though
+    // technically not pure arcs).
+    Sk2f cross = base * SkNx_shuffle<1,0>(n0);  // Lanes hold base.x*n0.y and base.y*n0.x.
+    Sk2f dot = base * n0;  // Lanes hold base.x*n0.x and base.y*n0.y.
+    float miterCapHeight = SkScalarAbs(dot[0] + dot[1]);  // |base . n0|
+    float miterCapWidth = SkScalarAbs(cross[0] - cross[1]) * 2;  // 2 * |base x n0|
+
+    if (Verb::kMiterJoin == joinVerb) {
+        if (miterCapHeight > fMiterMaxCapHeightOverWidth * miterCapWidth) {  // Product form; no divide.
+            // This join is tighter than the miter limit. Treat it as a bevel.
+            this->recordBevelJoin(Verb::kMiterJoin);
+            return;
+        }
+        this->recordMiterJoin(miterCapHeight / miterCapWidth);
+        return;
+    }
+
+    SkASSERT(Verb::kRoundJoin == joinVerb || Verb::kInternalRoundJoin == joinVerb);
+
+    // Conic arcs become unstable when they approach 180 degrees. When the conic control point
+    // begins shooting off to infinity (i.e., height/width > 32), split the conic into two.
+    static constexpr float kAlmost180Degrees = 32;
+    if (miterCapHeight > kAlmost180Degrees * miterCapWidth) {
+        Sk2f bisect = normalize(n0 - n1);
+        this->rotateTo(joinVerb, SkVector::Make(-bisect[1], bisect[0]));  // Perpendicular to bisect.
+        this->recordLeftJoinIfNotEmpty(joinVerb, nextNormal);  // Second half, from the new normal.
+        return;
+    }
+
+    float miterCapHeightOverWidth = miterCapHeight / miterCapWidth;
+
+    // Find the heights of this round join's conic control point as well as the arc itself.
+    Sk2f X, Y;
+    transpose(base * base, n0 * n1, &X, &Y);
+    Sk2f r = Sk2f::Max(X + Y + Sk2f(0, 1), 0.f).sqrt();  // Clamped at 0 before taking the sqrt.
+    Sk2f heights = SkNx_fma(r, Sk2f(miterCapHeightOverWidth, -SK_ScalarRoot2Over2), Sk2f(0, 1));
+    float controlPointHeight = SkScalarAbs(heights[0]);
+    float curveHeight = heights[1];
+    if (curveHeight * fCurrStrokeRadius < kMaxErrorFromLinearization) {
+        // Treat round joins as bevels when their curvature is nearly flat.
+        this->recordBevelJoin(joinVerb);
+        return;
+    }
+    // The conic weight is derived from the arc height and the control point height.
+    float w = curveHeight / (controlPointHeight - curveHeight);
+    this->recordRoundJoin(joinVerb, miterCapHeightOverWidth, w);
+}
+
+void GrCCStrokeGeometry::recordBevelJoin(Verb originalJoinVerb) {  // Bevel stand-in for any join type.
+    if (!IsInternalJoinVerb(originalJoinVerb)) {
+        fVerbs.push_back(Verb::kBevelJoin);
+        ++fCurrStrokeTallies->fTriangles;  // A regular bevel is tallied as one triangle.
+    } else {
+        fVerbs.push_back(Verb::kInternalBevelJoin);  // Internal joins stay internal when beveled.
+        fCurrStrokeTallies->fTriangles += 2;  // An internal bevel is tallied as two triangles.
+    }
+}
+
+void GrCCStrokeGeometry::recordMiterJoin(float miterCapHeightOverWidth) {  // Verb plus one param.
+    fVerbs.push_back(Verb::kMiterJoin);
+    fParams.push_back().fMiterCapHeightOverWidth = miterCapHeightOverWidth;
+    fCurrStrokeTallies->fTriangles += 2;  // Presumably the bevel triangle plus its miter cap.
+}
+
+void GrCCStrokeGeometry::recordRoundJoin(Verb joinVerb, float miterCapHeightOverWidth,
+                                         float conicWeight) {  // Verb plus two params.
+    fVerbs.push_back(joinVerb);
+    fParams.push_back().fConicWeight = conicWeight;  // Param order matters: weight first ...
+    fParams.push_back().fMiterCapHeightOverWidth = miterCapHeightOverWidth;  // ... then cap height.
+    if (Verb::kRoundJoin == joinVerb) {
+        ++fCurrStrokeTallies->fTriangles;
+        ++fCurrStrokeTallies->fConics;
+    } else {
+        SkASSERT(Verb::kInternalRoundJoin == joinVerb);
+        fCurrStrokeTallies->fTriangles += 2;  // Internal round joins are tallied at twice the
+        fCurrStrokeTallies->fConics += 2;     // triangle and conic count of a regular one.
+    }
+}
+
+void GrCCStrokeGeometry::closeContour() {  // Ends the contour as a closed loop (no caps).
+    SkASSERT(fInsideContour);
+    SkASSERT(fPoints.count() > fCurrContourFirstPtIdx);
+    if (fPoints.back() != fPoints[fCurrContourFirstPtIdx]) {
+        // The last point differs from the first one, so draw a line back to the beginning.
+        this->lineTo(fCurrStrokeJoinVerb, fPoints[fCurrContourFirstPtIdx]);
+    }
+    if (fNormals.count() > fCurrContourFirstNormalIdx) {
+        // Join the last line to the first by rotating to the contour's first normal.
+        this->rotateTo(fCurrStrokeJoinVerb,fNormals[fCurrContourFirstNormalIdx]);
+    } else {
+        // This contour is empty. Add a bogus normal since the iterator always expects one.
+        SkASSERT(fNormals.count() == fCurrContourFirstNormalIdx);
+        fNormals.push_back({0, 0});
+    }
+    fVerbs.push_back(Verb::kEndContour);
+    SkDEBUGCODE(fInsideContour = false);  // Debug-only bookkeeping checked by the SkASSERTs.
+}
+
+void GrCCStrokeGeometry::capContourAndExit() {  // Ends an open contour, adding end caps if any.
+    SkASSERT(fInsideContour);
+    if (fCurrContourFirstNormalIdx >= fNormals.count()) {
+        // This contour is empty. Add a normal in the direction that caps orient on empty geometry.
+        SkASSERT(fNormals.count() == fCurrContourFirstNormalIdx);
+        fNormals.push_back({1, 0});
+    }
+    // recordCapsIfAny() asserts that the contour has at least one normal, hence the push above.
+    this->recordCapsIfAny();
+    fVerbs.push_back(Verb::kEndContour);
+
+    SkDEBUGCODE(fInsideContour = false);
+}
+
+void GrCCStrokeGeometry::recordCapsIfAny() {  // Records a cap for each end of the open contour.
+    SkASSERT(fInsideContour);
+    SkASSERT(fCurrContourFirstNormalIdx < fNormals.count());
+
+    if (SkPaint::kButt_Cap == fCurrStrokeCapType) {
+        return;  // Butt caps record nothing extra here.
+    }
+
+    Verb capVerb;
+    if (SkPaint::kSquare_Cap == fCurrStrokeCapType) {
+        if (fCurrStrokeRadius * SK_ScalarRoot2Over2 < kMaxErrorFromLinearization) {
+            return;  // Below the linearization error threshold; skip the caps entirely.
+        }
+        capVerb = Verb::kSquareCap;
+        fCurrStrokeTallies->fStrokes[0] += 2;  // Two square caps are tallied as two linear strokes.
+    } else {
+        SkASSERT(SkPaint::kRound_Cap == fCurrStrokeCapType);
+        if (fCurrStrokeRadius < kMaxErrorFromLinearization) {
+            return;  // Below the linearization error threshold; skip the caps entirely.
+        }
+        capVerb = Verb::kRoundCap;
+        fCurrStrokeTallies->fTriangles += 2;  // Tallied for both caps together:
+        fCurrStrokeTallies->fConics += 4;     // 2 triangles and 4 conics.
+    }
+
+    fVerbs.push_back(capVerb);
+    fVerbs.push_back(Verb::kEndContour);
+    // NOTE(review): the second cap seems to form its own contour from the data pushed below; confirm.
+    fVerbs.push_back(capVerb);
+
+    // Reserve the space first, since push_back() takes the point by reference and might
+    // invalidate the reference if the array grows.
+    fPoints.reserve(fPoints.count() + 1);
+    fPoints.push_back(fPoints[fCurrContourFirstPtIdx]);  // Copy of the contour's first point.
+
+    // Reserve the space first, since push_back() takes the normal by reference and might
+    // invalidate the reference if the array grows. (Although in this case we should be fine
+    // since there is a negate operator.)
+    fNormals.reserve(fNormals.count() + 1);
+    fNormals.push_back(-fNormals[fCurrContourFirstNormalIdx]);  // First normal, negated.
+}
diff --git a/src/gpu/ccpr/GrCCStrokeGeometry.h b/src/gpu/ccpr/GrCCStrokeGeometry.h
new file mode 100644
index 0000000..56871f2
--- /dev/null
+++ b/src/gpu/ccpr/GrCCStrokeGeometry.h
@@ -0,0 +1,180 @@
+/*
+ * Copyright 2018 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#ifndef GrGrCCStrokeGeometry_DEFINED
+#define GrGrCCStrokeGeometry_DEFINED
+
+#include "SkPaint.h"
+#include "SkPoint.h"
+#include "SkTArray.h"
+
+class SkStrokeRec;
+
+/**
+ * This class converts device-space stroked paths into a set of independent strokes, joins, and caps
+ * that map directly to coverage-counted GPU instances. Non-hairline strokes can only be drawn with
+ * rigid body transforms; we don't yet support skewing the stroke lines themselves.
+ */
+class GrCCStrokeGeometry {
+public:
+    static constexpr int kMaxNumLinearSegmentsLog2 = 15;  // Upper bound asserted by recordStroke().
+
+    GrCCStrokeGeometry(int numSkPoints = 0, int numSkVerbs = 0)
+            : fVerbs(numSkVerbs * 5/2)  // Reserve for a 2.5x expansion in verbs. (Joins get their
+                                        // own separate verb in our representation.)
+            , fParams(numSkVerbs * 3)  // Somewhere around 1-2 params per verb.
+            , fPoints(numSkPoints * 5/4)  // Reserve for a 1.25x expansion in points and normals.
+            , fNormals(numSkPoints * 5/4) {}
+
+    // A string of verbs and their corresponding params, points, and normals are a compact
+    // representation of what will eventually be independent instances in GPU buffers. When added
+    // up, the combined coverage of all these instances will make complete stroked paths.
+    enum class Verb : uint8_t {
+        kBeginPath,  // Instructs the iterator to advance its stroke width, atlas offset, etc.
+
+        // Independent strokes of a single line or curve, with (antialiased) butt caps on the ends.
+        kLinearStroke,
+        kQuadraticStroke,
+        kCubicStroke,
+
+        // Joins are triangles that connect the outer corners of two adjoining strokes. Miters
+        // have an additional triangle cap on top of the bevel, and round joins have an arc on top.
+        kBevelJoin,
+        kMiterJoin,
+        kRoundJoin,
+
+        // We use internal joins when we have to internally break up a stroke because its curvature
+        // is too strong for a triangle strip. They are coverage-counted, self-intersecting
+        // quadrilaterals that tie the four corners of two adjoining strokes together like a
+        // shoelace. (Coverage is negative on the inside half.) We place an arc on both ends of an
+        // internal round join.
+        kInternalBevelJoin,
+        kInternalRoundJoin,
+
+        kSquareCap,
+        kRoundCap,
+
+        kEndContour  // Instructs the iterator to advance its internal point and normal ptrs.
+    };
+    static bool IsInternalJoinVerb(Verb verb);  // True only for the two kInternal*Join verbs.
+
+    // Some verbs require additional parameter(s).
+    union Parameter {
+        // For cubic and quadratic strokes: How many flat line segments to chop the curve into?
+        int fNumLinearSegmentsLog2;
+        // For miter and round joins: How tall should the triangle cap be on top of the join?
+        // (This triangle is the conic control points for a round join.)
+        float fMiterCapHeightOverWidth;
+        float fConicWeight;  // Round joins only.
+    };
+    // Read-only access to the recorded data. Each accessor asserts that no contour is still open.
+    const SkTArray<Verb, true>& verbs() const { SkASSERT(!fInsideContour); return fVerbs; }
+    const SkTArray<Parameter, true>& params() const { SkASSERT(!fInsideContour); return fParams; }
+    const SkTArray<SkPoint, true>& points() const { SkASSERT(!fInsideContour); return fPoints; }
+    const SkTArray<SkVector, true>& normals() const { SkASSERT(!fInsideContour); return fNormals; }
+
+    // These track the numbers of instances required to draw all the recorded strokes.
+    struct InstanceTallies {
+        int fStrokes[kMaxNumLinearSegmentsLog2 + 1];  // Indexed by log2 of the segment count.
+        int fTriangles;
+        int fConics;
+
+        InstanceTallies operator+(const InstanceTallies&) const;  // Member-wise sum.
+    };
+    // Recording interface: beginPath(), then contours built from moveTo() and the *To() calls.
+    void beginPath(const SkStrokeRec&, float strokeDevWidth, InstanceTallies*);
+    void moveTo(SkPoint);
+    void lineTo(SkPoint);
+    void quadraticTo(const SkPoint[3]);
+    void cubicTo(const SkPoint[4]);
+    void closeContour();  // Connect back to the first point in the contour and exit.
+    void capContourAndExit();  // Add endcaps (if any) and exit the contour.
+
+private:
+    void lineTo(Verb leftJoinVerb, SkPoint);
+    void quadraticTo(Verb leftJoinVerb, const SkPoint[3], float maxCurvatureT);
+    // Sentinel T values that cubicTo() compares against to mean "no max-curvature point here".
+    static constexpr float kLeftMaxCurvatureNone = 1;
+    static constexpr float kRightMaxCurvatureNone = 0;
+    void cubicTo(Verb leftJoinVerb, const SkPoint[4], float maxCurvatureT, float leftMaxCurvatureT,
+                 float rightMaxCurvatureT);
+
+    // Pushes a new normal to fNormals and records a join, without changing the current position.
+    void rotateTo(Verb leftJoinVerb, SkVector normal);
+
+    // Records a stroke in fVerbs (plus a segment-count entry in fParams for curves).
+    void recordStroke(Verb, int numSegmentsLog2);
+
+    // Records a join in fVerbs with the previous stroke, if the current contour is not empty.
+    void recordLeftJoinIfNotEmpty(Verb joinType, SkVector nextNormal);
+    void recordBevelJoin(Verb originalJoinVerb);
+    void recordMiterJoin(float miterCapHeightOverWidth);
+    void recordRoundJoin(Verb roundJoinVerb, float miterCapHeightOverWidth, float conicWeight);
+
+    void recordCapsIfAny();
+    // State of the path currently being recorded.
+    float fCurrStrokeRadius;
+    Verb fCurrStrokeJoinVerb;
+    SkPaint::Cap fCurrStrokeCapType;
+    InstanceTallies* fCurrStrokeTallies = nullptr;  // Not owned; supplied through beginPath().
+
+    // We implement miters by placing a triangle-shaped cap on top of a bevel join. This field tells
+    // us what the miter limit is, restated in terms of how tall that triangle cap can be.
+    float fMiterMaxCapHeightOverWidth;
+
+    // Any curvature on the original curve gets magnified on the outer edge of the stroke,
+    // proportional to how thick the stroke radius is. This field tells us the maximum curvature we
+    // can tolerate using the current stroke radius, before linearization artifacts begin to appear
+    // on the outer edge.
+    //
+    // (Curvature this strong is quite rare in practice, but when it does happen, we decompose the
+    // section with strong curvature into lineTo's with round joins in between.)
+    float fMaxCurvatureCosTheta;
+    // Indices into fPoints and fNormals where the current contour's data begins.
+    int fCurrContourFirstPtIdx;
+    int fCurrContourFirstNormalIdx;
+
+    SkDEBUGCODE(bool fInsideContour = false);  // Debug-only flag checked by the SkASSERTs above.
+    // Recorded output, exposed through the accessors above.
+    SkSTArray<128, Verb, true> fVerbs;
+    SkSTArray<128, Parameter, true> fParams;
+    SkSTArray<128, SkPoint, true> fPoints;
+    SkSTArray<128, SkVector, true> fNormals;
+};
+
+inline GrCCStrokeGeometry::InstanceTallies GrCCStrokeGeometry::InstanceTallies::operator+(
+        const InstanceTallies& t) const {  // Returns the member-wise sum of *this and t.
+    InstanceTallies ret;  // Left uninitialized here; every field is assigned below.
+    for (int i = 0; i <= kMaxNumLinearSegmentsLog2; ++i) {  // Inclusive: fStrokes has max+1 slots.
+        ret.fStrokes[i] = fStrokes[i] + t.fStrokes[i];
+    }
+    ret.fTriangles = fTriangles + t.fTriangles;
+    ret.fConics = fConics + t.fConics;
+    return ret;
+}
+
+inline bool GrCCStrokeGeometry::IsInternalJoinVerb(Verb verb) {  // True for kInternal*Join verbs.
+    switch (verb) {  // Every enumerator is listed and there is no default label.
+        case Verb::kInternalBevelJoin:
+        case Verb::kInternalRoundJoin:
+            return true;
+        case Verb::kBeginPath:
+        case Verb::kLinearStroke:
+        case Verb::kQuadraticStroke:
+        case Verb::kCubicStroke:
+        case Verb::kBevelJoin:
+        case Verb::kMiterJoin:
+        case Verb::kRoundJoin:
+        case Verb::kSquareCap:
+        case Verb::kRoundCap:
+        case Verb::kEndContour:
+            return false;
+    }
+    SK_ABORT("Invalid GrCCStrokeGeometry::Verb.");  // Only reached for a value outside the enum.
+    return false;
+}
+#endif
diff --git a/src/gpu/ccpr/GrCCStroker.cpp b/src/gpu/ccpr/GrCCStroker.cpp
new file mode 100644
index 0000000..ab3906c
--- /dev/null
+++ b/src/gpu/ccpr/GrCCStroker.cpp
@@ -0,0 +1,832 @@
+/*
+ * Copyright 2018 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#include "GrCCStroker.h"
+
+#include "GrGpuCommandBuffer.h"
+#include "GrOnFlushResourceProvider.h"
+#include "SkPathPriv.h"
+#include "SkStrokeRec.h"
+#include "ccpr/GrCCCoverageProcessor.h"
+#include "glsl/GrGLSLFragmentShaderBuilder.h"
+#include "glsl/GrGLSLVertexGeoBuilder.h"
+
+static constexpr int kMaxNumLinearSegmentsLog2 = GrCCStrokeGeometry::kMaxNumLinearSegmentsLog2;  // File-local shorthand.
+using TriangleInstance = GrCCCoverageProcessor::TriPointInstance;  // Instance type used for triangles.
+using ConicInstance = GrCCCoverageProcessor::QuadPointInstance;  // Instance type used for conics.
+
+namespace {
+
+struct LinearStrokeInstance {  // Per-instance data; layout is asserted by LinearStrokeProcessor.
+    float fEndpoints[4];  // Bound to the "endpts" attribute (xy = first point, zw = second point).
+    float fStrokeRadius;  // Bound to the "stroke_radius" attribute.
+
+    inline void set(const SkPoint[2], float dx, float dy, float strokeRadius);
+};
+
+inline void LinearStrokeInstance::set(const SkPoint P[2], float dx, float dy, float strokeRadius) {  // Fills the instance from two endpoints, translated by (dx, dy).
+    Sk2f X, Y;
+    Sk2f::Load2(P, &X, &Y);  // Presumably de-interleaves the two points into x and y lanes.
+    Sk2f::Store2(fEndpoints, X + dx, Y + dy);  // Presumably re-interleaves as x0,y0,x1,y1.
+    fStrokeRadius = strokeRadius;
+}
+
+struct CubicStrokeInstance {  // Per-instance data; layout is asserted by CubicStrokeProcessor.
+    float fX[4];  // Bound to the "X" attribute: x coordinates of the four control points.
+    float fY[4];  // Bound to the "Y" attribute: y coordinates of the four control points.
+    float fStrokeRadius;  // First component of the float2 "stroke_info" attribute.
+    float fNumSegments;  // Second component of "stroke_info"; stored as a float.
+
+    inline void set(const SkPoint[4], float dx, float dy, float strokeRadius, int numSegments);
+    inline void set(const Sk4f& X, const Sk4f& Y, float dx, float dy, float strokeRadius,
+                    int numSegments);
+};
+
+inline void CubicStrokeInstance::set(const SkPoint P[4], float dx, float dy, float strokeRadius,
+                                     int numSegments) {  // Point-array overload; forwards below.
+    Sk4f X, Y;
+    Sk4f::Load2(P, &X, &Y);  // Presumably de-interleaves the four points into x and y lanes.
+    this->set(X, Y, dx, dy, strokeRadius, numSegments);
+}
+
+inline void CubicStrokeInstance::set(const Sk4f& X, const Sk4f& Y, float dx, float dy,
+                                     float strokeRadius, int numSegments) {  // Fills all fields.
+    (X + dx).store(&fX);  // Control point x coordinates, translated by dx.
+    (Y + dy).store(&fY);  // Control point y coordinates, translated by dy.
+    fStrokeRadius = strokeRadius;
+    fNumSegments = static_cast<float>(numSegments);  // Converted because the attribute is a float.
+}
+
+// This class draws stroked lines in post-transform device space (a.k.a. rectangles). Rigid-body
+// transforms can be achieved by transforming the line ahead of time and adjusting the stroke
+// width. Skews of the stroke itself are not yet supported.
+//
+// Corner coverage is AA-correct, meaning, n^2 attenuation along the diagonals. This is important
+// for seamless integration with the connecting geometry.
+class LinearStrokeProcessor : public GrGeometryProcessor {
+public:
+    LinearStrokeProcessor() : GrGeometryProcessor(kLinearStrokeProcessor_ClassID) {
+        this->setInstanceAttributeCnt(2);  // Must match the size of kInstanceAttribs.
+#ifdef SK_DEBUG
+        // Check that instance attributes exactly match the LinearStrokeInstance struct layout.
+        using Instance = LinearStrokeInstance;
+        SkASSERT(!strcmp(this->instanceAttribute(0).name(), "endpts"));
+        SkASSERT(this->debugOnly_instanceAttributeOffset(0) == offsetof(Instance, fEndpoints));
+        SkASSERT(!strcmp(this->instanceAttribute(1).name(), "stroke_radius"));
+        SkASSERT(this->debugOnly_instanceAttributeOffset(1) == offsetof(Instance, fStrokeRadius));
+        SkASSERT(this->debugOnly_instanceStride() == sizeof(Instance));
+#endif
+    }
+
+private:
+    const char* name() const override { return "LinearStrokeProcessor"; }
+    void getGLSLProcessorKey(const GrShaderCaps&, GrProcessorKeyBuilder*) const override {}  // Adds nothing to the key.
+    // Attribute names are the identifiers that the shader code in Impl::onEmitCode() refers to.
+    static constexpr Attribute kInstanceAttribs[2] = {
+            {"endpts", kFloat4_GrVertexAttribType},
+            {"stroke_radius", kFloat_GrVertexAttribType}
+    };
+
+    const Attribute& onInstanceAttribute(int i) const override { return kInstanceAttribs[i]; }
+    // Emits the shader code. setData() is a no-op.
+    class Impl : public GrGLSLGeometryProcessor {
+        void setData(const GrGLSLProgramDataManager&, const GrPrimitiveProcessor&,
+                     FPCoordTransformIter&&) override {}
+        void onEmitCode(EmitArgs& args, GrGPArgs* gpArgs) override;
+    };
+
+    GrGLSLPrimitiveProcessor* createGLSLInstance(const GrShaderCaps&) const override {
+        return new Impl();  // NOTE(review): ownership presumably passes to the caller; confirm.
+    }
+};
+
+void LinearStrokeProcessor::Impl::onEmitCode(EmitArgs& args, GrGPArgs* gpArgs) {  // Emits the vertex and fragment code for one stroked line.
+    GrGLSLVaryingHandler* varyingHandler = args.fVaryingHandler;
+    GrGLSLUniformHandler* uniHandler = args.fUniformHandler;
+
+    varyingHandler->emitAttributes(args.fGP.cast<LinearStrokeProcessor>());
+    // "tan" is the unit tangent of the line, "n" is "tan" rotated 90 degrees, and "nwidth" is |n.x| + |n.y|.
+    GrGLSLVertexBuilder* v = args.fVertBuilder;
+    v->codeAppend ("float2 tan = normalize(endpts.zw - endpts.xy);");
+    v->codeAppend ("float2 n = float2(tan.y, -tan.x);");
+    v->codeAppend ("float nwidth = abs(n.x) + abs(n.y);");
+
+    // Outset the vertex position for AA butt caps. Vertices 0-1 sit at the first endpoint and the
+    // remaining vertices at the second.
+    v->codeAppend ("float2 outset = tan*nwidth/2;");
+    v->codeAppend ("float2 position = (sk_VertexID < 2) "
+                           "? endpts.xy - outset : endpts.zw + outset;");
+
+    // Calculate Manhattan distance from both butt caps, where distance=0 on the actual endpoint and
+    // distance=-.5 on the outset edge.
+    GrGLSLVarying edgeDistances(kFloat4_GrSLType);
+    varyingHandler->addVarying("edge_distances", &edgeDistances);
+    v->codeAppendf("%s.xz = float2(-.5, dot(endpts.zw - endpts.xy, tan) / nwidth + .5);",
+                   edgeDistances.vsOut());
+    v->codeAppendf("%s.xz = (sk_VertexID < 2) ? %s.xz : %s.zx;",
+                   edgeDistances.vsOut(), edgeDistances.vsOut(), edgeDistances.vsOut());
+
+    // Outset the vertex position for stroke radius plus edge AA. Even vertex IDs go to +n, odd to -n.
+    v->codeAppend ("outset = n * (stroke_radius + nwidth/2);");
+    v->codeAppend ("position += (0 == (sk_VertexID & 1)) ? +outset : -outset;");
+
+    // Calculate Manhattan distance from both edges, where distance=0 on the actual edge and
+    // distance=-.5 on the outset.
+    v->codeAppendf("%s.yw = float2(-.5, 2*stroke_radius / nwidth + .5);", edgeDistances.vsOut());
+    v->codeAppendf("%s.yw = (0 == (sk_VertexID & 1)) ? %s.yw : %s.wy;",
+                   edgeDistances.vsOut(), edgeDistances.vsOut(), edgeDistances.vsOut());
+
+    gpArgs->fPositionVar.set(kFloat2_GrSLType, "position");
+    this->emitTransforms(v, varyingHandler, uniHandler, GrShaderVar("position", kFloat2_GrSLType),
+                         SkMatrix::I(), args.fFPCoordTransformHandler);
+
+    // Use the 4 edge distances to calculate coverage in the fragment shader. Each distance is
+    // clamped to .5, opposing edges are summed, and the two sums are multiplied together.
+    GrGLSLFPFragmentBuilder* f = args.fFragBuilder;
+    f->codeAppendf("half2 coverages = min(%s.xy, .5) + min(%s.zw, .5);",
+                   edgeDistances.fsIn(), edgeDistances.fsIn());
+    f->codeAppendf("%s = half4(coverages.x * coverages.y);", args.fOutputColor);
+
+    // This shader doesn't use the built-in Ganesh coverage; coverage is written to the color output.
+    f->codeAppendf("%s = half4(1);", args.fOutputCoverage);
+}
+
+constexpr GrPrimitiveProcessor::Attribute LinearStrokeProcessor::kInstanceAttribs[];  // Out-of-class definition of the static constexpr member.
+
+// This class draws stroked cubics in post-transform device space. Rigid-body transforms can be
+// achieved by transforming the curve ahead of time and adjusting the stroke width. Skews of the
+// stroke itself are not yet supported. Quadratics can be drawn by converting them to cubics.
+//
+// This class works by finding stroke-width line segments orthogonal to the curve at a
+// pre-determined number of evenly spaced points along the curve (evenly spaced in the parametric
+// sense). It then connects the segments with a triangle strip. As is common in CCPR, clockwise-
+// winding triangles from the strip emit positive coverage, counter-clockwise triangles emit
+// negative, and we use SkBlendMode::kPlus.
+class CubicStrokeProcessor : public GrGeometryProcessor {
+public:
+    CubicStrokeProcessor() : GrGeometryProcessor(kCubicStrokeProcessor_ClassID) {
+        this->setInstanceAttributeCnt(3);  // Must match the size of kInstanceAttribs.
+#ifdef SK_DEBUG
+        // Check that instance attributes exactly match the CubicStrokeInstance struct layout.
+        using Instance = CubicStrokeInstance;
+        SkASSERT(!strcmp(this->instanceAttribute(0).name(), "X"));
+        SkASSERT(this->debugOnly_instanceAttributeOffset(0) == offsetof(Instance, fX));
+        SkASSERT(!strcmp(this->instanceAttribute(1).name(), "Y"));
+        SkASSERT(this->debugOnly_instanceAttributeOffset(1) == offsetof(Instance, fY));
+        SkASSERT(!strcmp(this->instanceAttribute(2).name(), "stroke_info"));
+        SkASSERT(this->debugOnly_instanceAttributeOffset(2) == offsetof(Instance, fStrokeRadius));
+        SkASSERT(this->debugOnly_instanceStride() == sizeof(Instance));
+#endif
+    }
+
+private:
+    const char* name() const override { return "CubicStrokeProcessor"; }
+    void getGLSLProcessorKey(const GrShaderCaps&, GrProcessorKeyBuilder*) const override {}  // Adds nothing to the key.
+    // "stroke_info" packs the stroke radius and the segment count into a single float2.
+    static constexpr Attribute kInstanceAttribs[3] = {
+            {"X", kFloat4_GrVertexAttribType},
+            {"Y", kFloat4_GrVertexAttribType},
+            {"stroke_info", kFloat2_GrVertexAttribType}
+    };
+
+    const Attribute& onInstanceAttribute(int i) const override { return kInstanceAttribs[i]; }
+    // Emits the shader code. setData() is a no-op.
+    class Impl : public GrGLSLGeometryProcessor {
+        void setData(const GrGLSLProgramDataManager&, const GrPrimitiveProcessor&,
+                     FPCoordTransformIter&&) override {}
+        void onEmitCode(EmitArgs& args, GrGPArgs* gpArgs) override;
+    };
+
+    GrGLSLPrimitiveProcessor* createGLSLInstance(const GrShaderCaps&) const override {
+        return new Impl();  // NOTE(review): ownership presumably passes to the caller; confirm.
+    }
+};
+
+void CubicStrokeProcessor::Impl::onEmitCode(EmitArgs& args, GrGPArgs* gpArgs) {  // Emits the vertex and fragment code for one stroked cubic.
+    GrGLSLVaryingHandler* varyingHandler = args.fVaryingHandler;
+    GrGLSLUniformHandler* uniHandler = args.fUniformHandler;
+
+    varyingHandler->emitAttributes(args.fGP.cast<CubicStrokeProcessor>());
+    // Rebuild the four control points from the X/Y attributes and unpack "stroke_info".
+    GrGLSLVertexBuilder* v = args.fVertBuilder;
+    v->codeAppend ("float4x2 P = transpose(float2x4(X, Y));");
+    v->codeAppend ("float stroke_radius = stroke_info[0];");
+    v->codeAppend ("float num_segments = stroke_info[1];");
+
+    // Find the parametric T value at which we will emit our orthogonal line segment. We emit two
+    // line segments at T=0, and likewise two at T=1, for AA butt caps. Each pair of vertices shares a point_id.
+    v->codeAppend ("float point_id = float(sk_VertexID/2);");
+    v->codeAppend ("float T = max((point_id - 1) / num_segments, 0);");
+    v->codeAppend ("T = (point_id >= num_segments + 1) ? 1 : T;");  // In case x/x !== 1.
+
+    // Use De Casteljau's algorithm to find the position and tangent for our orthogonal line
+    // segment. De Casteljau's is more numerically stable than evaluating the curve and derivative
+    // directly.
+    v->codeAppend ("float2 ab = mix(P[0], P[1], T);");
+    v->codeAppend ("float2 bc = mix(P[1], P[2], T);");
+    v->codeAppend ("float2 cd = mix(P[2], P[3], T);");
+    v->codeAppend ("float2 abc = mix(ab, bc, T);");
+    v->codeAppend ("float2 bcd = mix(bc, cd, T);");
+    v->codeAppend ("float2 position = mix(abc, bcd, T);");
+    v->codeAppend ("float2 tan = bcd - abc;");
+
+    // Find actual tangents for the corner cases when De Casteljau's yields tan=0. (We shouldn't
+    // encounter other numerically unstable cases where tan ~= 0, because GrCCStrokeGeometry snaps
+    // control points to endpoints in curves where they are almost equal.)
+    v->codeAppend ("if (0 == T && P[0] == P[1]) {");
+    v->codeAppend (    "tan = P[2] - P[0];");
+    v->codeAppend ("}");
+    v->codeAppend ("if (1 == T && P[2] == P[3]) {");
+    v->codeAppend (    "tan = P[3] - P[1];");
+    v->codeAppend ("}");
+    v->codeAppend ("tan = normalize(tan);");
+    v->codeAppend ("float2 n = float2(tan.y, -tan.x);");
+    v->codeAppend ("float nwidth = abs(n.x) + abs(n.y);");
+
+    // Outset the vertex position for stroke radius plus edge AA. Even vertex IDs go to -n, odd to +n.
+    v->codeAppend ("float2 outset = n * (stroke_radius + nwidth/2);");
+    v->codeAppend ("position += (0 == (sk_VertexID & 1)) ? -outset : +outset;");
+
+    // Calculate the Manhattan distance from both edges, where distance=0 on the actual edge and
+    // distance=-.5 on the outset.
+    GrGLSLVarying coverages(kFloat3_GrSLType);
+    varyingHandler->addVarying("coverages", &coverages);
+    v->codeAppendf("%s.xy = float2(-.5, 2*stroke_radius / nwidth + .5);", coverages.vsOut());
+    v->codeAppendf("%s.xy = (0 == (sk_VertexID & 1)) ? %s.xy : %s.yx;",
+                   coverages.vsOut(), coverages.vsOut(), coverages.vsOut());
+
+    // Adjust the orthogonal line segments on the endpoints so they straddle the actual endpoint
+    // at a Manhattan distance of .5 on either side.
+    v->codeAppend ("if (0 == point_id || num_segments+1 == point_id) {");
+    v->codeAppend (    "position -= tan*nwidth/2;");
+    v->codeAppend ("}");
+    v->codeAppend ("if (1 == point_id || num_segments+2 == point_id) {");
+    v->codeAppend (    "position += tan*nwidth/2;");
+    v->codeAppend ("}");
+
+    // Interpolate coverage for butt cap AA from 0 on the outer segment to 1 on the inner.
+    v->codeAppendf("%s.z = (0 == point_id || num_segments+2 == point_id) ? 0 : 1;",
+                   coverages.vsOut());
+
+    gpArgs->fPositionVar.set(kFloat2_GrSLType, "position");
+    this->emitTransforms(v, varyingHandler, uniHandler, GrShaderVar("position", kFloat2_GrSLType),
+                         SkMatrix::I(), args.fFPCoordTransformHandler);
+
+    // Use the 2 edge distances and interpolated butt cap AA to calculate fragment coverage.
+    GrGLSLFPFragmentBuilder* f = args.fFragBuilder;
+    f->codeAppendf("half2 edge_coverages = min(%s.xy, .5);", coverages.fsIn());
+    f->codeAppend ("half coverage = edge_coverages.x + edge_coverages.y;");
+    f->codeAppendf("coverage *= %s.z;", coverages.fsIn());  // Butt cap AA.
+
+    // As is common for CCPR, clockwise-winding triangles from the strip emit positive coverage, and
+    // counter-clockwise triangles emit negative.
+    f->codeAppendf("%s = half4(sk_Clockwise ? +coverage : -coverage);", args.fOutputColor);
+
+    // This shader doesn't use the built-in Ganesh coverage; coverage is written to the color output.
+    f->codeAppendf("%s = half4(1);", args.fOutputCoverage);
+}
+
+constexpr GrPrimitiveProcessor::Attribute CubicStrokeProcessor::kInstanceAttribs[];  // Out-of-class definition of the static constexpr member.
+
+}  // anonymous namespace
+
+void GrCCStroker::parseDeviceSpaceStroke(const SkPath& path, const SkPoint* deviceSpacePts,
+                                         const SkStrokeRec& stroke, float strokeDevWidth,
+                                         GrScissorTest scissorTest,
+                                         const SkIRect& clippedDevIBounds,
+                                         const SkIVector& devToAtlasOffset) {  // Feeds one stroked path into fGeometry, verb by verb.
+    SkASSERT(SkStrokeRec::kStroke_Style == stroke.getStyle() ||
+             SkStrokeRec::kHairline_Style == stroke.getStyle());
+    SkASSERT(!fInstanceBuffer);
+    SkASSERT(!path.isEmpty());
+    // Open a new batch if none is open; it starts from the current non-scissored instance counts.
+    if (!fHasOpenBatch) {
+        fBatches.emplace_back(&fTalliesAllocator, *fInstanceCounts[(int)GrScissorTest::kDisabled],
+                              fScissorSubBatches.count());
+        fInstanceCounts[(int)GrScissorTest::kDisabled] = fBatches.back().fNonScissorEndInstances;
+        fHasOpenBatch = true;
+    }
+    // Choose the tallies that this stroke will increment. A scissored stroke gets its own sub-batch.
+    InstanceTallies* currStrokeEndIndices;
+    if (GrScissorTest::kEnabled == scissorTest) {
+        SkASSERT(fBatches.back().fEndScissorSubBatch == fScissorSubBatches.count());
+        fScissorSubBatches.emplace_back(
+                &fTalliesAllocator, *fInstanceCounts[(int)GrScissorTest::kEnabled],
+                clippedDevIBounds.makeOffset(devToAtlasOffset.x(), devToAtlasOffset.y()));
+        fBatches.back().fEndScissorSubBatch = fScissorSubBatches.count();
+        fInstanceCounts[(int)GrScissorTest::kEnabled] =
+                currStrokeEndIndices = fScissorSubBatches.back().fEndInstances;
+    } else {
+        currStrokeEndIndices = fBatches.back().fNonScissorEndInstances;
+    }
+
+    fGeometry.beginPath(stroke, strokeDevWidth, currStrokeEndIndices);
+    // Per-path info is stored alongside the geometry; note that the radius is half the device width.
+    fPathInfos.push_back() = {devToAtlasOffset, strokeDevWidth/2, scissorTest};
+
+    int devPtsIdx = 0;  // Index of the next unconsumed point in deviceSpacePts.
+    SkPath::Verb previousVerb = SkPath::kClose_Verb;
+
+    for (SkPath::Verb verb : SkPathPriv::Verbs(path)) {
+        SkASSERT(SkPath::kDone_Verb != previousVerb);
+        const SkPoint* P = &deviceSpacePts[devPtsIdx - 1];  // P[0] is the previous point. NOTE(review): with devPtsIdx == 0 this points one before the array (not dereferenced by the move case); confirm.
+        switch (verb) {
+            case SkPath::kMove_Verb:
+                if (devPtsIdx > 0 && SkPath::kClose_Verb != previousVerb) {
+                    fGeometry.capContourAndExit();  // The previous contour was left open; cap it.
+                }
+                fGeometry.moveTo(deviceSpacePts[devPtsIdx]);
+                ++devPtsIdx;
+                break;
+            case SkPath::kClose_Verb:
+                SkASSERT(SkPath::kClose_Verb != previousVerb);
+                fGeometry.closeContour();
+                break;
+            case SkPath::kLine_Verb:
+                SkASSERT(SkPath::kClose_Verb != previousVerb);
+                fGeometry.lineTo(P[1]);  // P[1] is the line's new endpoint.
+                ++devPtsIdx;
+                break;
+            case SkPath::kQuad_Verb:
+                SkASSERT(SkPath::kClose_Verb != previousVerb);
+                fGeometry.quadraticTo(P);  // Three points, starting at the previous point.
+                devPtsIdx += 2;
+                break;
+            case SkPath::kCubic_Verb: {
+                SkASSERT(SkPath::kClose_Verb != previousVerb);
+                fGeometry.cubicTo(P);  // Four points, starting at the previous point.
+                devPtsIdx += 3;
+                break;
+            }
+            case SkPath::kConic_Verb:
+                SkASSERT(SkPath::kClose_Verb != previousVerb);
+                SK_ABORT("Stroked conics not supported.");
+                break;
+            case SkPath::kDone_Verb:
+                break;
+        }
+        previousVerb = verb;
+    }
+    // If the final contour was never closed, cap it now.
+    if (devPtsIdx > 0 && SkPath::kClose_Verb != previousVerb) {
+        fGeometry.capContourAndExit();
+    }
+}
+
+// This class encapsulates the process of expanding ready-to-draw geometry from GrCCStrokeGeometry
+// directly into GPU instance buffers.
+class GrCCStroker::InstanceBufferBuilder {
+public:
+    InstanceBufferBuilder(GrOnFlushResourceProvider* onFlushRP, GrCCStroker* stroker) {
+        memcpy(fNextInstances, stroker->fBaseInstances, sizeof(fNextInstances));
+#ifdef SK_DEBUG
+        fEndInstances[0] = stroker->fBaseInstances[0] + *stroker->fInstanceCounts[0];
+        fEndInstances[1] = stroker->fBaseInstances[1] + *stroker->fInstanceCounts[1];
+#endif
+        // Conics are laid out last, so the end of the conic array is the end of the buffer.
+        int endConicsIdx = stroker->fBaseInstances[1].fConics +
+                           stroker->fInstanceCounts[1]->fConics;
+        fInstanceBuffer = onFlushRP->makeBuffer(kVertex_GrBufferType,
+                                                endConicsIdx * sizeof(ConicInstance));
+        if (!fInstanceBuffer) {
+            SkDebugf("WARNING: failed to allocate CCPR stroke instance buffer.\n");
+            return;
+        }
+        fInstanceBufferData = fInstanceBuffer->map();  // isMapped() tells whether this worked.
+    }
+
+    bool isMapped() const { return SkToBool(fInstanceBufferData); }
+    // Selects the atlas offset, stroke radius, and instance cursors used by subsequent appends.
+    void updateCurrentInfo(const PathInfo& pathInfo) {
+        SkASSERT(this->isMapped());
+        fCurrDX = static_cast<float>(pathInfo.fDevToAtlasOffset.x());
+        fCurrDY = static_cast<float>(pathInfo.fDevToAtlasOffset.y());
+        fCurrStrokeRadius = pathInfo.fStrokeRadius;
+        fCurrNextInstances = &fNextInstances[(int)pathInfo.fScissorTest];
+        SkDEBUGCODE(fCurrEndInstances = &fEndInstances[(int)pathInfo.fScissorTest]);
+    }
+
+    void appendLinearStroke(const SkPoint endpts[2]) {
+        SkASSERT(this->isMapped());
+        this->appendLinearStrokeInstance().set(endpts, fCurrDX, fCurrDY, fCurrStrokeRadius);
+    }
+    // Quadratics are written as cubic stroke instances with 2^numLinearSegmentsLog2 segments.
+    void appendQuadraticStroke(const SkPoint P[3], int numLinearSegmentsLog2) {
+        SkASSERT(this->isMapped());
+        SkASSERT(numLinearSegmentsLog2 > 0);
+
+        Sk4f ptsT[2];
+        Sk2f p0 = Sk2f::Load(P);
+        Sk2f p1 = Sk2f::Load(P+1);
+        Sk2f p2 = Sk2f::Load(P+2);
+
+        // Convert the quadratic to cubic: c1 = p0 + 2/3*(p1 - p0), c2 = p1 + 1/3*(p2 - p1).
+        Sk2f c1 = SkNx_fma(Sk2f(2/3.f), p1 - p0, p0);
+        Sk2f c2 = SkNx_fma(Sk2f(1/3.f), p2 - p1, p1);
+        Sk2f::Store4(ptsT, p0, c1, c2, p2);
+
+        this->appendCubicStrokeInstance(numLinearSegmentsLog2).set(
+                ptsT[0], ptsT[1], fCurrDX, fCurrDY, fCurrStrokeRadius, 1 << numLinearSegmentsLog2);
+    }
+    // Appends a cubic stroke instance. 'P' points at the cubic's four control points.
+    void appendCubicStroke(const SkPoint P[4], int numLinearSegmentsLog2) {
+        SkASSERT(this->isMapped());
+        SkASSERT(numLinearSegmentsLog2 > 0);
+        this->appendCubicStrokeInstance(numLinearSegmentsLog2).set(
+                P, fCurrDX, fCurrDY, fCurrStrokeRadius, 1 << numLinearSegmentsLog2);
+    }
+    // Appends the triangle and/or conic instances that make up the given join around 'center'.
+    void appendJoin(Verb joinVerb, const SkPoint& center, const SkVector& leftNorm,
+                    const SkVector& rightNorm, float miterCapHeightOverWidth, float conicWeight) {
+        SkASSERT(this->isMapped());
+
+        Sk2f offset = Sk2f::Load(&center) + Sk2f(fCurrDX, fCurrDY);
+        Sk2f n0 = Sk2f::Load(&leftNorm);
+        Sk2f n1 = Sk2f::Load(&rightNorm);
+
+        // Identify the outer edge: when cross(n0, n1) < 0, swap the normals and negate them.
+        Sk2f cross = n0 * SkNx_shuffle<1,0>(n1);
+        if (cross[0] < cross[1]) {
+            Sk2f tmp = n0;
+            n0 = -n1;
+            n1 = -tmp;
+        }
+
+        if (!GrCCStrokeGeometry::IsInternalJoinVerb(joinVerb)) {
+            // Normal joins are a triangle that connects the outer corners of two adjoining strokes.
+            this->appendTriangleInstance().set(n1 * fCurrStrokeRadius, Sk2f(0, 0),
+                                               n0 * fCurrStrokeRadius, offset);
+            if (Verb::kBevelJoin == joinVerb) {
+                return;
+            }
+        } else {
+            // Internal joins are coverage-counted, self-intersecting quadrilaterals that tie the
+            // four corners of two adjoining strokes together like a shoelace. Coverage is
+            // negative on the inside half. We implement this geometry with a pair of triangles.
+            this->appendTriangleInstance().set(-n0 * fCurrStrokeRadius, n0 * fCurrStrokeRadius,
+                                               n1 * fCurrStrokeRadius, offset);
+            this->appendTriangleInstance().set(-n0 * fCurrStrokeRadius, n1 * fCurrStrokeRadius,
+                                               -n1 * fCurrStrokeRadius, offset);
+            if (Verb::kInternalBevelJoin == joinVerb) {
+                return;
+            }
+        }
+
+        // For miter and round joins, we place an additional triangle cap on top of the bevel. This
+        // triangle is literal for miters and is conic control points for round joins.
+        SkASSERT(miterCapHeightOverWidth >= 0);
+        Sk2f base = n1 - n0;
+        Sk2f baseNorm = Sk2f(base[1], -base[0]);
+        Sk2f c = (n0 + n1) * .5f + baseNorm * miterCapHeightOverWidth;  // Apex of the cap.
+
+        if (Verb::kMiterJoin == joinVerb) {
+            this->appendTriangleInstance().set(n0 * fCurrStrokeRadius, c * fCurrStrokeRadius,
+                                               n1 * fCurrStrokeRadius, offset);
+        } else {
+            SkASSERT(Verb::kRoundJoin == joinVerb || Verb::kInternalRoundJoin == joinVerb);
+            this->appendConicInstance().setW(n0 * fCurrStrokeRadius, c * fCurrStrokeRadius,
+                                             n1 * fCurrStrokeRadius, offset, conicWeight);
+            if (Verb::kInternalRoundJoin == joinVerb) {  // Mirror the conic onto the other side.
+                this->appendConicInstance().setW(-n1 * fCurrStrokeRadius, c * -fCurrStrokeRadius,
+                                                 -n0 * fCurrStrokeRadius, offset, conicWeight);
+            }
+        }
+    }
+
+    void appendCap(Verb capType, const SkPoint& pt, const SkVector& norm) {
+        SkASSERT(this->isMapped());
+
+        Sk2f n = Sk2f::Load(&norm) * fCurrStrokeRadius;  // Normal, scaled to the stroke radius.
+        Sk2f v = Sk2f(-n[1], n[0]);  // n rotated by 90 degrees.
+        Sk2f offset = Sk2f::Load(&pt) + Sk2f(fCurrDX, fCurrDY);
+
+        if (Verb::kSquareCap == capType) {  // A linear stroke from (0,0) to v, moved by 'offset'.
+            SkPoint endPts[2] = {{0, 0}, {v[0], v[1]}};
+            this->appendLinearStrokeInstance().set(endPts, offset[0], offset[1], fCurrStrokeRadius);
+        } else {
+            SkASSERT(Verb::kRoundCap == capType);  // A triangle plus two conics over its sides.
+            this->appendTriangleInstance().set(n, v, -n, offset);
+            this->appendConicInstance().setW(n, n + v, v, offset, SK_ScalarRoot2Over2);
+            this->appendConicInstance().setW(v, v - n, -n, offset, SK_ScalarRoot2Over2);
+        }
+    }
+    // Unmaps the buffer and hands ownership to the caller; the builder is unmapped afterward.
+    sk_sp<GrBuffer> finish() {
+        SkASSERT(this->isMapped());
+        SkASSERT(!memcmp(fNextInstances, fEndInstances, sizeof(fNextInstances)));
+        fInstanceBuffer->unmap();
+        fInstanceBufferData = nullptr;
+        SkASSERT(!this->isMapped());
+        return std::move(fInstanceBuffer);
+    }
+
+private:  // Each append*Instance() claims the next slot of its type in the shared mapped buffer.
+    LinearStrokeInstance& appendLinearStrokeInstance() {
+        int instanceIdx = fCurrNextInstances->fStrokes[0]++;
+        SkASSERT(instanceIdx < fCurrEndInstances->fStrokes[0]);
+
+        return reinterpret_cast<LinearStrokeInstance*>(fInstanceBufferData)[instanceIdx];
+    }
+
+    CubicStrokeInstance& appendCubicStrokeInstance(int numLinearSegmentsLog2) {
+        SkASSERT(numLinearSegmentsLog2 > 0);
+        SkASSERT(numLinearSegmentsLog2 <= kMaxNumLinearSegmentsLog2);
+
+        int instanceIdx = fCurrNextInstances->fStrokes[numLinearSegmentsLog2]++;
+        SkASSERT(instanceIdx < fCurrEndInstances->fStrokes[numLinearSegmentsLog2]);
+
+        return reinterpret_cast<CubicStrokeInstance*>(fInstanceBufferData)[instanceIdx];
+    }
+
+    TriangleInstance& appendTriangleInstance() {
+        int instanceIdx = fCurrNextInstances->fTriangles++;
+        SkASSERT(instanceIdx < fCurrEndInstances->fTriangles);
+
+        return reinterpret_cast<TriangleInstance*>(fInstanceBufferData)[instanceIdx];
+    }
+
+    ConicInstance& appendConicInstance() {
+        int instanceIdx = fCurrNextInstances->fConics++;
+        SkASSERT(instanceIdx < fCurrEndInstances->fConics);
+
+        return reinterpret_cast<ConicInstance*>(fInstanceBufferData)[instanceIdx];
+    }
+
+    float fCurrDX, fCurrDY;  // Dev-to-atlas offset of the current path.
+    float fCurrStrokeRadius;
+    InstanceTallies* fCurrNextInstances;  // Write cursors for the current path's scissor mode.
+    SkDEBUGCODE(const InstanceTallies* fCurrEndInstances);
+
+    sk_sp<GrBuffer> fInstanceBuffer;
+    void* fInstanceBufferData = nullptr;  // Mapped buffer memory; null when not mapped.
+    InstanceTallies fNextInstances[2];  // Indexed by GrScissorTest.
+    SkDEBUGCODE(InstanceTallies fEndInstances[2]);
+};
+
+GrCCStroker::BatchID GrCCStroker::closeCurrentBatch() {
+    if (!fHasOpenBatch) { return kEmptyBatchID; }  // Nothing was parsed since the last close.
+    const int lastIdx = fBatches.count() - 1;
+    // This batch's scissor sub-batches run from the previous batch's end up to its own end.
+    const int firstSubBatch = (lastIdx < 1) ? 0 : fBatches[lastIdx - 1].fEndScissorSubBatch;
+    const int numSubBatches = fBatches.back().fEndScissorSubBatch - firstSubBatch;
+    fMaxNumScissorSubBatches = SkTMax(fMaxNumScissorSubBatches, numSubBatches);
+    fHasOpenBatch = false;
+    return lastIdx;
+}
+
+bool GrCCStroker::prepareToDraw(GrOnFlushResourceProvider* onFlushRP) {
+    SkASSERT(!fInstanceBuffer);
+    SkASSERT(!fHasOpenBatch);  // Call closeCurrentBatch() first.
+
+    // Here we lay out a single instance buffer to share with every internal batch.
+    //
+    // Rather than place each instance array in its own GPU buffer, we allocate a single
+    // megabuffer and lay them all out side-by-side. We can offset the "baseInstance" parameter in
+    // our draw calls to direct the GPU to the applicable elements within a given array.
+    fBaseInstances[0].fStrokes[0] = 0;  // Linear strokes: non-scissored first, then scissored.
+    fBaseInstances[1].fStrokes[0] = fInstanceCounts[0]->fStrokes[0];
+    int endLinearStrokesIdx = fBaseInstances[1].fStrokes[0] + fInstanceCounts[1]->fStrokes[0];
+    // Cubic strokes come next; round the byte offset up to a whole CubicStrokeInstance index.
+    int cubicStrokesIdx = GR_CT_DIV_ROUND_UP(endLinearStrokesIdx * sizeof(LinearStrokeInstance),
+                                             sizeof(CubicStrokeInstance));
+    for (int i = 1; i <= kMaxNumLinearSegmentsLog2; ++i) {  // By log2 segment count, then scissor.
+        for (int j = 0; j < kNumScissorModes; ++j) {
+            fBaseInstances[j].fStrokes[i] = cubicStrokesIdx;
+            cubicStrokesIdx += fInstanceCounts[j]->fStrokes[i];
+        }
+    }
+    // Triangles follow the cubic strokes, again rounded up to a whole-instance index.
+    int trianglesIdx = GR_CT_DIV_ROUND_UP(cubicStrokesIdx * sizeof(CubicStrokeInstance),
+                                          sizeof(TriangleInstance));
+    fBaseInstances[0].fTriangles = trianglesIdx;
+    fBaseInstances[1].fTriangles =
+            fBaseInstances[0].fTriangles + fInstanceCounts[0]->fTriangles;
+    int endTrianglesIdx =
+            fBaseInstances[1].fTriangles + fInstanceCounts[1]->fTriangles;
+    // Conics come last.
+    int conicsIdx = GR_CT_DIV_ROUND_UP(endTrianglesIdx * sizeof(TriangleInstance),
+                                       sizeof(ConicInstance));
+    fBaseInstances[0].fConics = conicsIdx;
+    fBaseInstances[1].fConics = fBaseInstances[0].fConics + fInstanceCounts[0]->fConics;
+
+    InstanceBufferBuilder builder(onFlushRP, this);
+    if (!builder.isMapped()) {
+        return false;  // Buffer allocation failed.
+    }
+
+    // Now parse the GrCCStrokeGeometry and expand it into the instance buffer.
+    int pathIdx = 0;  // Next entry in fPathInfos.
+    int ptsIdx = 0;  // Cursor into 'pts'.
+    int paramsIdx = 0;  // Cursor into 'params'.
+    int normalsIdx = 0;  // Cursor into 'normals'.
+
+    const SkTArray<GrCCStrokeGeometry::Parameter, true>& params = fGeometry.params();
+    const SkTArray<SkPoint, true>& pts = fGeometry.points();
+    const SkTArray<SkVector, true>& normals = fGeometry.normals();
+
+    float miterCapHeightOverWidth=0, conicWeight=0;  // Only updated by joins that carry them.
+
+    for (Verb verb : fGeometry.verbs()) {
+        switch (verb) {
+            case Verb::kBeginPath:
+                builder.updateCurrentInfo(fPathInfos[pathIdx]);
+                ++pathIdx;
+                continue;
+
+            case Verb::kLinearStroke:
+                builder.appendLinearStroke(&pts[ptsIdx]);
+                ++ptsIdx;  // Advance by one: the segment's end point remains the current point.
+                continue;
+            case Verb::kQuadraticStroke:
+                builder.appendQuadraticStroke(&pts[ptsIdx],
+                                              params[paramsIdx++].fNumLinearSegmentsLog2);
+                ptsIdx += 2;
+                ++normalsIdx;
+                continue;
+            case Verb::kCubicStroke:
+                builder.appendCubicStroke(&pts[ptsIdx], params[paramsIdx++].fNumLinearSegmentsLog2);
+                ptsIdx += 3;
+                ++normalsIdx;
+                continue;
+
+            case Verb::kRoundJoin:
+            case Verb::kInternalRoundJoin:
+                conicWeight = params[paramsIdx++].fConicWeight;  // Read before the miter param.
+                // fallthru
+            case Verb::kMiterJoin:
+                miterCapHeightOverWidth = params[paramsIdx++].fMiterCapHeightOverWidth;
+                // fallthru
+            case Verb::kBevelJoin:
+            case Verb::kInternalBevelJoin:
+                builder.appendJoin(verb, pts[ptsIdx], normals[normalsIdx], normals[normalsIdx + 1],
+                                   miterCapHeightOverWidth, conicWeight);
+                ++normalsIdx;
+                continue;
+
+            case Verb::kSquareCap:
+            case Verb::kRoundCap:
+                builder.appendCap(verb, pts[ptsIdx], normals[normalsIdx]);
+                continue;  // Caps do not advance the point or normal cursors.
+
+            case Verb::kEndContour:
+                ++ptsIdx;  // Step past the contour's current point and normal.
+                ++normalsIdx;
+                continue;
+        }
+        SK_ABORT("Invalid CCPR stroke element.");
+    }
+
+    fInstanceBuffer = builder.finish();
+    SkASSERT(fPathInfos.count() == pathIdx);
+    SkASSERT(pts.count() == ptsIdx);
+    SkASSERT(normals.count() == normalsIdx);
+    // Reserve the mesh/scissor staging arrays once, up front, for the later draw calls.
+    fMeshesBuffer.reserve((1 + fMaxNumScissorSubBatches) * kMaxNumLinearSegmentsLog2);
+    fScissorsBuffer.reserve((1 + fMaxNumScissorSubBatches) * kMaxNumLinearSegmentsLog2);
+    return true;
+}
+
+void GrCCStroker::drawStrokes(GrOpFlushState* flushState, BatchID batchID,
+                              const SkIRect& drawBounds) const {
+    using PrimitiveType = GrCCCoverageProcessor::PrimitiveType;
+    SkASSERT(fInstanceBuffer);  // prepareToDraw() must have succeeded.
+
+    if (kEmptyBatchID == batchID) {
+        return;
+    }
+    const Batch& batch = fBatches[batchID];
+    int startScissorSubBatch = (!batchID) ? 0 : fBatches[batchID - 1].fEndScissorSubBatch;
+    // A batch's start indices are the end indices of whatever preceded it (or zero if nothing).
+    const InstanceTallies* startIndices[2];
+    startIndices[(int)GrScissorTest::kDisabled] = (!batchID)
+            ? &fZeroTallies : fBatches[batchID - 1].fNonScissorEndInstances;
+    startIndices[(int)GrScissorTest::kEnabled] = (!startScissorSubBatch)
+            ? &fZeroTallies : fScissorSubBatches[startScissorSubBatch - 1].fEndInstances;
+    // The scissor test is always enabled: every mesh is paired with a rect in fScissorsBuffer.
+    GrPipeline pipeline(flushState->drawOpArgs().fProxy, GrScissorTest::kEnabled,
+                        SkBlendMode::kPlus);
+
+    // Draw linear strokes.
+    this->appendStrokeMeshesToBuffers(0, batch, startIndices, startScissorSubBatch, drawBounds);
+    if (!fMeshesBuffer.empty()) {
+        LinearStrokeProcessor linearProc;
+        this->flushBufferedMeshesAsStrokes(linearProc, flushState, pipeline, drawBounds);
+    }
+
+    // Draw cubic strokes. (Quadratics were converted to cubics for GPU processing.)
+    for (int i = 1; i <= kMaxNumLinearSegmentsLog2; ++i) {  // Buffer all counts, flush once.
+        this->appendStrokeMeshesToBuffers(i, batch, startIndices, startScissorSubBatch, drawBounds);
+    }
+    if (!fMeshesBuffer.empty()) {
+        CubicStrokeProcessor cubicProc;
+        this->flushBufferedMeshesAsStrokes(cubicProc, flushState, pipeline, drawBounds);
+    }
+
+    // Draw triangles.
+    GrCCCoverageProcessor triProc(flushState->resourceProvider(), PrimitiveType::kTriangles);
+    this->drawConnectingGeometry<&InstanceTallies::fTriangles>(
+            flushState, pipeline, triProc, batch, startIndices, startScissorSubBatch, drawBounds);
+
+    // Draw conics.
+    GrCCCoverageProcessor conicProc(flushState->resourceProvider(), PrimitiveType::kConics);
+    this->drawConnectingGeometry<&InstanceTallies::fConics>(
+            flushState, pipeline, conicProc, batch, startIndices, startScissorSubBatch, drawBounds);
+}
+
+void GrCCStroker::appendStrokeMeshesToBuffers(int numSegmentsLog2, const Batch& batch,
+                                              const InstanceTallies* startIndices[2],
+                                              int startScissorSubBatch,
+                                              const SkIRect& drawBounds) const {
+    // Linear strokes draw a quad. Cubic strokes emit a strip with normals at "numSegments"
+    // evenly-spaced points along the curve, plus one more for the final endpoint, plus two more for
+    // AA butt caps. (i.e., 2 vertices * (numSegments + 3).)
+    int numStripVertices = (0 == numSegmentsLog2) ? 4 : ((1 << numSegmentsLog2) + 3) * 2;
+
+    // Append the non-scissored mesh, if any. Its scissor rect is simply drawBounds.
+    int baseInstance = fBaseInstances[(int)GrScissorTest::kDisabled].fStrokes[numSegmentsLog2];
+    int startIdx = startIndices[(int)GrScissorTest::kDisabled]->fStrokes[numSegmentsLog2];
+    int endIdx = batch.fNonScissorEndInstances->fStrokes[numSegmentsLog2];
+    SkASSERT(endIdx >= startIdx);
+    if (int instanceCount = endIdx - startIdx) {  // Skip empty ranges.
+        GrMesh& mesh = fMeshesBuffer.emplace_back(GrPrimitiveType::kTriangleStrip);
+        mesh.setInstanced(fInstanceBuffer.get(), instanceCount, baseInstance + startIdx,
+                          numStripVertices);
+        fScissorsBuffer.push_back(drawBounds);
+    }
+
+    // Append scissored meshes: one per non-empty sub-batch, each with its own scissor rect.
+    baseInstance = fBaseInstances[(int)GrScissorTest::kEnabled].fStrokes[numSegmentsLog2];
+    startIdx = startIndices[(int)GrScissorTest::kEnabled]->fStrokes[numSegmentsLog2];
+    for (int i = startScissorSubBatch; i < batch.fEndScissorSubBatch; ++i) {
+        const ScissorSubBatch& subBatch = fScissorSubBatches[i];
+        endIdx = subBatch.fEndInstances->fStrokes[numSegmentsLog2];
+        SkASSERT(endIdx >= startIdx);
+        if (int instanceCount = endIdx - startIdx) {
+            GrMesh& mesh = fMeshesBuffer.emplace_back(GrPrimitiveType::kTriangleStrip);
+            mesh.setInstanced(fInstanceBuffer.get(), instanceCount, baseInstance + startIdx,
+                              numStripVertices);
+            fScissorsBuffer.push_back(subBatch.fScissor);
+            startIdx = endIdx;  // The next sub-batch begins where this one ended.
+        }
+    }
+}
+
+void GrCCStroker::flushBufferedMeshesAsStrokes(const GrPrimitiveProcessor& processor,
+                                               GrOpFlushState* flushState,
+                                               const GrPipeline& pipeline,
+                                               const SkIRect& drawBounds) const {
+    const int numMeshes = fMeshesBuffer.count();  // One scissor rect is buffered per mesh.
+    SkASSERT(numMeshes == fScissorsBuffer.count());
+    GrPipeline::DynamicStateArrays perMeshState;
+    perMeshState.fScissorRects = fScissorsBuffer.begin();
+    flushState->rtCommandBuffer()->draw(processor, pipeline, nullptr, &perMeshState,
+                                        fMeshesBuffer.begin(), numMeshes, SkRect::Make(drawBounds));
+    // Empty both staging arrays with pop_back_n(); reset() would also reset the reserve count.
+    fMeshesBuffer.pop_back_n(numMeshes);
+    fScissorsBuffer.pop_back_n(fScissorsBuffer.count());
+}
+
+template<int GrCCStrokeGeometry::InstanceTallies::* InstanceType>
+void GrCCStroker::drawConnectingGeometry(GrOpFlushState* flushState, const GrPipeline& pipeline,
+                                         const GrCCCoverageProcessor& processor,
+                                         const Batch& batch, const InstanceTallies* startIndices[2],
+                                         int startScissorSubBatch,
+                                         const SkIRect& drawBounds) const {
+    // Append the non-scissored mesh, if any. InstanceType selects the tally (e.g., fTriangles).
+    int baseInstance = fBaseInstances[(int)GrScissorTest::kDisabled].*InstanceType;
+    int startIdx = startIndices[(int)GrScissorTest::kDisabled]->*InstanceType;
+    int endIdx = batch.fNonScissorEndInstances->*InstanceType;
+    SkASSERT(endIdx >= startIdx);
+    if (int instanceCount = endIdx - startIdx) {  // Skip empty ranges.
+        processor.appendMesh(fInstanceBuffer.get(), instanceCount, baseInstance + startIdx,
+                             &fMeshesBuffer);
+        fScissorsBuffer.push_back(drawBounds);
+    }
+
+    // Append scissored meshes: one per non-empty sub-batch, each with its own scissor rect.
+    baseInstance = fBaseInstances[(int)GrScissorTest::kEnabled].*InstanceType;
+    startIdx = startIndices[(int)GrScissorTest::kEnabled]->*InstanceType;
+    for (int i = startScissorSubBatch; i < batch.fEndScissorSubBatch; ++i) {
+        const ScissorSubBatch& subBatch = fScissorSubBatches[i];
+        endIdx = subBatch.fEndInstances->*InstanceType;
+        SkASSERT(endIdx >= startIdx);
+        if (int instanceCount = endIdx - startIdx) {
+            processor.appendMesh(fInstanceBuffer.get(), instanceCount, baseInstance + startIdx,
+                                 &fMeshesBuffer);
+            fScissorsBuffer.push_back(subBatch.fScissor);
+            startIdx = endIdx;  // The next sub-batch begins where this one ended.
+        }
+    }
+
+    // Flush the geometry.
+    if (!fMeshesBuffer.empty()) {
+        SkASSERT(fMeshesBuffer.count() == fScissorsBuffer.count());
+        processor.draw(flushState, pipeline, fScissorsBuffer.begin(), fMeshesBuffer.begin(),
+                       fMeshesBuffer.count(), SkRect::Make(drawBounds));
+        // Don't call reset(), as that also resets the reserve count.
+        fMeshesBuffer.pop_back_n(fMeshesBuffer.count());
+        fScissorsBuffer.pop_back_n(fScissorsBuffer.count());
+    }
+}
diff --git a/src/gpu/ccpr/GrCCStroker.h b/src/gpu/ccpr/GrCCStroker.h
new file mode 100644
index 0000000..ac71011
--- /dev/null
+++ b/src/gpu/ccpr/GrCCStroker.h
@@ -0,0 +1,127 @@
+/*
+ * Copyright 2018 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#ifndef GrCCStroker_DEFINED
+#define GrCCStroker_DEFINED
+
+#include "GrAllocator.h"
+#include "GrMesh.h"
+#include "SkNx.h"
+#include "ccpr/GrCCStrokeGeometry.h"
+
+class GrBuffer;
+class GrCCCoverageProcessor;
+class GrOnFlushResourceProvider;
+class GrOpFlushState;
+class GrPipeline;
+class GrPrimitiveProcessor;
+class SkMatrix;
+class SkPath;
+class SkStrokeRec;
+
+/**
+ * This class parses stroked SkPaths into a GPU instance buffer, then issues calls to draw their
+ * coverage counts.
+ */
+class GrCCStroker {
+public:
+    GrCCStroker(int numPaths, int numSkPoints, int numSkVerbs)
+            : fGeometry(numSkPoints, numSkVerbs), fPathInfos(numPaths) {}
+
+    // Parses a device-space SkPath into the current batch, using the SkPath's original verbs with
+    // 'deviceSpacePts', and the SkStrokeRec's original settings with 'strokeDevWidth'. Accepts an
+    // optional post-device-space translate for placement in an atlas.
+    //
+    // Strokes intended as hairlines must have a strokeDevWidth of 1. Non-hairline strokes can only
+    // be drawn with rigid body transforms; affine transformation of the stroke lines themselves is
+    // not yet supported.
+    void parseDeviceSpaceStroke(const SkPath&, const SkPoint* deviceSpacePts, const SkStrokeRec&,
+                                float strokeDevWidth, GrScissorTest,
+                                const SkIRect& clippedDevIBounds,
+                                const SkIVector& devToAtlasOffset);
+
+    using BatchID = int;
+
+    // Compiles the outstanding parsed paths into a batch, and returns an ID that can be used to
+    // draw their strokes in the future.
+    BatchID closeCurrentBatch();
+
+    // Builds an internal GPU buffer and prepares for calls to drawStrokes(). Caller must close the
+    // current batch before calling this method, and cannot parse new paths after.
+    bool prepareToDraw(GrOnFlushResourceProvider*);
+
+    // Called after prepareToDraw(). Draws the given batch of path strokes.
+    void drawStrokes(GrOpFlushState*, BatchID, const SkIRect& drawBounds) const;
+
+private:
+    static constexpr int kNumScissorModes = 2;
+    static constexpr BatchID kEmptyBatchID = -1;  // Returned when no batch was open to close.
+    using Verb = GrCCStrokeGeometry::Verb;
+    using InstanceTallies = GrCCStrokeGeometry::InstanceTallies;
+
+    // Every kBeginPath verb has a corresponding PathInfo entry.
+    struct PathInfo {
+        SkIVector fDevToAtlasOffset;
+        float fStrokeRadius;  // Half of the stroke's device-space width.
+        GrScissorTest fScissorTest;
+    };
+
+    // Defines a sub-batch of stroke instances that have a scissor test and the same scissor rect.
+    // Start indices are deduced by looking at the previous ScissorSubBatch.
+    struct ScissorSubBatch {
+        ScissorSubBatch(GrTAllocator<InstanceTallies>* alloc, const InstanceTallies& startIndices,
+                        const SkIRect& scissor)
+                : fEndInstances(&alloc->emplace_back(startIndices)), fScissor(scissor) {}
+        InstanceTallies* fEndInstances;  // Starts as a copy of 'startIndices'.
+        SkIRect fScissor;
+    };
+
+    // Defines a batch of stroke instances that can be drawn with drawStrokes(). Start indices are
+    // deduced by looking at the previous Batch in the list.
+    struct Batch {
+        Batch(GrTAllocator<InstanceTallies>* alloc, const InstanceTallies& startNonScissorIndices,
+              int startScissorSubBatch)
+                : fNonScissorEndInstances(&alloc->emplace_back(startNonScissorIndices))
+                , fEndScissorSubBatch(startScissorSubBatch) {}
+        InstanceTallies* fNonScissorEndInstances;  // Starts as a copy of the start indices.
+        int fEndScissorSubBatch;  // One past this batch's last entry in fScissorSubBatches.
+    };
+
+    class InstanceBufferBuilder;
+
+    void appendStrokeMeshesToBuffers(int numSegmentsLog2, const Batch&,
+                                     const InstanceTallies* startIndices[2],
+                                     int startScissorSubBatch, const SkIRect& drawBounds) const;
+    void flushBufferedMeshesAsStrokes(const GrPrimitiveProcessor&, GrOpFlushState*, const
+                                      GrPipeline&, const SkIRect& drawBounds) const;
+
+    template<int GrCCStrokeGeometry::InstanceTallies::* InstanceType>
+    void drawConnectingGeometry(GrOpFlushState*, const GrPipeline&,
+                                const GrCCCoverageProcessor&, const Batch&,
+                                const InstanceTallies* startIndices[2], int startScissorSubBatch,
+                                const SkIRect& drawBounds) const;
+
+    GrCCStrokeGeometry fGeometry;
+    SkSTArray<32, PathInfo> fPathInfos;
+    SkSTArray<32, Batch> fBatches;
+    SkSTArray<32, ScissorSubBatch> fScissorSubBatches;
+    int fMaxNumScissorSubBatches = 0;  // Largest sub-batch count of any single closed batch.
+    bool fHasOpenBatch = false;
+    // fInstanceCounts[] tracks the latest running tallies per scissor mode (initially zero).
+    const InstanceTallies fZeroTallies = InstanceTallies();
+    GrSTAllocator<128, InstanceTallies> fTalliesAllocator;
+    const InstanceTallies* fInstanceCounts[kNumScissorModes] = {&fZeroTallies, &fZeroTallies};
+
+    sk_sp<GrBuffer> fInstanceBuffer;
+    // The indices stored in batches are relative to these base instances.
+    InstanceTallies fBaseInstances[kNumScissorModes];
+    // Staging arrays reused across draws; mutable because the draw methods are const.
+    mutable SkSTArray<32, GrMesh> fMeshesBuffer;
+    mutable SkSTArray<32, SkIRect> fScissorsBuffer;
+};
+
+#endif
diff --git a/src/gpu/ccpr/GrCoverageCountingPathRenderer.cpp b/src/gpu/ccpr/GrCoverageCountingPathRenderer.cpp
index f783259..0899690 100644
--- a/src/gpu/ccpr/GrCoverageCountingPathRenderer.cpp
+++ b/src/gpu/ccpr/GrCoverageCountingPathRenderer.cpp
@@ -18,21 +18,6 @@
 
 using PathInstance = GrCCPathProcessor::Instance;
 
-// If a path spans more pixels than this, we need to crop it or else analytic AA can run out of fp32
-// precision.
-static constexpr float kPathCropThreshold = 1 << 16;
-
-static void crop_path(const SkPath& path, const SkIRect& cropbox, SkPath* out) {
-    SkPath cropboxPath;
-    cropboxPath.addRect(SkRect::Make(cropbox));
-    if (!Op(cropboxPath, path, kIntersect_SkPathOp, out)) {
-        // This can fail if the PathOps encounter NaN or infinities.
-        out->reset();
-    }
-    out->setIsVolatile(true);
-}
-
-
 GrCCPerOpListPaths::~GrCCPerOpListPaths() {
     // Ensure there are no surviving DrawPathsOps with a dangling pointer into this class.
     if (!fDrawOps.isEmpty()) {
@@ -84,45 +69,69 @@
 
 GrPathRenderer::CanDrawPath GrCoverageCountingPathRenderer::onCanDrawPath(
         const CanDrawPathArgs& args) const {
-    if (!args.fShape->style().isSimpleFill() || args.fShape->inverseFilled() ||
-        args.fViewMatrix->hasPerspective() || GrAAType::kCoverage != args.fAAType) {
+    const GrShape& shape = *args.fShape;
+    if (GrAAType::kCoverage != args.fAAType || shape.style().hasPathEffect() ||
+        args.fViewMatrix->hasPerspective() || shape.inverseFilled()) {
         return CanDrawPath::kNo;
     }
 
     SkPath path;
-    args.fShape->asPath(&path);
+    shape.asPath(&path);
 
-    SkRect devBounds;
-    args.fViewMatrix->mapRect(&devBounds, path.getBounds());
+    switch (shape.style().strokeRec().getStyle()) {
+        case SkStrokeRec::kFill_Style: {
+            SkRect devBounds;
+            args.fViewMatrix->mapRect(&devBounds, path.getBounds());
 
-    SkIRect clippedIBounds;
-    devBounds.roundOut(&clippedIBounds);
-    if (!clippedIBounds.intersect(*args.fClipConservativeBounds)) {
-        // Path is completely clipped away. Our code will eventually notice this before doing any
-        // real work.
-        return CanDrawPath::kYes;
+            SkIRect clippedIBounds;
+            devBounds.roundOut(&clippedIBounds);
+            if (!clippedIBounds.intersect(*args.fClipConservativeBounds)) {
+                // The path is completely clipped away. Our code will eventually notice this before
+                // doing any real work.
+                return CanDrawPath::kYes;
+            }
+
+            int64_t numPixels = sk_64_mul(clippedIBounds.height(), clippedIBounds.width());
+            if (path.countVerbs() > 1000 && path.countPoints() > numPixels) {
+                // This is a complicated path that has more vertices than pixels! Let's let the SW
+                // renderer have this one: It will probably be faster and a bitmap will require less
+                // total memory on the GPU than CCPR instance buffers would for the raw path data.
+                return CanDrawPath::kNo;
+            }
+
+            if (numPixels > 256 * 256) {
+                // Large paths can blow up the atlas fast. And they are not ideal for a two-pass
+                // rendering algorithm. Give the simpler direct renderers a chance before we commit
+                // to drawing it.
+                return CanDrawPath::kAsBackup;
+            }
+
+            if (args.fShape->hasUnstyledKey() && path.countVerbs() > 50) {
+                // Complex paths do better cached in an SDF, if the renderer will accept them.
+                return CanDrawPath::kAsBackup;
+            }
+
+            return CanDrawPath::kYes;
+        }
+
+        case SkStrokeRec::kStroke_Style:
+            if (!args.fViewMatrix->isSimilarity()) {
+                // The stroker currently only supports rigid-body transforms for the stroke lines
+                // themselves. This limitation doesn't affect hairlines since their stroke lines are
+                // defined relative to device space.
+                return CanDrawPath::kNo;
+            }
+            // fallthru
+        case SkStrokeRec::kHairline_Style:
+            // The stroker does not support conics yet.
+            return !SkPathPriv::ConicWeightCnt(path) ? CanDrawPath::kYes : CanDrawPath::kNo;
+
+        case SkStrokeRec::kStrokeAndFill_Style:
+            return CanDrawPath::kNo;
     }
 
-    int64_t numPixels = sk_64_mul(clippedIBounds.height(), clippedIBounds.width());
-    if (path.countVerbs() > 1000 && path.countPoints() > numPixels) {
-        // This is a complicated path that has more vertices than pixels! Let's let the SW renderer
-        // have this one: It will probably be faster and a bitmap will require less total memory on
-        // the GPU than CCPR instance buffers would for the raw path data.
-        return CanDrawPath::kNo;
-    }
-
-    if (numPixels > 256 * 256) {
-        // Large paths can blow up the atlas fast. And they are not ideal for a two-pass rendering
-        // algorithm. Give the simpler direct renderers a chance before we commit to drawing it.
-        return CanDrawPath::kAsBackup;
-    }
-
-    if (args.fShape->hasUnstyledKey() && path.countVerbs() > 50) {
-        // Complex paths do better cached in an SDF, if the renderer will accept them.
-        return CanDrawPath::kAsBackup;
-    }
-
-    return CanDrawPath::kYes;
+    SK_ABORT("Invalid stroke style.");
+    return CanDrawPath::kNo;
 }
 
 bool GrCoverageCountingPathRenderer::onDrawPath(const DrawPathArgs& args) {
@@ -132,24 +141,8 @@
     GrRenderTargetContext* rtc = args.fRenderTargetContext;
     args.fClip->getConservativeBounds(rtc->width(), rtc->height(), &clipIBounds, nullptr);
 
-    SkRect devBounds;
-    args.fViewMatrix->mapRect(&devBounds, args.fShape->bounds());
-
-    std::unique_ptr<GrCCDrawPathsOp> op;
-    if (SkTMax(devBounds.height(), devBounds.width()) > kPathCropThreshold) {
-        // The path is too large. Crop it or analytic AA can run out of fp32 precision.
-        SkPath croppedPath;
-        args.fShape->asPath(&croppedPath);
-        croppedPath.transform(*args.fViewMatrix, &croppedPath);
-        crop_path(croppedPath, clipIBounds, &croppedPath);
-        // FIXME: This breaks local coords: http://skbug.com/8003
-        op = GrCCDrawPathsOp::Make(args.fContext, clipIBounds, SkMatrix::I(), GrShape(croppedPath),
-                                   croppedPath.getBounds(), std::move(args.fPaint));
-    } else {
-        op = GrCCDrawPathsOp::Make(args.fContext, clipIBounds, *args.fViewMatrix, *args.fShape,
-                                   devBounds, std::move(args.fPaint));
-    }
-
+    auto op = GrCCDrawPathsOp::Make(args.fContext, clipIBounds, *args.fViewMatrix, *args.fShape,
+                                    std::move(args.fPaint));
     this->recordOp(std::move(op), args);
     return true;
 }
@@ -180,7 +173,7 @@
             // The path is too large. Crop it or analytic AA can run out of fp32 precision.
             SkPath croppedPath;
             int maxRTSize = caps.maxRenderTargetSize();
-            crop_path(deviceSpacePath, SkIRect::MakeWH(maxRTSize, maxRTSize), &croppedPath);
+            CropPath(deviceSpacePath, SkIRect::MakeWH(maxRTSize, maxRTSize), &croppedPath);
             clipPath.init(croppedPath, accessRect, rtWidth, rtHeight, caps);
         } else {
             clipPath.init(deviceSpacePath, accessRect, rtWidth, rtHeight, caps);
@@ -256,11 +249,14 @@
 
     // Determine if there are enough reusable paths from last flush for it to be worth our time to
     // copy them to cached atlas(es).
-    DoCopiesToCache doCopies = DoCopiesToCache(specs.fNumCopiedPaths > 100 ||
+    int numCopies = specs.fNumCopiedPaths[GrCCPerFlushResourceSpecs::kFillIdx] +
+                    specs.fNumCopiedPaths[GrCCPerFlushResourceSpecs::kStrokeIdx];
+    DoCopiesToCache doCopies = DoCopiesToCache(numCopies > 100 ||
                                                specs.fCopyAtlasSpecs.fApproxNumPixels > 256 * 256);
-    if (specs.fNumCopiedPaths && DoCopiesToCache::kNo == doCopies) {
+    if (numCopies && DoCopiesToCache::kNo == doCopies) {
         specs.convertCopiesToRenders();
-        SkASSERT(!specs.fNumCopiedPaths);
+        SkASSERT(!specs.fNumCopiedPaths[GrCCPerFlushResourceSpecs::kFillIdx]);
+        SkASSERT(!specs.fNumCopiedPaths[GrCCPerFlushResourceSpecs::kStrokeIdx]);
     }
 
     auto resources = sk_make_sp<GrCCPerFlushResources>(onFlushRP, specs);
@@ -316,3 +312,14 @@
 
     SkDEBUGCODE(fFlushing = false);
 }
+
+void GrCoverageCountingPathRenderer::CropPath(const SkPath& path, const SkIRect& cropbox,
+                                              SkPath* out) {
+    SkPath cropboxPath;
+    cropboxPath.addRect(SkRect::Make(cropbox));
+    if (!Op(cropboxPath, path, kIntersect_SkPathOp, out)) {
+        // This can fail if the PathOps encounter NaN or infinities.
+        out->reset();
+    }
+    out->setIsVolatile(true);
+}
diff --git a/src/gpu/ccpr/GrCoverageCountingPathRenderer.h b/src/gpu/ccpr/GrCoverageCountingPathRenderer.h
index 0ec9aa0..bc336a5 100644
--- a/src/gpu/ccpr/GrCoverageCountingPathRenderer.h
+++ b/src/gpu/ccpr/GrCoverageCountingPathRenderer.h
@@ -70,6 +70,12 @@
     void testingOnly_drawPathDirectly(const DrawPathArgs&);
     const GrUniqueKey& testingOnly_getStashedAtlasKey() const;
 
+    // If a path spans more pixels than this, we need to crop it or else analytic AA can run out of
+    // fp32 precision.
+    static constexpr float kPathCropThreshold = 1 << 16;
+
+    static void CropPath(const SkPath&, const SkIRect& cropbox, SkPath* out);
+
 private:
     GrCoverageCountingPathRenderer(AllowCaching);