Add dynamic stroke attribs to tessellated stroking

Allows us to batch together strokes that have different SkStrokeRecs.

Bug: chromium:1172543
Bug: skia:10419
Change-Id: I11dc01e60bc17a6bb3c3b635f9edb2944a2f2edc
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/369579
Reviewed-by: Greg Daniel <egdaniel@google.com>
Commit-Queue: Chris Dalton <csmartdalton@google.com>
diff --git a/src/gpu/tessellate/GrStrokeHardwareTessellator.cpp b/src/gpu/tessellate/GrStrokeHardwareTessellator.cpp
index fd4089b..099d83c 100644
--- a/src/gpu/tessellate/GrStrokeHardwareTessellator.cpp
+++ b/src/gpu/tessellate/GrStrokeHardwareTessellator.cpp
@@ -42,26 +42,19 @@
     return xx*xx;
 }
 
-GrStrokeHardwareTessellator::GrStrokeHardwareTessellator(const GrShaderCaps& shaderCaps,
-                                                         const SkMatrix& viewMatrix,
-                                                         const SkStrokeRec& stroke)
-        // Subtract 2 because the tessellation shader chops every cubic at two locations, and each
-        // chop has the potential to introduce an extra segment.
-        : fMaxTessellationSegments(shaderCaps.maxTessellationSegments() - 2)
-        , fStroke(stroke)
-        , fTolerances(GrStrokeTessellateShader::Tolerances::MakePreTransform(viewMatrix, stroke)) {
+void GrStrokeHardwareTessellator::updateTolerances(Tolerances tolerances, SkPaint::Join joinType) {
     // Calculate the worst-case numbers of parametric segments our hardware can support for the
     // current stroke radius, in the event that there are also enough radial segments to rotate
     // 180 and 360 degrees respectively. These are used for "quick accepts" that allow us to
     // send almost all curves directly to the hardware without having to chop.
     float numRadialSegments180 = std::max(std::ceil(
-            SK_ScalarPI * fTolerances.fNumRadialSegmentsPerRadian), 1.f);
+            SK_ScalarPI * tolerances.fNumRadialSegmentsPerRadian), 1.f);
     float maxParametricSegments180 = num_parametric_segments(fMaxTessellationSegments,
                                                              numRadialSegments180);
     fMaxParametricSegments180_pow4 = pow4(maxParametricSegments180);
 
     float numRadialSegments360 = std::max(std::ceil(
-            2*SK_ScalarPI * fTolerances.fNumRadialSegmentsPerRadian), 1.f);
+            2*SK_ScalarPI * tolerances.fNumRadialSegmentsPerRadian), 1.f);
     float maxParametricSegments360 = num_parametric_segments(fMaxTessellationSegments,
                                                              numRadialSegments360);
     fMaxParametricSegments360_pow4 = pow4(maxParametricSegments360);
@@ -69,7 +62,7 @@
     // Now calculate the worst-case numbers of parametric segments if we are to integrate a join
     // into the same patch as the curve.
     float maxNumSegmentsInJoin;
-    switch (fStroke.getJoin()) {
+    switch (joinType) {
         case SkPaint::kBevel_Join:
             maxNumSegmentsInJoin = 1;
             break;
@@ -89,6 +82,7 @@
             maxParametricSegments360 - maxNumSegmentsInJoin - 1, 0.f));
     fMaxCombinedSegments_withJoin = fMaxTessellationSegments - maxNumSegmentsInJoin - 1;
     fSoloRoundJoinAlwaysFitsInPatch = (numRadialSegments180 <= fMaxTessellationSegments);
+    fTolerances = tolerances;
 }
 
 static bool conic_has_cusp(const SkPoint p[3]) {
@@ -101,17 +95,35 @@
 }
 
 void GrStrokeHardwareTessellator::prepare(GrMeshDrawOp::Target* target, const SkMatrix& viewMatrix,
-                                          const GrSTArenaList<SkPath>& pathList, const SkStrokeRec&,
+                                          const GrSTArenaList<PathStroke>& pathStrokeList,
                                           int totalCombinedVerbCnt) {
+    SkASSERT(!fTarget);
+    SkASSERT(!fViewMatrix);
+    SkASSERT(!fStroke);
+
     fTarget = target;
     fViewMatrix = &viewMatrix;
 
+    std::array<float, 2> matrixScales;
+    if (!fViewMatrix->getMinMaxScales(matrixScales.data())) {
+        matrixScales.fill(1);
+    }
+
     // Pre-allocate at least enough vertex space for 1 in 4 strokes to chop, and for 8 caps.
     int strokePreallocCount = totalCombinedVerbCnt * 5/4;
     int capPreallocCount = 8;
     this->allocPatchChunkAtLeast(strokePreallocCount + capPreallocCount);
 
-    for (const SkPath& path : pathList) {
+    for (const auto& [path, stroke] : pathStrokeList) {
+        if (!fStroke || fStroke->getWidth() != stroke.getWidth() ||
+            fStroke->getJoin() != stroke.getJoin()) {
+            auto tolerances = Tolerances::MakePreTransform(matrixScales.data(), stroke.getWidth());
+            this->updateTolerances(tolerances, stroke.getJoin());
+        }
+        if (fShaderFlags & ShaderFlags::kDynamicStroke) {
+            fDynamicStroke.set(stroke);
+        }
+        fStroke = &stroke;
         fHasLastControlPoint = false;
         SkDEBUGCODE(fHasCurrentPoint = false;)
         SkPathVerb previousVerb = SkPathVerb::kClose;
@@ -170,10 +182,11 @@
     }
 
     fTarget->putBackVertices(fCurrChunkPatchCapacity - fPatchChunks.back().fPatchCount,
-                             GrStrokeTessellateShader::kTessellationPatchBaseStride);
+                             fPatchStride);
 
     fTarget = nullptr;
     fViewMatrix = nullptr;
+    fStroke = nullptr;
 }
 
 void GrStrokeHardwareTessellator::moveTo(SkPoint pt) {
@@ -429,7 +442,7 @@
     }
 
     if (!fSoloRoundJoinAlwaysFitsInPatch && maxDepth != 0 &&
-        (fStroke.getJoin() == SkPaint::kRound_Join || joinType == JoinType::kBowtie)) {
+        (fStroke->getJoin() == SkPaint::kRound_Join || joinType == JoinType::kBowtie)) {
         SkVector tan0 = fCurrentPoint - fLastControlPoint;
         SkVector tan1 = nextControlPoint - fCurrentPoint;
         float rotation = SkMeasureAngleBetweenVectors(tan0, tan1);
@@ -500,7 +513,7 @@
         // are specified to be drawn as an axis-aligned square or circle respectively. Assign
         // default control points that achieve this.
         SkVector outset;
-        if (!fStroke.isHairlineStyle()) {
+        if (!fStroke->isHairlineStyle()) {
             outset = {1, 0};
         } else {
             // If the stroke is hairline, orient the square on the post-transform x-axis instead.
@@ -529,13 +542,13 @@
         fHasLastControlPoint = true;
     }
 
-    switch (fStroke.getCap()) {
+    switch (fStroke->getCap()) {
         case SkPaint::kButt_Cap:
             break;
         case SkPaint::kRound_Cap: {
             // A round cap is the same thing as a 180-degree round join.
             // If our join type isn't round we can alternatively use a bowtie.
-            JoinType roundCapJoinType = (fStroke.getJoin() == SkPaint::kRound_Join)
+            JoinType roundCapJoinType = (fStroke->getJoin() == SkPaint::kRound_Join)
                     ? JoinType::kFromStroke : JoinType::kBowtie;
             this->joinTo(roundCapJoinType, fLastControlPoint);
             this->moveTo(fCurrContourStartPoint, fCurrContourFirstControlPoint);
@@ -545,9 +558,9 @@
         case SkPaint::kSquare_Cap: {
             // A square cap is the same as appending lineTos.
             SkVector lastTangent = fCurrentPoint - fLastControlPoint;
-            if (!fStroke.isHairlineStyle()) {
+            if (!fStroke->isHairlineStyle()) {
                 // Extend the cap by 1/2 stroke width.
-                lastTangent *= (.5f * fStroke.getWidth()) / lastTangent.length();
+                lastTangent *= (.5f * fStroke->getWidth()) / lastTangent.length();
             } else {
                 // Extend the cap by what will be 1/2 pixel after transformation.
                 lastTangent *=
@@ -556,9 +569,9 @@
             this->lineTo(fCurrentPoint + lastTangent);
             this->moveTo(fCurrContourStartPoint, fCurrContourFirstControlPoint);
             SkVector firstTangent = fCurrContourFirstControlPoint - fCurrContourStartPoint;
-            if (!fStroke.isHairlineStyle()) {
+            if (!fStroke->isHairlineStyle()) {
                 // Set the the cap back by 1/2 stroke width.
-                firstTangent *= (-.5f * fStroke.getWidth()) / firstTangent.length();
+                firstTangent *= (-.5f * fStroke->getWidth()) / firstTangent.length();
             } else {
                 // Set the cap back by what will be 1/2 pixel after transformation.
                 firstTangent *=
@@ -607,6 +620,7 @@
         // control point equal to p0.
         fPatchWriter.write((prevJoinType == JoinType::kNone) ? p[0] : fLastControlPoint);
         fPatchWriter.writeArray(p, 4);
+        this->emitDynamicAttribs();
     }
 
     fLastControlPoint = c2;
@@ -630,11 +644,18 @@
             fPatchWriter.write(fCurrentPoint, fCurrentPoint);
         }
         fPatchWriter.write(nextControlPoint);
+        this->emitDynamicAttribs();
     }
 
     fLastControlPoint = nextControlPoint;
 }
 
+void GrStrokeHardwareTessellator::emitDynamicAttribs() {
+    if (fShaderFlags & ShaderFlags::kDynamicStroke) {
+        fPatchWriter.write(fDynamicStroke);
+    }
+}
+
 bool GrStrokeHardwareTessellator::reservePatch() {
     if (fPatchChunks.back().fPatchCount >= fCurrChunkPatchCapacity) {
         // The current chunk is full. Time to allocate a new one. (And no need to put back vertices;
@@ -653,10 +674,9 @@
 void GrStrokeHardwareTessellator::allocPatchChunkAtLeast(int minPatchAllocCount) {
     SkASSERT(fTarget);
     PatchChunk* chunk = &fPatchChunks.push_back();
-    fPatchWriter = {fTarget->makeVertexSpaceAtLeast(
-            GrStrokeTessellateShader::kTessellationPatchBaseStride, minPatchAllocCount,
-            minPatchAllocCount, &chunk->fPatchBuffer, &chunk->fBasePatch,
-            &fCurrChunkPatchCapacity)};
+    fPatchWriter = {fTarget->makeVertexSpaceAtLeast(fPatchStride, minPatchAllocCount,
+                                                    minPatchAllocCount, &chunk->fPatchBuffer,
+                                                    &chunk->fBasePatch, &fCurrChunkPatchCapacity)};
     fCurrChunkMinPatchAllocCount = minPatchAllocCount;
 }
 
diff --git a/src/gpu/tessellate/GrStrokeHardwareTessellator.h b/src/gpu/tessellate/GrStrokeHardwareTessellator.h
index 7eae589..3488bd8 100644
--- a/src/gpu/tessellate/GrStrokeHardwareTessellator.h
+++ b/src/gpu/tessellate/GrStrokeHardwareTessellator.h
@@ -18,14 +18,22 @@
 // MSAA if antialiasing is desired.
 class GrStrokeHardwareTessellator : public GrStrokeTessellator {
 public:
-    GrStrokeHardwareTessellator(const GrShaderCaps&, const SkMatrix&, const SkStrokeRec& stroke);
+    GrStrokeHardwareTessellator(ShaderFlags shaderFlags, const GrShaderCaps& shaderCaps)
+            : GrStrokeTessellator(shaderFlags)
+            , fPatchStride(GrStrokeTessellateShader::PatchStride(fShaderFlags))
+            // Subtract 2 because the tessellation shader chops every cubic at two locations, and
+            // each chop has the potential to introduce an extra segment.
+            , fMaxTessellationSegments(shaderCaps.maxTessellationSegments() - 2) {
+    }
 
-    void prepare(GrMeshDrawOp::Target*, const SkMatrix&, const GrSTArenaList<SkPath>&,
-                 const SkStrokeRec&, int totalCombinedVerbCnt) override;
+    void prepare(GrMeshDrawOp::Target*, const SkMatrix&, const GrSTArenaList<PathStroke>&,
+                 int totalCombinedVerbCnt) override;
 
     void draw(GrOpFlushState*) const override;
 
 private:
+    using Tolerances = GrStrokeTessellateShader::Tolerances;
+
     enum class JoinType {
         kFromStroke,  // The shader will use the join type defined in our fStrokeRec.
         kBowtie,  // Double sided round join.
@@ -38,6 +46,10 @@
         kYes
     };
 
+    // Updates our internal tolerances for determining how much subdivision to do. We need to ensure
+    // every curve we emit requires no more segments than fMaxTessellationSegments.
+    void updateTolerances(Tolerances, SkPaint::Join strokeJoin);
+
     void moveTo(SkPoint);
     void moveTo(SkPoint, SkPoint lastControlPoint);
     void lineTo(SkPoint, JoinType prevJoinType = JoinType::kFromStroke);
@@ -60,26 +72,27 @@
     void cap();
     void emitPatch(JoinType prevJoinType, const SkPoint pts[4], SkPoint endPt);
     void emitJoinPatch(JoinType, SkPoint nextControlPoint);
+    void emitDynamicAttribs();
     bool reservePatch();
     void allocPatchChunkAtLeast(int minPatchAllocCount);
 
+    // Size in bytes of a tessellation patch with our shader flags.
+    const size_t fPatchStride;
+
     // The maximum number of tessellation segments the hardware can emit for a single patch.
     const int fMaxTessellationSegments;
-    const SkStrokeRec fStroke;
 
-    // Tolerances the tessellation shader will use for determining how much subdivision to do. We
-    // need to ensure every curve we emit doesn't require more than fMaxTessellationSegments.
-    GrStrokeTessellateShader::Tolerances fTolerances;
-
-    // The target and view matrix will only be non-null during prepare() and its callees.
+    // These will only be valid during prepare() and its callees.
     GrMeshDrawOp::Target* fTarget = nullptr;
     const SkMatrix* fViewMatrix = nullptr;
+    const SkStrokeRec* fStroke = nullptr;
 
     // These values contain worst-case numbers of parametric segments, raised to the 4th power, that
     // our hardware can support for the current stroke radius. They assume curve rotations of 180
     // and 360 degrees respectively. These are used for "quick accepts" that allow us to send almost
     // all curves directly to the hardware without having to chop. We raise to the 4th power because
     // the "pow4" variants of Wang's formula are the quickest to evaluate.
+    GrStrokeTessellateShader::Tolerances fTolerances;
     float fMaxParametricSegments180_pow4;
     float fMaxParametricSegments360_pow4;
     float fMaxParametricSegments180_pow4_withJoin;
@@ -102,7 +115,7 @@
     GrVertexWriter fPatchWriter;
 
     // Variables related to the specific contour that we are currently iterating during
-    // prepareBuffers.
+    // prepare().
     bool fHasLastControlPoint = false;
     SkDEBUGCODE(bool fHasCurrentPoint = false;)
     SkPoint fCurrContourStartPoint;
@@ -110,6 +123,9 @@
     SkPoint fLastControlPoint;
     SkPoint fCurrentPoint;
 
+    // Stateful values for the dynamic state (if any) that will get written out with each patch.
+    GrStrokeTessellateShader::DynamicStroke fDynamicStroke;
+
     friend class GrOp;  // For ctor.
 
 public:
diff --git a/src/gpu/tessellate/GrStrokeIndirectTessellator.cpp b/src/gpu/tessellate/GrStrokeIndirectTessellator.cpp
index 1e15cf4..cf5e91d 100644
--- a/src/gpu/tessellate/GrStrokeIndirectTessellator.cpp
+++ b/src/gpu/tessellate/GrStrokeIndirectTessellator.cpp
@@ -66,18 +66,25 @@
 public:
     constexpr static int8_t kMaxResolveLevel = GrStrokeIndirectTessellator::kMaxResolveLevel;
 
-    ResolveLevelCounter(const SkStrokeRec& stroke, GrStrokeTessellateShader::Tolerances tolerances,
-                        int* resolveLevelCounts) :
+    ResolveLevelCounter(const SkMatrix& viewMatrix, int* resolveLevelCounts)
+            : fResolveLevelCounts(resolveLevelCounts) {
+        if (!viewMatrix.getMinMaxScales(fMatrixMinMaxScales.data())) {
+            fMatrixMinMaxScales.fill(1);
+        }
+    }
+
+    void updateTolerances(float strokeWidth, bool isRoundJoin) {
+        this->flush();
+        fTolerances = GrStrokeTessellateShader::Tolerances::MakePreTransform(
+                fMatrixMinMaxScales.data(), strokeWidth);
+        fResolveLevelForCircles = SkTPin<float>(
+                sk_float_nextlog2(fTolerances.fNumRadialSegmentsPerRadian * SK_ScalarPI),
+                1, kMaxResolveLevel);
+        fIsRoundJoin = isRoundJoin;
 #if USE_SIMD
-            fWangsTermQuadratic(GrWangsFormula::length_term<2>(tolerances.fParametricIntolerance)),
-            fWangsTermCubic(GrWangsFormula::length_term<3>(tolerances.fParametricIntolerance)),
+        fWangsTermQuadratic = GrWangsFormula::length_term<2>(fTolerances.fParametricIntolerance);
+        fWangsTermCubic = GrWangsFormula::length_term<3>(fTolerances.fParametricIntolerance);
 #endif
-            fIsRoundJoin(stroke.getJoin() == SkPaint::kRound_Join),
-            fTolerances(tolerances),
-            fResolveLevelForCircles(SkTPin<float>(
-                    sk_float_nextlog2(fTolerances.fNumRadialSegmentsPerRadian * SK_ScalarPI),
-                    1, kMaxResolveLevel)),
-            fResolveLevelCounts(resolveLevelCounts) {
     }
 
     bool isRoundJoin() const { return fIsRoundJoin; }
@@ -419,21 +426,24 @@
         float fCubicChopTs[8];
     };
 
-    const float fWangsTermQuadratic;
-    const float fWangsTermCubic;
+    float fWangsTermQuadratic;
+    float fWangsTermCubic;
 
 #endif
-    const bool fIsRoundJoin;
-    const GrStrokeTessellateShader::Tolerances fTolerances;
-    const int fResolveLevelForCircles;
     int* const fResolveLevelCounts;
+    std::array<float, 2> fMatrixMinMaxScales;
+    GrStrokeTessellateShader::Tolerances fTolerances;
+    int fResolveLevelForCircles;
+    bool fIsRoundJoin;
 };
 
 }  // namespace
 
 GrStrokeIndirectTessellator::GrStrokeIndirectTessellator(
-        const SkMatrix& viewMatrix, const GrSTArenaList<SkPath>& pathList,
-        const SkStrokeRec& stroke, int totalCombinedVerbCnt, SkArenaAlloc* alloc) {
+        ShaderFlags shaderFlags, const SkMatrix& viewMatrix,
+        const GrSTArenaList<PathStroke>& pathStrokeList, int totalCombinedVerbCnt,
+        SkArenaAlloc* alloc)
+        : GrStrokeTessellator(shaderFlags) {
     SkASSERT(!fTotalInstanceCount);
     SkASSERT(!fResolveLevels);
     SkASSERT(!fResolveLevelArrayCount);
@@ -454,11 +464,19 @@
     fChopTs = alloc->makeArrayDefault<float>(chopTAllocCount);
     float* nextChopTs = fChopTs;
 
-    auto tolerances = GrStrokeTessellateShader::Tolerances::MakePreTransform(viewMatrix, stroke);
-    ResolveLevelCounter counter(stroke, tolerances, fResolveLevelCounts);
+    ResolveLevelCounter counter(viewMatrix, fResolveLevelCounts);
 
+    float lastStrokeWidth = -1;
     SkPoint lastControlPoint = {0,0};
-    for (const SkPath& path : pathList) {
+    for (const auto& [path, stroke] : pathStrokeList) {
+        SkASSERT(stroke.getWidth() >= 0);  // Otherwise we can't initialize lastStrokeWidth=-1.
+        if (stroke.getWidth() != lastStrokeWidth ||
+            (stroke.getJoin() == SkPaint::kRound_Join) != counter.isRoundJoin()) {
+            counter.updateTolerances(stroke.getWidth(), (stroke.getJoin() == SkPaint::kRound_Join));
+            lastStrokeWidth = stroke.getWidth();
+        }
+        fMaxNumExtraEdgesInJoin = std::max(fMaxNumExtraEdgesInJoin,
+                GrStrokeTessellateShader::NumExtraEdgesInIndirectJoin(stroke.getJoin()));
         // Iterate through each verb in the stroke, counting its resolveLevel(s).
         GrStrokeIterator iter(path, &stroke, &viewMatrix);
         while (iter.next()) {
@@ -610,11 +628,14 @@
 // per bin. Provides methods to write strokes to their respective bins.
 class BinningInstanceWriter {
 public:
+    using ShaderFlags = GrStrokeTessellateShader::ShaderFlags;
+    using DynamicStroke = GrStrokeTessellateShader::DynamicStroke;
     constexpr static int kNumBins = GrStrokeIndirectTessellator::kMaxResolveLevel + 1;
 
     BinningInstanceWriter(GrDrawIndirectWriter* indirectWriter, GrVertexWriter* instanceWriter,
-                          size_t instanceStride, int baseInstance, int numExtraEdgesInJoin,
-                          const int resolveLevelCounts[kNumBins]) {
+                          ShaderFlags shaderFlags, size_t instanceStride, int baseInstance,
+                          int numExtraEdgesInJoin, const int resolveLevelCounts[kNumBins])
+            : fShaderFlags(shaderFlags) {
         // Partition the instance buffer into bins and write out indirect draw commands per bin.
         int runningInstanceCount = 0;
         for (int i = 0; i < kNumBins; ++i) {
@@ -641,6 +662,11 @@
         *instanceWriter = instanceWriter->makeOffset(instanceStride * runningInstanceCount);
     }
 
+    void updateDynamicStroke(const SkStrokeRec& stroke) {
+        SkASSERT(fShaderFlags & ShaderFlags::kDynamicStroke);
+        fDynamicStroke.set(stroke);
+    }
+
     void writeStroke(int8_t resolveLevel, const SkPoint pts[4], SkPoint prevControlPoint,
                      bool isInternalChop = false) {
         SkASSERT(0 <= resolveLevel && resolveLevel < kNumBins);
@@ -651,6 +677,7 @@
                                              // instance follows a chop, and round joins from
                                              // chopping always get exactly one segment.
                                              (isInternalChop) ? -numEdges : +numEdges);
+        this->writeDynamicAttribs(resolveLevel);
     }
 
     // Writes out a 180-degree point stroke, which renders as a circle with a diameter equal to the
@@ -662,6 +689,7 @@
         // Mark numTotalEdges negative so the shader assigns the least possible number of edges to
         // its (empty) preceding join.
         fInstanceWriters[resolveLevel].write(-fNumEdgesPerResolveLevel[resolveLevel]);
+        this->writeDynamicAttribs(resolveLevel);
     }
 
 #ifdef SK_DEBUG
@@ -675,16 +703,26 @@
 #endif
 
 private:
+    void writeDynamicAttribs(int8_t resolveLevel) {
+        if (fShaderFlags & ShaderFlags::kDynamicStroke) {
+            fInstanceWriters[resolveLevel].write(fDynamicStroke);
+        }
+    }
+
+    const ShaderFlags fShaderFlags;
     GrVertexWriter fInstanceWriters[kNumBins];
     float fNumEdgesPerResolveLevel[kNumBins];
     SkDEBUGCODE(GrVertexWriter fEndWriters[kNumBins];)
+
+    // Stateful value for the dynamic stroke (if any) that will get written out with each instance.
+    DynamicStroke fDynamicStroke;
 };
 
 }  // namespace
 
 void GrStrokeIndirectTessellator::prepare(GrMeshDrawOp::Target* target, const SkMatrix& viewMatrix,
-                                          const GrSTArenaList<SkPath>& pathList,
-                                          const SkStrokeRec& stroke, int totalCombinedVerbCnt) {
+                                          const GrSTArenaList<PathStroke>& pathStrokeList,
+                                          int totalCombinedVerbCnt) {
     SkASSERT(fResolveLevels);
     SkASSERT(!fDrawIndirectBuffer);
     SkASSERT(!fInstanceBuffer);
@@ -713,7 +751,7 @@
 
     // We already know the instance count. Allocate an instance for each.
     int baseInstance;
-    size_t instanceStride = GrStrokeTessellateShader::kIndirectInstanceBaseStride;
+    size_t instanceStride = GrStrokeTessellateShader::IndirectInstanceStride(fShaderFlags);
     GrVertexWriter instanceWriter = {target->makeVertexSpace(instanceStride, fTotalInstanceCount,
                                                              &fInstanceBuffer, &baseInstance)};
     if (!instanceWriter.isValid()) {
@@ -724,15 +762,13 @@
     SkDEBUGCODE(auto endInstanceWriter = instanceWriter.makeOffset(instanceStride *
                                                                    fTotalInstanceCount);)
 
-    BinningInstanceWriter binningWriter(
-            &indirectWriter, &instanceWriter, instanceStride, baseInstance,
-            GrStrokeTessellateShader::NumExtraEdgesInIndirectJoin(stroke.getJoin()),
-            fResolveLevelCounts);
+    BinningInstanceWriter binningWriter(&indirectWriter, &instanceWriter, fShaderFlags,
+                                        instanceStride, baseInstance, fMaxNumExtraEdgesInJoin,
+                                        fResolveLevelCounts);
 
     SkPoint scratchBuffer[4 + 10];
     SkPoint* scratch = scratchBuffer;
 
-    bool isRoundJoin = (stroke.getJoin() == SkPaint::kRound_Join);
     int8_t* nextResolveLevel = fResolveLevels;
     float* nextChopTs = fChopTs;
 
@@ -742,7 +778,11 @@
     int8_t resolveLevel;
 
     // Now write out each instance to its resolveLevel's designated location in the instance buffer.
-    for (const SkPath& path : pathList) {
+    for (const auto& [path, stroke] : pathStrokeList) {
+        bool isRoundJoin = (stroke.getJoin() == SkPaint::kRound_Join);
+        if (fShaderFlags & ShaderFlags::kDynamicStroke) {
+            binningWriter.updateDynamicStroke(stroke);
+        }
         GrStrokeIterator iter(path, &stroke, &viewMatrix);
         bool hasLastControlPoint = false;
         while (iter.next()) {
diff --git a/src/gpu/tessellate/GrStrokeIndirectTessellator.h b/src/gpu/tessellate/GrStrokeIndirectTessellator.h
index 81a8e8b..e3f3012 100644
--- a/src/gpu/tessellate/GrStrokeIndirectTessellator.h
+++ b/src/gpu/tessellate/GrStrokeIndirectTessellator.h
@@ -22,11 +22,11 @@
     // become an issue if we try to draw a stroke with an astronomically wide width.
     constexpr static int8_t kMaxResolveLevel = 15;
 
-    GrStrokeIndirectTessellator(const SkMatrix&, const GrSTArenaList<SkPath>&,
-                                const SkStrokeRec&, int totalCombinedVerbCnt, SkArenaAlloc*);
+    GrStrokeIndirectTessellator(ShaderFlags, const SkMatrix&, const GrSTArenaList<PathStroke>&,
+                                int totalCombinedVerbCnt, SkArenaAlloc*);
 
-    void prepare(GrMeshDrawOp::Target*, const SkMatrix&, const GrSTArenaList<SkPath>&,
-                 const SkStrokeRec&, int totalCombinedVerbCnt) override;
+    void prepare(GrMeshDrawOp::Target*, const SkMatrix&, const GrSTArenaList<PathStroke>&,
+                 int totalCombinedVerbCnt) override;
 
     void draw(GrOpFlushState*) const override;
 
@@ -48,6 +48,11 @@
     float* fChopTs = nullptr;
     SkDEBUGCODE(int fChopTsArrayCount = 0;)
 
+    // Bevel, miter, and round joins require us to add different numbers of additional edges onto
+    // their triangle strips. When using dynamic stroke, we just append the maximum required number
+    // of additional edges to every instance.
+    int fMaxNumExtraEdgesInJoin = 0;
+
     // GPU buffers for drawing.
     sk_sp<const GrBuffer> fDrawIndirectBuffer;
     sk_sp<const GrBuffer> fInstanceBuffer;
diff --git a/src/gpu/tessellate/GrStrokeTessellateOp.cpp b/src/gpu/tessellate/GrStrokeTessellateOp.cpp
index 745ea8a..0af849e 100644
--- a/src/gpu/tessellate/GrStrokeTessellateOp.cpp
+++ b/src/gpu/tessellate/GrStrokeTessellateOp.cpp
@@ -20,14 +20,15 @@
         : GrDrawOp(ClassID())
         , fAAType(aaType)
         , fViewMatrix(viewMatrix)
-        , fStroke(stroke)
         , fColor(paint.getColor4f())
         , fProcessors(std::move(paint))
-        , fPathList(path)
-        , fTotalCombinedVerbCnt(path.countVerbs())
-        , fHasConics(SkPathPriv::ConicWeightCnt(path) != 0) {
+        , fPathStrokeList(path, stroke)
+        , fTotalCombinedVerbCnt(path.countVerbs()) {
+    if (SkPathPriv::ConicWeightCnt(path) != 0) {
+        fShaderFlags |= ShaderFlags::kHasConics;
+    }
     SkRect devBounds = path.getBounds();
-    float inflationRadius = fStroke.getInflationRadius();
+    float inflationRadius = stroke.getInflationRadius();
     devBounds.outset(inflationRadius, inflationRadius);
     viewMatrix.mapRect(&devBounds, devBounds);
     this->setBounds(devBounds, HasAABloat(GrAAType::kCoverage == fAAType), IsHairline::kNo);
@@ -58,7 +59,7 @@
                                                         bool hasMixedSampledCoverage,
                                                         GrClampType clampType) {
     // Make sure the finalize happens before combining. We might change fNeedsStencil here.
-    SkASSERT(fPathList.begin().fCurr->fNext == nullptr);
+    SkASSERT(fPathStrokeList.begin().fCurr->fNext == nullptr);
     SkASSERT(fAAType != GrAAType::kCoverage || hasMixedSampledCoverage);
     const GrProcessorSet::Analysis& analysis = fProcessors.finalize(
             fColor, GrProcessorAnalysisCoverage::kNone, clip, &GrUserStencilSettings::kUnused,
@@ -69,22 +70,38 @@
 
 GrOp::CombineResult GrStrokeTessellateOp::onCombineIfPossible(GrOp* grOp, SkArenaAlloc* alloc,
                                                               const GrCaps&) {
+    using DynamicStroke = GrStrokeTessellateShader::DynamicStroke;
     SkASSERT(grOp->classID() == this->classID());
     auto* op = static_cast<GrStrokeTessellateOp*>(grOp);
+
     if (fNeedsStencil ||
         op->fNeedsStencil ||
         fColor != op->fColor ||
         fViewMatrix != op->fViewMatrix ||
         fAAType != op->fAAType ||
-        !fStroke.hasEqualEffect(op->fStroke) ||
-        fProcessors != op->fProcessors) {
+        fProcessors != op->fProcessors ||
+        this->headStroke().isHairlineStyle() != op->headStroke().isHairlineStyle()) {
         return CombineResult::kCannotCombine;
     }
 
-    fPathList.concat(std::move(op->fPathList), alloc);
-    fTotalCombinedVerbCnt += op->fTotalCombinedVerbCnt;
-    fHasConics |= op->fHasConics;
+    auto combinedFlags = fShaderFlags | op->fShaderFlags;
+    if (!(combinedFlags & ShaderFlags::kDynamicStroke) &&
+        !DynamicStroke::StrokesHaveEqualDynamicState(this->headStroke(), op->headStroke())) {
+        // The paths have different stroke properties. We will need to enable dynamic stroke if we
+        // still decide to combine them.
+        combinedFlags |= ShaderFlags::kDynamicStroke;
+    }
+    if (combinedFlags & ShaderFlags::kDynamicStroke) {
+        // Don't actually enable dynamic stroke on ops that already have lots of verbs.
+        if (!this->shouldUseDynamicState(ShaderFlags::kDynamicStroke) ||
+            !op->shouldUseDynamicState(ShaderFlags::kDynamicStroke)) {
+            return CombineResult::kCannotCombine;
+        }
+    }
 
+    fPathStrokeList.concat(std::move(op->fPathStrokeList), alloc);
+    fTotalCombinedVerbCnt += op->fTotalCombinedVerbCnt;
+    fShaderFlags = combinedFlags;
     return CombineResult::kMerged;
 }
 
@@ -127,11 +144,11 @@
     if (caps.shaderCaps()->tessellationSupport() &&
         fTotalCombinedVerbCnt > 50 &&
         !fProcessors.usesVaryingCoords()) {
-        fTessellator = arena->make<GrStrokeHardwareTessellator>(*caps.shaderCaps(), fViewMatrix,
-                                                                fStroke);
+        fTessellator = arena->make<GrStrokeHardwareTessellator>(fShaderFlags, *caps.shaderCaps());
         shaderMode = GrStrokeTessellateShader::Mode::kTessellation;
     } else {
-        fTessellator = arena->make<GrStrokeIndirectTessellator>(fViewMatrix, fPathList, fStroke,
+        fTessellator = arena->make<GrStrokeIndirectTessellator>(fShaderFlags, fViewMatrix,
+                                                                fPathStrokeList,
                                                                 fTotalCombinedVerbCnt, arena);
         shaderMode = GrStrokeTessellateShader::Mode::kIndirect;
     }
@@ -147,7 +164,7 @@
     }
 
     auto* strokeTessellateShader = arena->make<GrStrokeTessellateShader>(
-            shaderMode, fHasConics, fStroke, fViewMatrix, fColor);
+            shaderMode, fShaderFlags, fViewMatrix, this->headStroke(), fColor);
     auto* fillPipeline = GrFillPathShader::MakeFillPassPipeline(args, fAAType, std::move(clip),
                                                                 std::move(fProcessors));
     auto fillStencil = &GrUserStencilSettings::kUnused;
@@ -188,7 +205,7 @@
                                     flushState->detachAppliedClip());
     }
     SkASSERT(fTessellator);
-    fTessellator->prepare(flushState, fViewMatrix, fPathList, fStroke, fTotalCombinedVerbCnt);
+    fTessellator->prepare(flushState, fViewMatrix, fPathStrokeList, fTotalCombinedVerbCnt);
 }
 
 void GrStrokeTessellateOp::onExecute(GrOpFlushState* flushState, const SkRect& chainBounds) {
diff --git a/src/gpu/tessellate/GrStrokeTessellateOp.h b/src/gpu/tessellate/GrStrokeTessellateOp.h
index 4a63d0a..2278fbc 100644
--- a/src/gpu/tessellate/GrStrokeTessellateOp.h
+++ b/src/gpu/tessellate/GrStrokeTessellateOp.h
@@ -12,37 +12,66 @@
 #include "src/gpu/GrSTArenaList.h"
 #include "src/gpu/ops/GrMeshDrawOp.h"
 #include "src/gpu/tessellate/GrPathShader.h"
+#include "src/gpu/tessellate/GrStrokeTessellateShader.h"
 
 class GrRecordingContext;
 
 // Prepares GPU data for, and then draws a stroke's tessellated geometry.
 class GrStrokeTessellator {
 public:
-    // Called before draw(). Prepares GPU buffers containing the geometry to tessellate.
-    virtual void prepare(GrMeshDrawOp::Target*, const SkMatrix&, const GrSTArenaList<SkPath>&,
-                         const SkStrokeRec&, int totalCombinedVerbCnt) = 0;
+    using ShaderFlags = GrStrokeTessellateShader::ShaderFlags;
 
-    // Issues draw calls for the tessellated stroie. The caller is responsible for binding its
+    // Stores |shaderFlags| in fShaderFlags; explicit to prevent implicit conversions.
+    explicit GrStrokeTessellator(ShaderFlags shaderFlags) : fShaderFlags(shaderFlags) {}
+
+    // Pairs a path with the stroke to apply to it, so that a single list can hold paths with
+    // different SkStrokeRecs.
+    struct PathStroke {
+        PathStroke(const SkPath& path, const SkStrokeRec& stroke) : fPath(path), fStroke(stroke) {}
+        SkPath fPath;
+        SkStrokeRec fStroke;
+    };
+
+    // Called before draw(). Prepares GPU buffers containing the geometry to tessellate.
+    virtual void prepare(GrMeshDrawOp::Target*, const SkMatrix&, const GrSTArenaList<PathStroke>&,
+                         int totalCombinedVerbCnt) = 0;
+
+    // Issues draw calls for the tessellated stroke. The caller is responsible for binding its
     // desired pipeline ahead of time.
     virtual void draw(GrOpFlushState*) const = 0;
 
     virtual ~GrStrokeTessellator() {}
-};
-
-// Base class for ops that render opaque, constant-color strokes by linearizing them into sorted
-// "parametric" and "radial" edges. See GrStrokeTessellateShader.
-class GrStrokeTessellateOp : public GrDrawOp {
-public:
-    // The provided matrix must be a similarity matrix for the time being. This is so we can
-    // bootstrap this Op on top of GrStrokeGeometry with minimal modifications.
-    //
-    // Patches can overlap, so until a stencil technique is implemented, the provided paint must be
-    // a constant blended color.
-    GrStrokeTessellateOp(GrAAType, const SkMatrix&, const SkPath&, const SkStrokeRec&, GrPaint&&);
 
 protected:
+    // Shader flags passed to the constructor; fixed for the tessellator's lifetime.
+    const ShaderFlags fShaderFlags;
+};
+
+// Renders strokes by linearizing them into sorted "parametric" and "radial" edges. See
+// GrStrokeTessellateShader.
+class GrStrokeTessellateOp : public GrDrawOp {
+public:
+    GrStrokeTessellateOp(GrAAType, const SkMatrix&, const SkPath&, const SkStrokeRec&, GrPaint&&);
+
+private:
+    using ShaderFlags = GrStrokeTessellateShader::ShaderFlags;
+    using PathStroke = GrStrokeTessellator::PathStroke;
     DEFINE_OP_CLASS_ID
 
+    // Returns the stroke stored with the head entry of fPathStrokeList.
+    SkStrokeRec& headStroke() { return fPathStrokeList.head().fStroke; }
+
+    // Returns whether it is a good tradeoff to use the given dynamic state. Dynamic state improves
+    // batching, but if it isn't already enabled, it comes at the cost of having to write out more
+    // data with each patch or instance.
+    bool shouldUseDynamicState(ShaderFlags dynamicState) const {
+        // Use the dynamic state if either (1) the state is already enabled anyway, or (2) we don't
+        // have many verbs.
+        constexpr static int kMaxVerbsToEnableDynamicState = 50;
+        return (fShaderFlags & dynamicState) ||
+               (fTotalCombinedVerbCnt <= kMaxVerbsToEnableDynamicState);
+    }
+
     const char* name() const override { return "GrStrokeTessellateOp"; }
     void visitProxies(const VisitProxyFunc& fn) const override;
     FixedFunctionFlags fixedFunctionFlags() const override;
@@ -63,14 +92,13 @@
 
     const GrAAType fAAType;
     const SkMatrix fViewMatrix;
-    const SkStrokeRec fStroke;
     SkPMColor4f fColor;
     bool fNeedsStencil = false;
     GrProcessorSet fProcessors;
 
-    GrSTArenaList<SkPath> fPathList;
+    ShaderFlags fShaderFlags = ShaderFlags::kNone;
+    GrSTArenaList<PathStroke> fPathStrokeList;
     int fTotalCombinedVerbCnt = 0;
-    bool fHasConics = false;
 
     GrStrokeTessellator* fTessellator = nullptr;
     const GrProgramInfo* fStencilProgram = nullptr;  // Only used if the stroke has transparency.
diff --git a/src/gpu/tessellate/GrStrokeTessellateShader.cpp b/src/gpu/tessellate/GrStrokeTessellateShader.cpp
index abb33c5..cdc9eee 100644
--- a/src/gpu/tessellate/GrStrokeTessellateShader.cpp
+++ b/src/gpu/tessellate/GrStrokeTessellateShader.cpp
@@ -45,6 +45,11 @@
     return dot(v, v);
 })";
 
+static const char* kNumRadialSegmentsPerRadian = R"(
+float num_radial_segments_per_radian(float parametricIntolerance, float strokeRadius) {
+    return .5 / acos(max(1.0 - 1.0/(parametricIntolerance * strokeRadius), -1.0));
+})";
+
 // Unlike mix(), this does not return b when t==1. But it otherwise seems to get better
 // precision than "a*(1 - t) + b*t" for things like chopping cubics on exact cusp points.
 // We override this result anyway when t==1 so it shouldn't be a problem.
@@ -55,8 +60,8 @@
 float2 unchecked_mix(float2 a, float2 b, float T) {
     return fma(b - a, float2(T), a);
 }
-float4 unchecked_mix(float4 a, float4 b, float4 t) {
-    return fma(b - a, t, a);
+float4 unchecked_mix(float4 a, float4 b, float4 T) {
+    return fma(b - a, T, a);
 })";
 
 // Calculates the number of evenly spaced (in the parametric sense) segments to chop a cubic into.
@@ -79,15 +84,6 @@
     })");
 }
 
-static float get_join_type(const SkStrokeRec& stroke) {
-    switch (stroke.getJoin()) {
-        case SkPaint::kRound_Join: return -1;
-        case SkPaint::kBevel_Join: return 0;
-        case SkPaint::kMiter_Join: SkASSERT(stroke.getMiter() >= 0); return stroke.getMiter();
-    }
-    SkUNREACHABLE;
-}
-
 class GrStrokeTessellateShader::TessellationImpl : public GrGLSLGeometryProcessor {
 public:
     const char* getTessArgsUniformName(const GrGLSLUniformHandler& uniformHandler) const {
@@ -108,34 +104,7 @@
 
         args.fVaryingHandler->emitAttributes(shader);
 
-        // uParametricIntolerance, uNumRadialSegmentsPerRadian, uJoinType, uStrokeRadius.
-        const char* tessArgsName;
-        fTessArgsUniform = uniHandler->addUniform(nullptr,
-                                                  kVertex_GrShaderFlag |
-                                                  kTessControl_GrShaderFlag |
-                                                  kTessEvaluation_GrShaderFlag,
-                                                  kFloat4_GrSLType, "tessArgs", &tessArgsName);
-        v->codeAppendf("float uNumRadialSegmentsPerRadian = %s.y;\n", tessArgsName);
-        v->codeAppendf("float uJoinType = %s.z;\n", tessArgsName);
-
-        if (!shader.viewMatrix().isIdentity()) {
-            fTranslateUniform = uniHandler->addUniform(nullptr, kTessEvaluation_GrShaderFlag,
-                                                       kFloat2_GrSLType, "translate", nullptr);
-            const char* affineMatrixName;
-            // Hairlines apply the affine matrix in their vertex shader, prior to tessellation.
-            // Otherwise the entire view matrix gets applied at the end of the tess eval shader.
-            auto affineMatrixVisibility = (shader.fStroke.isHairlineStyle()) ?
-                    kVertex_GrShaderFlag : kTessEvaluation_GrShaderFlag;
-            fAffineMatrixUniform = uniHandler->addUniform(nullptr, affineMatrixVisibility,
-                                                          kFloat4_GrSLType, "affineMatrix",
-                                                          &affineMatrixName);
-            if (affineMatrixVisibility & kVertex_GrShaderFlag) {
-                v->codeAppendf("float2x2 uAffineMatrix = float2x2(%s);\n", affineMatrixName);
-            }
-        }
-        const char* colorUniformName;
-        fColorUniform = uniHandler->addUniform(nullptr, kFragment_GrShaderFlag, kHalf4_GrSLType,
-                                               "color", &colorUniformName);
+        v->defineConstant("float", "PI", "3.141592653589793238");
 
         // The vertex shader chops the curve into 3 sections in order to meet our tessellation
         // requirements. The stroke tessellator does not allow curve sections to inflect or to
@@ -162,38 +131,84 @@
         v->declareGlobal(GrShaderVar("vsPts89", kFloat4_GrSLType, TypeModifier::Out));
         v->declareGlobal(GrShaderVar("vsTans01", kFloat4_GrSLType, TypeModifier::Out));
         v->declareGlobal(GrShaderVar("vsTans23", kFloat4_GrSLType, TypeModifier::Out));
-
-        v->defineConstant("float", "PI", "3.141592653589793238");
+        if (shader.hasDynamicStroke()) {
+            // [NUM_RADIAL_SEGMENTS_PER_RADIAN, STROKE_RADIUS]
+            v->declareGlobal(GrShaderVar("vsStrokeArgs", kFloat2_GrSLType, TypeModifier::Out));
+        }
 
         v->insertFunction(kAtan2Fn);
         v->insertFunction(kCosineBetweenVectorsFn);
         v->insertFunction(kMiterExtentFn);
         v->insertFunction(kUncheckedMixFn);
         v->insertFunction(kLengthPow2Fn);
+        if (shader.hasDynamicStroke()) {
+            v->insertFunction(kNumRadialSegmentsPerRadian);
+        }
 
-        v->codeAppendf(R"(
+        if (!shader.hasDynamicStroke()) {
+            // [PARAMETRIC_INTOLERANCE, NUM_RADIAL_SEGMENTS_PER_RADIAN, JOIN_TYPE, STROKE_RADIUS]
+            const char* tessArgsName;
+            fTessArgsUniform = uniHandler->addUniform(nullptr,
+                                                      kVertex_GrShaderFlag |
+                                                      kTessControl_GrShaderFlag |
+                                                      kTessEvaluation_GrShaderFlag,
+                                                      kFloat4_GrSLType, "tessArgs", &tessArgsName);
+            v->codeAppendf(R"(
+            float NUM_RADIAL_SEGMENTS_PER_RADIAN = %s.y;
+            float JOIN_TYPE = %s.z;)", tessArgsName, tessArgsName);
+        } else {
+            const char* parametricIntoleranceName;
+            fTessArgsUniform = uniHandler->addUniform(nullptr,
+                                                      kVertex_GrShaderFlag |
+                                                      kTessControl_GrShaderFlag |
+                                                      kTessEvaluation_GrShaderFlag,
+                                                      kFloat_GrSLType, "parametricIntolerance",
+                                                      &parametricIntoleranceName);
+            v->codeAppendf(R"(
+            float STROKE_RADIUS = dynamicStrokeAttr.x;
+            float NUM_RADIAL_SEGMENTS_PER_RADIAN = num_radial_segments_per_radian(%s,STROKE_RADIUS);
+            float JOIN_TYPE = dynamicStrokeAttr.y;)", parametricIntoleranceName);
+        }
+
+        if (!shader.viewMatrix().isIdentity()) {
+            fTranslateUniform = uniHandler->addUniform(nullptr, kTessEvaluation_GrShaderFlag,
+                                                       kFloat2_GrSLType, "translate", nullptr);
+            const char* affineMatrixName;
+            // Hairlines apply the affine matrix in their vertex shader, prior to tessellation.
+            // Otherwise the entire view matrix gets applied at the end of the tess eval shader.
+            auto affineMatrixVisibility = (shader.fStroke.isHairlineStyle()) ?
+                    kVertex_GrShaderFlag : kTessEvaluation_GrShaderFlag;
+            fAffineMatrixUniform = uniHandler->addUniform(nullptr, affineMatrixVisibility,
+                                                          kFloat4_GrSLType, "affineMatrix",
+                                                          &affineMatrixName);
+            if (affineMatrixVisibility & kVertex_GrShaderFlag) {
+                v->codeAppendf("float2x2 AFFINE_MATRIX = float2x2(%s);\n", affineMatrixName);
+            }
+        }
+
+        v->codeAppend(R"(
         // Unpack the control points.
-        float2 prevControlPoint = inputPrevCtrlPt;
-        float4x2 P = float4x2(inputPts01, inputPts23);)");
+        float2 prevControlPoint = prevCtrlPtAttr;
+        float4x2 P = float4x2(pts01Attr, pts23Attr);)");
 
         if (shader.fStroke.isHairlineStyle() && !shader.viewMatrix().isIdentity()) {
             // Hairline case. Transform the points before tessellation. We can still hold off on the
             // translate until the end; we just need to perform the scale and skew right now.
-            if (shader.fHasConics) {
+            if (shader.hasConics()) {
                 v->codeAppend(R"(
-                P[0] = uAffineMatrix * P[0];
-                P[1] = uAffineMatrix * P[1];
-                P[2] = uAffineMatrix * P[2];
-                P[3] = isinf(P[3].y) ? P[3] : uAffineMatrix * P[3];)");
+                P[0] = AFFINE_MATRIX * P[0];
+                P[1] = AFFINE_MATRIX * P[1];
+                P[2] = AFFINE_MATRIX * P[2];
+                P[3] = isinf(P[3].y) ? P[3] : AFFINE_MATRIX * P[3];)");
             } else {
                 v->codeAppend(R"(
-                P = uAffineMatrix * P;)");
+                P = AFFINE_MATRIX * P;)");
             }
             v->codeAppend(R"(
-            prevControlPoint = uAffineMatrix * prevControlPoint;)");
+            prevControlPoint = AFFINE_MATRIX * prevControlPoint;)");
         }
 
-        v->codeAppendf(R"(
+        v->codeAppend(R"(
         // Find the tangents. It's imperative that we compute these tangents from the original
         // (pre-chopping) input points or else the seams might crack.
         float2 prevJoinTangent = P[0] - prevControlPoint;
@@ -219,23 +234,23 @@
         if (cross(prevJoinTangent, tan0) < 0) {
             joinRotation = -joinRotation;
         }
-        float joinRadialSegments = abs(joinRotation) * uNumRadialSegmentsPerRadian;
+        float joinRadialSegments = abs(joinRotation) * NUM_RADIAL_SEGMENTS_PER_RADIAN;
         float numSegmentsInJoin = (joinRadialSegments != 0 /*Is the join non-empty?*/ &&
-                                   uJoinType >= 0 /*Is the join not a round type?*/)
-                ? sign(uJoinType) + 1  // Non-empty bevel joins have 1 segment and miters have 2.
+                                   JOIN_TYPE >= 0 /*Is the join not a round type?*/)
+                ? sign(JOIN_TYPE) + 1  // Non-empty bevel joins have 1 segment and miters have 2.
                 : ceil(joinRadialSegments);  // Otherwise round up the number of radial segments.
 
         // Extends the middle join edge to the miter point.
         float innerJoinRadiusMultiplier = 1;
-        if (uJoinType > 0 /*Is the join a miter type?*/) {
-            innerJoinRadiusMultiplier = miter_extent(cosTheta, uJoinType/*miterLimit*/);
+        if (JOIN_TYPE > 0 /*Is the join a miter type?*/) {
+            innerJoinRadiusMultiplier = miter_extent(cosTheta, JOIN_TYPE/*miterLimit*/);
         }
 
         // Clamps join geometry to the exterior side of the junction.
         float2 joinOutsetClamp = float2(-1, 1);
         if (joinRadialSegments > .1 /*Does the join rotate more than 1/10 of a segment?*/) {
             // Only clamp if the join angle is large enough to guarantee there won't be cracks on
-            // the interior side.
+            // the interior side of the junction.
             joinOutsetClamp = (joinRotation > 0) ? float2(-1, 0) : float2(0, 1);
         }
 
@@ -361,8 +376,15 @@
         vsPts89 = float4(cd.zw, P[3]);
         vsTans01 = float4(tan0, innerTangents[0]);
         vsTans23 = float4(innerTangents[1], tan1);)");
+        if (shader.hasDynamicStroke()) {
+            v->codeAppend(R"(
+            vsStrokeArgs = float2(NUM_RADIAL_SEGMENTS_PER_RADIAN, STROKE_RADIUS);)");
+        }
 
         // The fragment shader just outputs a uniform color.
+        const char* colorUniformName;
+        fColorUniform = uniHandler->addUniform(nullptr, kFragment_GrShaderFlag, kHalf4_GrSLType,
+                                               "color", &colorUniformName);
         args.fFragBuilder->codeAppendf("%s = %s;", args.fOutputColor, colorUniformName);
         args.fFragBuilder->codeAppendf("%s = half4(1);", args.fOutputCoverage);
     }
@@ -371,26 +393,36 @@
                  const GrPrimitiveProcessor& primProc) override {
         const auto& shader = primProc.cast<GrStrokeTessellateShader>();
         const auto& stroke = shader.fStroke;
-        Tolerances tolerances;
-        if (!stroke.isHairlineStyle()) {
-            tolerances.set(shader.viewMatrix().getMaxScale(), stroke.getWidth());
+
+        if (!shader.hasDynamicStroke()) {
+            Tolerances tolerances;
+            if (!stroke.isHairlineStyle()) {
+                tolerances.set(shader.viewMatrix().getMaxScale(), stroke.getWidth());
+            } else {
+                // In the hairline case we transform prior to tessellation. Set up tolerances for an
+                // identity viewMatrix and a strokeWidth of 1.
+                tolerances.set(1, 1);
+            }
+            float strokeRadius = (stroke.isHairlineStyle()) ? .5f : stroke.getWidth() * .5;
+            pdman.set4f(fTessArgsUniform,
+                        tolerances.fParametricIntolerance,  // PARAMETRIC_INTOLERANCE
+                        tolerances.fNumRadialSegmentsPerRadian,  // NUM_RADIAL_SEGMENTS_PER_RADIAN
+                        GetJoinType(shader.fStroke),  // JOIN_TYPE
+                        strokeRadius);  // STROKE_RADIUS
         } else {
-            // In the hairline case we transform prior to tessellation. Set up tolerances for an
-            // identity viewMatrix and a strokeWidth of 1.
-            tolerances.set(1, 1);
+            SkASSERT(!stroke.isHairlineStyle());
+            pdman.set1f(fTessArgsUniform,
+                        Tolerances::CalcParametricIntolerance(shader.viewMatrix().getMaxScale()));
         }
-        float strokeRadius = (stroke.isHairlineStyle()) ? .5f : stroke.getWidth() * .5;
-        pdman.set4f(fTessArgsUniform,
-                    tolerances.fParametricIntolerance,  // uParametricIntolerance
-                    tolerances.fNumRadialSegmentsPerRadian,  // uNumRadialSegmentsPerRadian
-                    get_join_type(shader.fStroke),  // uJoinType
-                    strokeRadius);  // uStrokeRadius
+
+        // Set up the view matrix, if any.
         const SkMatrix& m = shader.viewMatrix();
         if (!m.isIdentity()) {
             pdman.set2f(fTranslateUniform, m.getTranslateX(), m.getTranslateY());
             pdman.set4f(fAffineMatrixUniform, m.getScaleX(), m.getSkewY(), m.getSkewX(),
                         m.getScaleY());
         }
+
         pdman.set4fv(fColorUniform, 1, shader.fColor.vec());
     }
 
@@ -417,15 +449,29 @@
     code.appendf("#define float2x2 mat2\n");
     code.appendf("#define float4x2 mat4x2\n");
     code.appendf("#define PI 3.141592653589793238\n");
-    code.appendf("#define MAX_TESSELLATION_SEGMENTS %i.0\n",
-                 shaderCaps.maxTessellationSegments());
+    code.appendf("#define MAX_TESSELLATION_SEGMENTS %i.0\n", shaderCaps.maxTessellationSegments());
+    code.appendf("#define cross cross2d\n");  // GLSL already has a function named "cross".
 
     const char* tessArgsName = impl->getTessArgsUniformName(uniformHandler);
-    code.appendf("uniform vec4 %s;\n", tessArgsName);
-    code.appendf("#define uParametricIntolerance %s.x\n", tessArgsName);
-    code.appendf("#define uNumRadialSegmentsPerRadian %s.y\n", tessArgsName);
+    if (!this->hasDynamicStroke()) {
+        code.appendf("uniform vec4 %s;\n", tessArgsName);
+        code.appendf("#define PARAMETRIC_INTOLERANCE %s.x\n", tessArgsName);
+        code.appendf("#define NUM_RADIAL_SEGMENTS_PER_RADIAN %s.y\n", tessArgsName);
+    } else {
+        code.appendf("uniform float %s;\n", tessArgsName);
+        code.appendf("#define PARAMETRIC_INTOLERANCE %s\n", tessArgsName);
+        code.appendf("#define NUM_RADIAL_SEGMENTS_PER_RADIAN vsStrokeArgs[0].x\n");
+    }
 
-    code.appendf("#define cross cross2d\n");  // GLSL already has a function named "cross".
+    code.append(kLengthPow2Fn);
+    append_wangs_formula_fn(&code, this->hasConics());
+    code.append(kAtan2Fn);
+    code.append(kCosineBetweenVectorsFn);
+    code.append(kMiterExtentFn);
+    code.append(R"(
+    float cross2d(vec2 a, vec2 b) {
+        return determinant(mat2(a,b));
+    })");
 
     code.append(R"(
     in vec4 vsJoinArgs0[];
@@ -436,32 +482,36 @@
     in vec4 vsPts67[];
     in vec4 vsPts89[];
     in vec4 vsTans01[];
-    in vec4 vsTans23[];
+    in vec4 vsTans23[];)");
+    if (this->hasDynamicStroke()) {
+        code.append(R"(
+        in vec2 vsStrokeArgs[];)");
+    }
 
-    // [numSegmentsInJoin, innerJoinRadiusMultiplier, prevJoinTangent.xy]
-    patch out vec4 tcsJoinArgs0;
-    patch out vec4 tcsJoinArgs1;  // [joinAngle0, radsPerJoinSegment, joinOutsetClamp.xy]
-    patch out vec4 tcsEndPtEndTan;
+    code.append(R"(
     out vec4 tcsPts01[];
     out vec4 tcsPt2Tan0[];
     out vec4 tcsTessArgs[];  // [numCombinedSegments, numParametricSegments, angle0, radsPerSegment]
-
-    float cross2d(vec2 a, vec2 b) {
-        return determinant(mat2(a,b));
-    })");
-
-    code.append(kAtan2Fn);
-    code.append(kLengthPow2Fn);
-    append_wangs_formula_fn(&code, fHasConics);
-    code.append(kCosineBetweenVectorsFn);
-    code.append(kMiterExtentFn);
+    patch out vec4 tcsJoinArgs0; // [numSegmentsInJoin, innerJoinRadiusMultiplier,
+                                 //  prevJoinTangent.xy]
+    patch out vec4 tcsJoinArgs1;  // [joinAngle0, radsPerJoinSegment, joinOutsetClamp.xy]
+    patch out vec4 tcsEndPtEndTan;)");
+    if (this->hasDynamicStroke()) {
+        code.append(R"(
+        patch out float tcsStrokeRadius;)");
+    }
 
     code.append(R"(
     void main() {
         // Forward join args to the evaluation stage.
         tcsJoinArgs0 = vsJoinArgs0[0];
-        tcsJoinArgs1 = vsJoinArgs1[0];
+        tcsJoinArgs1 = vsJoinArgs1[0];)");
+    if (this->hasDynamicStroke()) {
+        code.append(R"(
+        tcsStrokeRadius = vsStrokeArgs[0].y;)");
+    }
 
+    code.append(R"(
         // Unpack the curve args from the vertex shader.
         mat4x2 P;
         mat2 tangents;
@@ -482,7 +532,7 @@
         // Calculate the number of parametric segments. The final tessellated strip will be a
         // composition of these parametric segments as well as radial segments.
         float w = isinf(P[3].y) ? P[3].x : -1.0; // w<0 means the curve is an integral cubic.
-        float numParametricSegments = wangs_formula(P, w, uParametricIntolerance);
+        float numParametricSegments = wangs_formula(P, w, PARAMETRIC_INTOLERANCE);
         if (P[0] == P[1] && P[2] == P[3]) {
             // This is how the patch builder articulates lineTos but Wang's formula returns
             // >>1 segment in this scenario. Assign 1 parametric segment.
@@ -512,7 +562,7 @@
         // Calculate the number of evenly spaced radial segments to chop this section of the curve
         // into. Radial segments divide the curve's rotation into even steps. The final tessellated
         // strip will be a composition of both parametric and radial segments.
-        float numRadialSegments = abs(rotation) * uNumRadialSegmentsPerRadian;
+        float numRadialSegments = abs(rotation) * NUM_RADIAL_SEGMENTS_PER_RADIAN;
         numRadialSegments = max(ceil(numRadialSegments), 1.0);
 
         // The first and last edges are shared by both the parametric and radial sets of edges, so
@@ -569,8 +619,7 @@
         gl_TessLevelOuter[1] = numTotalCombinedSegments;
         gl_TessLevelOuter[2] = 2.0;
         gl_TessLevelOuter[3] = numTotalCombinedSegments;
-    }
-    )");
+    })");
 
     return code;
 }
@@ -745,42 +794,49 @@
     code.appendf("#define float2x2 mat2\n");
     code.appendf("#define float3x2 mat3x2\n");
     code.appendf("#define float4x2 mat4x2\n");
-
-    // Use a #define to make extra sure we don't prevent the loop from unrolling.
+    code.appendf("#define PI 3.141592653589793238\n");
     code.appendf("#define MAX_PARAMETRIC_SEGMENTS_LOG2 %i\n",
                  SkNextLog2(shaderCaps.maxTessellationSegments()));
-    code.appendf("#define PI 3.141592653589793238\n");
 
-    const char* tessArgsName = impl->getTessArgsUniformName(uniformHandler);
-    code.appendf("uniform vec4 %s;\n", tessArgsName);
-    code.appendf("#define uStrokeRadius %s.w\n", tessArgsName);
+    if (!this->hasDynamicStroke()) {
+        const char* tessArgsName = impl->getTessArgsUniformName(uniformHandler);
+        code.appendf("uniform vec4 %s;\n", tessArgsName);
+        code.appendf("#define STROKE_RADIUS %s.w\n", tessArgsName);
+    } else {
+        code.appendf("#define STROKE_RADIUS tcsStrokeRadius\n");
+    }
 
     if (!this->viewMatrix().isIdentity()) {
         const char* translateName = impl->getTranslateUniformName(uniformHandler);
         code.appendf("uniform vec2 %s;\n", translateName);
-        code.appendf("#define uTranslate %s\n", translateName);
+        code.appendf("#define TRANSLATE %s\n", translateName);
         if (!fStroke.isHairlineStyle()) {
             // In the normal case we need the affine matrix too. (In the hairline case we already
             // applied the affine matrix in the vertex shader.)
             const char* affineMatrixName = impl->getAffineMatrixUniformName(uniformHandler);
             code.appendf("uniform vec4 %s;\n", affineMatrixName);
-            code.appendf("#define uAffineMatrix mat2(%s)\n", affineMatrixName);
+            code.appendf("#define AFFINE_MATRIX mat2(%s)\n", affineMatrixName);
         }
     }
 
     code.append(R"(
-    // [numSegmentsInJoin, innerJoinRadiusMultiplier, prevJoinTangent.xy]
-    patch in vec4 tcsJoinArgs0;
-    patch in vec4 tcsJoinArgs1;  // [joinAngle0, radsPerJoinSegment, joinOutsetClamp.xy]
-    patch in vec4 tcsEndPtEndTan;
     in vec4 tcsPts01[];
     in vec4 tcsPt2Tan0[];
     in vec4 tcsTessArgs[];  // [numCombinedSegments, numParametricSegments, angle0, radsPerSegment]
+    patch in vec4 tcsJoinArgs0;  // [numSegmentsInJoin, innerJoinRadiusMultiplier,
+                                 //  prevJoinTangent.xy]
+    patch in vec4 tcsJoinArgs1;  // [joinAngle0, radsPerJoinSegment, joinOutsetClamp.xy]
+    patch in vec4 tcsEndPtEndTan;)");
+    if (this->hasDynamicStroke()) {
+        code.append(R"(
+        patch in float tcsStrokeRadius;)");
+    }
 
+    code.append(R"(
     uniform vec4 sk_RTAdjust;)");
 
     code.append(kUncheckedMixFn);
-    append_eval_stroke_edge_fn(&code, fHasConics);
+    append_eval_stroke_edge_fn(&code, this->hasConics());
 
     code.append(R"(
     void main() {
@@ -799,7 +855,7 @@
         mat4x2 P;
         vec2 tan0;
         vec3 tessellationArgs;
-        float strokeRadius = uStrokeRadius;
+        float strokeRadius = STROKE_RADIUS;
         vec2 strokeOutsetClamp = vec2(-1, 1);
         if (localEdgeID < numSegmentsInJoin || numSegmentsInJoin == numTotalCombinedSegments) {
             // Our edge belongs to the join preceding the curve.
@@ -831,7 +887,7 @@
 
         float w = -1.0;  // w<0 means the curve is an integral cubic.)");
 
-    if (fHasConics) {
+    if (this->hasConics()) {
         code.append(R"(
         if (isinf(P[3].y)) {
             w = P[3].x;  // The curve is actually a conic.
@@ -863,23 +919,21 @@
         outset = clamp(outset, strokeOutsetClamp.x, strokeOutsetClamp.y);
         outset *= strokeRadius;
 
-        vec2 vertexPos = position + normalize(vec2(-tangent.y, tangent.x)) * outset;
-    )");
+        vec2 vertexPos = position + normalize(vec2(-tangent.y, tangent.x)) * outset;)");
 
     if (!this->viewMatrix().isIdentity()) {
         if (!fStroke.isHairlineStyle()) {
             // Normal case. Do the transform after tessellation.
-            code.append("vertexPos = uAffineMatrix * vertexPos + uTranslate;");
+            code.append("vertexPos = AFFINE_MATRIX * vertexPos + TRANSLATE;");
         } else {
             // Hairline case. The scale and skew already happened before tessellation.
-            code.append("vertexPos = vertexPos + uTranslate;");
+            code.append("vertexPos = vertexPos + TRANSLATE;");
         }
     }
 
     code.append(R"(
         gl_Position = vec4(vertexPos * sk_RTAdjust.xz + sk_RTAdjust.yw, 0.0, 1.0);
-    }
-    )");
+    })");
 
     return code;
 }
@@ -896,22 +950,41 @@
         args.fVertBuilder->defineConstant("float", "PI", "3.141592653589793238");
 
         // Helper functions.
+        if (shader.hasDynamicStroke()) {
+            args.fVertBuilder->insertFunction(kNumRadialSegmentsPerRadian);
+        }
         args.fVertBuilder->insertFunction(kAtan2Fn);
         args.fVertBuilder->insertFunction(kLengthPow2Fn);
         args.fVertBuilder->insertFunction(kMiterExtentFn);
         args.fVertBuilder->insertFunction(kUncheckedMixFn);
         args.fVertBuilder->insertFunction(kCosineBetweenVectorsFn);
-        append_wangs_formula_fn(&args.fVertBuilder->functions(), shader.fHasConics);
-        append_eval_stroke_edge_fn(&args.fVertBuilder->functions(), shader.fHasConics);
+        append_wangs_formula_fn(&args.fVertBuilder->functions(), shader.hasConics());
+        append_eval_stroke_edge_fn(&args.fVertBuilder->functions(), shader.hasConics());
 
-        // Tessellation control uniforms.
-        const char* tessArgsName;
-        fTessControlArgsUniform = args.fUniformHandler->addUniform(
-                nullptr, kVertex_GrShaderFlag, kFloat4_GrSLType, "tessControlArgs", &tessArgsName);
-        args.fVertBuilder->codeAppendf("float uParametricIntolerance = %s.x;\n", tessArgsName);
-        args.fVertBuilder->codeAppendf("float uNumRadialSegmentsPerRadian = %s.y;\n", tessArgsName);
-        args.fVertBuilder->codeAppendf("float uMiterLimit = %s.z;\n", tessArgsName);
-        args.fVertBuilder->codeAppendf("float uStrokeRadius = %s.w;\n", tessArgsName);
+        // Tessellation control uniforms and/or dynamic attributes.
+        if (!shader.hasDynamicStroke()) {
+            // [PARAMETRIC_INTOLERANCE, NUM_RADIAL_SEGMENTS_PER_RADIAN, JOIN_TYPE, STROKE_RADIUS]
+            const char* tessArgsName;
+            fTessControlArgsUniform = args.fUniformHandler->addUniform(
+                    nullptr, kVertex_GrShaderFlag, kFloat4_GrSLType, "tessControlArgs",
+                    &tessArgsName);
+            args.fVertBuilder->codeAppendf(R"(
+            float PARAMETRIC_INTOLERANCE = %s.x;
+            float NUM_RADIAL_SEGMENTS_PER_RADIAN = %s.y;
+            float JOIN_TYPE = %s.z;
+            float STROKE_RADIUS = %s.w;)", tessArgsName, tessArgsName, tessArgsName, tessArgsName);
+        } else {
+            const char* parametricIntoleranceName;
+            fTessControlArgsUniform = args.fUniformHandler->addUniform(
+                    nullptr, kVertex_GrShaderFlag, kFloat_GrSLType, "parametricIntolerance",
+                    &parametricIntoleranceName);
+            args.fVertBuilder->codeAppendf(R"(
+            float PARAMETRIC_INTOLERANCE = %s;
+            float STROKE_RADIUS = dynamicStrokeAttr.x;
+            float NUM_RADIAL_SEGMENTS_PER_RADIAN = num_radial_segments_per_radian(
+                    PARAMETRIC_INTOLERANCE, STROKE_RADIUS);
+            float JOIN_TYPE = dynamicStrokeAttr.y;)", parametricIntoleranceName);
+        }
 
         // View matrix uniforms.
         if (!shader.viewMatrix().isIdentity()) {
@@ -921,17 +994,17 @@
                     &affineMatrixName);
             fTranslateUniform = args.fUniformHandler->addUniform(
                     nullptr, kVertex_GrShaderFlag, kFloat2_GrSLType, "translate", &translateName);
-            args.fVertBuilder->codeAppendf("float2x2 uAffineMatrix = float2x2(%s);\n",
+            args.fVertBuilder->codeAppendf("float2x2 AFFINE_MATRIX = float2x2(%s);\n",
                                            affineMatrixName);
-            args.fVertBuilder->codeAppendf("float2 uTranslate = %s;\n", translateName);
+            args.fVertBuilder->codeAppendf("float2 TRANSLATE = %s;\n", translateName);
         }
 
         // Tessellation code.
         args.fVertBuilder->codeAppend(R"(
-        float4x2 P = float4x2(pts01, pts23);
-        float2 lastControlPoint = args.xy;
+        float4x2 P = float4x2(pts01Attr, pts23Attr);
+        float2 lastControlPoint = argsAttr.xy;
         float w = -1;  // w<0 means the curve is an integral cubic.)");
-        if (shader.fHasConics) {
+        if (shader.hasConics()) {
             args.fVertBuilder->codeAppend(R"(
             if (isinf(P[3].y)) {
                 w = P[3].x;  // The curve is actually a conic.
@@ -942,14 +1015,14 @@
             // Hairline case. Transform the points before tessellation. We can still hold off on the
             // translate until the end; we just need to perform the scale and skew right now.
             args.fVertBuilder->codeAppend(R"(
-            P = uAffineMatrix * P;
-            lastControlPoint = uAffineMatrix * lastControlPoint;)");
+            P = AFFINE_MATRIX * P;
+            lastControlPoint = AFFINE_MATRIX * lastControlPoint;)");
         }
         args.fVertBuilder->codeAppend(R"(
-        float numTotalEdges = abs(args.z);
+        float numTotalEdges = abs(argsAttr.z);
 
         // Find how many parametric segments this stroke requires.
-        float numParametricSegments = min(wangs_formula(P, w, uParametricIntolerance),
+        float numParametricSegments = min(wangs_formula(P, w, PARAMETRIC_INTOLERANCE),
                                           float(1 << MAX_PARAMETRIC_SEGMENTS_LOG2));
         if (P[0] == P[1] && P[2] == P[3]) {
             // This is how we describe lines, but Wang's formula does not return 1 in this case.
@@ -966,14 +1039,14 @@
             tan1 = float2(-1,0);
         })");
 
-        if (shader.fStroke.getJoin() == SkPaint::kRound_Join) {
+        if (shader.fStroke.getJoin() == SkPaint::kRound_Join || shader.hasDynamicStroke()) {
             args.fVertBuilder->codeAppend(R"(
             // Determine how many edges to give to the round join. We emit the first and final edges
             // of the join twice: once full width and once restricted to half width. This guarantees
             // perfect seaming by matching the vertices from the join as well as from the strokes on
             // either side.
             float joinRads = acos(cosine_between_vectors(P[0] - lastControlPoint, tan0));
-            float numRadialSegmentsInJoin = max(ceil(joinRads * uNumRadialSegmentsPerRadian), 1);
+            float numRadialSegmentsInJoin = max(ceil(joinRads * NUM_RADIAL_SEGMENTS_PER_RADIAN), 1);
             // +2 because we emit the beginning and ending edges twice (see above comment).
             float numEdgesInJoin = numRadialSegmentsInJoin + 2;
             // The stroke section needs at least two edges. Don't assign more to the join than
@@ -983,11 +1056,19 @@
             if (numParametricSegments == 1) {
                 numEdgesInJoin = numTotalEdges - 2;
             }
-            // Negative args.z means the join is a chop, and chop joins get exactly one segment.
-            if (args.z < 0) {
+            // Negative argsAttr.z means the join is a chop, and chop joins get exactly one segment.
+            if (argsAttr.z < 0) {
                 // +2 because we emit the beginning and ending edges twice (see above comment).
                 numEdgesInJoin = 1 + 2;
             })");
+            if (shader.hasDynamicStroke()) {
+                args.fVertBuilder->codeAppend(R"(
+                if (JOIN_TYPE >= 0 /*Is the join not a round type?*/) {
+                    // Bevel and miter joins get 1 and 2 segments respectively.
+                    // +2 because we emit the beginning and ending edges twice (see above comments).
+                    numEdgesInJoin = sign(JOIN_TYPE) + 1 + 2;
+                })");
+            }
         } else {
             args.fVertBuilder->codeAppendf(R"(
             float numEdgesInJoin = %i;)", NumExtraEdgesInIndirectJoin(joinType));
@@ -1057,12 +1138,12 @@
         }
         float radsPerSegment = rotation / numRadialSegments;)");
 
-        if (joinType == SkPaint::kMiter_Join) {
-            args.fVertBuilder->codeAppend(R"(
+        if (joinType == SkPaint::kMiter_Join || shader.hasDynamicStroke()) {
+            args.fVertBuilder->codeAppendf(R"(
             // Vertices #4 and #5 belong to the edge of the join that extends to the miter point.
-            if ((sk_VertexID | 1) == (4 | 5)) {
-                outset *= miter_extent(cosTheta, uMiterLimit);
-            })");
+            if ((sk_VertexID | 1) == (4 | 5) && %s) {
+                outset *= miter_extent(cosTheta, JOIN_TYPE/*miterLimit*/);
+            })", shader.hasDynamicStroke() ? "JOIN_TYPE > 0/*Is the join a miter type?*/" : "true");
         }
 
         args.fVertBuilder->codeAppendf(R"(
@@ -1086,7 +1167,7 @@
         }
 
         float2 ortho = normalize(float2(tangent.y, -tangent.x));
-        strokeCoord += ortho * (uStrokeRadius * outset);)");
+        strokeCoord += ortho * (STROKE_RADIUS * outset);)");
 
         if (shader.viewMatrix().isIdentity()) {
             // No transform matrix.
@@ -1095,14 +1176,14 @@
         } else if (!shader.fStroke.isHairlineStyle()) {
             // Normal case. Do the transform after tessellation.
             args.fVertBuilder->codeAppend(R"(
-            float2 devCoord = uAffineMatrix * strokeCoord + uTranslate;)");
+            float2 devCoord = AFFINE_MATRIX * strokeCoord + TRANSLATE;)");
             gpArgs->fPositionVar.set(kFloat2_GrSLType, "devCoord");
             gpArgs->fLocalCoordVar.set(kFloat2_GrSLType, "strokeCoord");
         } else {
             // Hairline case. The scale and skew already happened before tessellation.
             args.fVertBuilder->codeAppend(R"(
-            float2 devCoord = strokeCoord + uTranslate;
-            float2 localCoord = inverse(uAffineMatrix) * strokeCoord;)");
+            float2 devCoord = strokeCoord + TRANSLATE;
+            float2 localCoord = inverse(AFFINE_MATRIX) * strokeCoord;)");
             gpArgs->fPositionVar.set(kFloat2_GrSLType, "devCoord");
             gpArgs->fLocalCoordVar.set(kFloat2_GrSLType, "localCoord");
         }
@@ -1120,20 +1201,27 @@
         const auto& shader = primProc.cast<GrStrokeTessellateShader>();
         const auto& stroke = shader.fStroke;
 
-        // Set up the tessellation control uniforms.
-        Tolerances tolerances;
-        if (!stroke.isHairlineStyle()) {
-            tolerances.set(shader.viewMatrix().getMaxScale(), stroke.getWidth());
+        if (!shader.hasDynamicStroke()) {
+            // Set up the tessellation control uniforms.
+            Tolerances tolerances;
+            if (!stroke.isHairlineStyle()) {
+                tolerances.set(shader.viewMatrix().getMaxScale(), stroke.getWidth());
+            } else {
+                // In the hairline case we transform prior to tessellation. Set up tolerances for an
+                // identity viewMatrix and a strokeWidth of 1.
+                tolerances.set(1, 1);
+            }
+            float strokeRadius = (stroke.isHairlineStyle()) ? .5f : stroke.getWidth() * .5;
+            pdman.set4f(fTessControlArgsUniform,
+                        tolerances.fParametricIntolerance,  // PARAMETRIC_INTOLERANCE
+                        tolerances.fNumRadialSegmentsPerRadian,  // NUM_RADIAL_SEGMENTS_PER_RADIAN
+                        GetJoinType(shader.fStroke),  // JOIN_TYPE
+                        strokeRadius);  // STROKE_RADIUS
         } else {
-            // In the hairline case we transform prior to tessellation. Set up tolerances for an
-            // identity viewMatrix and a strokeWidth of 1.
-            tolerances.set(1, 1);
+            SkASSERT(!stroke.isHairlineStyle());
+            pdman.set1f(fTessControlArgsUniform,
+                        Tolerances::CalcParametricIntolerance(shader.viewMatrix().getMaxScale()));
         }
-        pdman.set4f(fTessControlArgsUniform,
-                    tolerances.fParametricIntolerance,  // uParametricIntolerance
-                    tolerances.fNumRadialSegmentsPerRadian,  // uNumRadialSegmentsPerRadian
-                    shader.fStroke.getMiter(),  // uMiterLimit
-                    (stroke.isHairlineStyle()) ? .5f : stroke.getWidth() * .5);  // uStrokeRadius
 
         // Set up the view matrix, if any.
         const SkMatrix& m = shader.viewMatrix();
@@ -1154,14 +1242,13 @@
 
 void GrStrokeTessellateShader::getGLSLProcessorKey(const GrShaderCaps&,
                                                    GrProcessorKeyBuilder* b) const {
-    uint32_t key = this->viewMatrix().isIdentity();
-    if (fMode == Mode::kIndirect) {
-        SkASSERT(fStroke.getJoin() >> 2 == 0);
-        key = (key << 2) | fStroke.getJoin();
-    }
+    bool keyNeedsJoin = fMode == Mode::kIndirect && !(fShaderFlags & ShaderFlags::kDynamicStroke);
+    SkASSERT(fStroke.getJoin() >> 2 == 0);
+    uint32_t key = (uint32_t)fShaderFlags;
+    key = (key << 1) | (uint32_t)fMode;
+    key = (key << 2) | ((keyNeedsJoin) ? fStroke.getJoin() : 0);
     key = (key << 1) | (uint32_t)fStroke.isHairlineStyle();
-    key = (key << 1) | (uint32_t)fHasConics;
-    key = (key << 1) | (uint32_t)fMode;  // Must be last.
+    key = (key << 1) | (uint32_t)this->viewMatrix().isIdentity();
     b->add32(key);
 }
 
diff --git a/src/gpu/tessellate/GrStrokeTessellateShader.h b/src/gpu/tessellate/GrStrokeTessellateShader.h
index 7219090..d146917 100644
--- a/src/gpu/tessellate/GrStrokeTessellateShader.h
+++ b/src/gpu/tessellate/GrStrokeTessellateShader.h
@@ -31,13 +31,13 @@
         kIndirect
     };
 
-    // Size in bytes of a basic tessellation patch without any dynamic attribs like stroke params or
-    // color.
-    constexpr static size_t kTessellationPatchBaseStride = sizeof(SkPoint) * 5;
+    enum class ShaderFlags {
+        kNone          = 0,
+        kHasConics     = 1 << 0,
+        kDynamicStroke = 1 << 1  // Each patch or instance has its own stroke width and join type.
+    };
 
-    // Size in bytes of a basic indirect draw instance without any dynamic attribs like stroke
-    // params or color.
-    constexpr static size_t kIndirectInstanceBaseStride = sizeof(float) * 11;
+    GR_DECL_BITFIELD_CLASS_OPS_FRIENDS(ShaderFlags);
 
     // When using indirect draws, we expect a fixed number of additional edges to be appended onto
     // each instance in order to implement its preceding join. Specifically, each join emits:
@@ -67,16 +67,17 @@
     // These tolerances decide the number of parametric and radial segments the tessellator will
     // linearize curves into. These decisions are made in (pre-viewMatrix) local path space.
     struct Tolerances {
+        // See fParametricIntolerance.
+        constexpr static float CalcParametricIntolerance(float matrixMaxScale) {
+            return matrixMaxScale * GrTessellationPathRenderer::kLinearizationIntolerance;
+        }
         // Returns the equivalent tolerances in (pre-viewMatrix) local path space that the
         // tessellator will use when rendering this stroke.
-        static Tolerances MakePreTransform(const SkMatrix& viewMatrix, const SkStrokeRec& stroke) {
-            std::array<float,2> matrixScales;
-            if (!viewMatrix.getMinMaxScales(matrixScales.data())) {
-                matrixScales.fill(1);
-            }
-            auto [matrixMinScale, matrixMaxScale] = matrixScales;
-            float localStrokeWidth = stroke.getWidth();
-            if (stroke.isHairlineStyle()) {
+        static Tolerances MakePreTransform(const float matrixMinMaxScales[2], float strokeWidth) {
+            float matrixMaxScale = matrixMinMaxScales[1];
+            float localStrokeWidth = strokeWidth;
+            if (localStrokeWidth == 0) {
+                float matrixMinScale = matrixMinMaxScales[0];
                 // If the stroke is hairline then the tessellator will operate in post-transform
                 // space instead. But for the sake of CPU methods that need to conservatively
                 // approximate the number of segments to emit, we use
@@ -95,8 +96,7 @@
             this->set(matrixMaxScale, strokeWidth);
         }
         void set(float matrixMaxScale, float strokeWidth) {
-            fParametricIntolerance =
-                    matrixMaxScale * GrTessellationPathRenderer::kLinearizationIntolerance;
+            fParametricIntolerance = CalcParametricIntolerance(matrixMaxScale);
             fNumRadialSegmentsPerRadian =
                     .5f / acosf(std::max(1 - 2/(fParametricIntolerance * strokeWidth), -1.f));
         }
@@ -111,60 +111,114 @@
         float fNumRadialSegmentsPerRadian;
     };
 
+    // We encode all of a join's information in a single float value:
+    //
+    //     Negative => Round Join
+    //     Zero     => Bevel Join
+    //     Positive => Miter Join, and the value is also the miter limit
+    //
+    static float GetJoinType(const SkStrokeRec& stroke) {
+        switch (stroke.getJoin()) {
+            case SkPaint::kRound_Join: return -1;
+            case SkPaint::kBevel_Join: return 0;
+            case SkPaint::kMiter_Join: SkASSERT(stroke.getMiter() >= 0); return stroke.getMiter();
+        }
+        SkUNREACHABLE;
+    }
+
+    // This struct gets written out to each patch or instance if kDynamicStroke is enabled.
+    struct DynamicStroke {
+        static bool StrokesHaveEqualDynamicState(const SkStrokeRec& a, const SkStrokeRec& b) {
+            return a.getWidth() == b.getWidth() && a.getJoin() == b.getJoin() &&
+                   (a.getJoin() != SkPaint::kMiter_Join || a.getMiter() == b.getMiter());
+        }
+        void set(const SkStrokeRec& stroke) {
+            fRadius = stroke.getWidth() * .5f;
+            fJoinType = GetJoinType(stroke);
+        }
+        float fRadius;
+        float fJoinType;  // See GetJoinType().
+    };
+
+    // Size in bytes of a tessellation patch with the given shader flags.
+    static size_t PatchStride(ShaderFlags shaderFlags) {
+        size_t stride = sizeof(SkPoint) * 5;
+        if (shaderFlags & ShaderFlags::kDynamicStroke) {
+            stride += sizeof(DynamicStroke);
+        }
+        return stride;
+    }
+
+    // Size in bytes of an indirect draw instance with the given shader flags.
+    static size_t IndirectInstanceStride(ShaderFlags shaderFlags) {
+        size_t stride = sizeof(float) * 11;
+        if (shaderFlags & ShaderFlags::kDynamicStroke) {
+            stride += sizeof(DynamicStroke);
+        }
+        return stride;
+    }
+
     // 'viewMatrix' is applied to the geometry post tessellation. It cannot have perspective.
-    GrStrokeTessellateShader(Mode mode, bool hasConics, const SkStrokeRec& stroke,
-                             const SkMatrix& viewMatrix, SkPMColor4f color)
+    GrStrokeTessellateShader(Mode mode, ShaderFlags shaderFlags, const SkMatrix& viewMatrix,
+                             const SkStrokeRec& stroke, SkPMColor4f color)
             : GrPathShader(kTessellate_GrStrokeTessellateShader_ClassID, viewMatrix,
                            (mode == Mode::kTessellation) ?
                                    GrPrimitiveType::kPatches : GrPrimitiveType::kTriangleStrip,
                            (mode == Mode::kTessellation) ? 1 : 0)
             , fMode(mode)
-            , fHasConics(hasConics)
+            , fShaderFlags(shaderFlags)
             , fStroke(stroke)
             , fColor(color) {
         if (fMode == Mode::kTessellation) {
-            constexpr static Attribute kTessellationAttribs[] = {
-                    // A join calculates its starting angle using inputPrevCtrlPt.
-                    {"inputPrevCtrlPt", kFloat2_GrVertexAttribType, kFloat2_GrSLType},
-                    // inputPts 0..3 define the stroke as a cubic bezier. If p3.y is infinity, then
-                    // it's a conic with w=p3.x.
-                    //
-                    // If p0 == inputPrevCtrlPt, then no join is emitted.
-                    //
-                    // inputPts=[p0, p3, p3, p3] is a reserved pattern that means this patch is a
-                    // join only, whose start and end tangents are (p0 - inputPrevCtrlPt) and
-                    // (p3 - p0).
-                    //
-                    // inputPts=[p0, p0, p0, p3] is a reserved pattern that means this patch is a
-                    // "bowtie", or double-sided round join, anchored on p0 and rotating from
-                    // (p0 - inputPrevCtrlPt) to (p3 - p0).
-                    {"inputPts01", kFloat4_GrVertexAttribType, kFloat4_GrSLType},
-                    {"inputPts23", kFloat4_GrVertexAttribType, kFloat4_GrSLType}};
-            this->setVertexAttributes(kTessellationAttribs, SK_ARRAY_COUNT(kTessellationAttribs));
-            SkASSERT(this->vertexStride() == kTessellationPatchBaseStride);
+            // A join calculates its starting angle using prevCtrlPtAttr.
+            fAttribs.emplace_back("prevCtrlPtAttr", kFloat2_GrVertexAttribType, kFloat2_GrSLType);
+            // pts 0..3 define the stroke as a cubic bezier. If p3.y is infinity, then it's a conic
+            // with w=p3.x.
+            //
+            // If p0 == prevCtrlPtAttr, then no join is emitted.
+            //
+            // pts=[p0, p3, p3, p3] is a reserved pattern that means this patch is a join only,
+            // whose start and end tangents are (p0 - prevCtrlPtAttr) and (p3 - p0).
+            //
+            // pts=[p0, p0, p0, p3] is a reserved pattern that means this patch is a "bowtie", or
+            // double-sided round join, anchored on p0 and rotating from (p0 - prevCtrlPtAttr) to
+            // (p3 - p0).
+            fAttribs.emplace_back("pts01Attr", kFloat4_GrVertexAttribType, kFloat4_GrSLType);
+            fAttribs.emplace_back("pts23Attr", kFloat4_GrVertexAttribType, kFloat4_GrSLType);
         } else {
-            constexpr static Attribute kIndirectAttribs[] = {
-                    // pts 0..3 define the stroke as a cubic bezier. If p3.y is infinity, then it's
-                    // a conic with w=p3.x.
-                    //
-                    // An empty stroke (p0==p1==p2==p3) is a special case that denotes a circle, or
-                    // 180-degree point stroke.
-                    {"pts01", kFloat4_GrVertexAttribType, kFloat4_GrSLType},
-                    {"pts23", kFloat4_GrVertexAttribType, kFloat4_GrSLType},
-                    // "lastControlPoint" and "numTotalEdges" are both packed into these args.
-                    //
-                    // A join calculates its starting angle using "args.xy=lastControlPoint".
-                    //
-                    // "abs(args.z=numTotalEdges)" tells the shader the literal number of edges in
-                    // the triangle strip being rendered (i.e., it should be vertexCount/2). If
-                    // numTotalEdges is negative and the join type is "kRound", it also instructs
-                    // the shader to only allocate one segment the preceding round join.
-                    {"args", kFloat3_GrVertexAttribType, kFloat3_GrSLType}};
-            this->setInstanceAttributes(kIndirectAttribs, SK_ARRAY_COUNT(kIndirectAttribs));
-            SkASSERT(this->instanceStride() == kIndirectInstanceBaseStride);
+            // pts 0..3 define the stroke as a cubic bezier. If p3.y is infinity, then it's a conic
+            // with w=p3.x.
+            //
+            // An empty stroke (p0==p1==p2==p3) is a special case that denotes a circle, or
+            // 180-degree point stroke.
+            fAttribs.emplace_back("pts01Attr", kFloat4_GrVertexAttribType, kFloat4_GrSLType);
+            fAttribs.emplace_back("pts23Attr", kFloat4_GrVertexAttribType, kFloat4_GrSLType);
+            // "lastControlPoint" and "numTotalEdges" are both packed into argsAttr.
+            //
+            // A join calculates its starting angle using "argsAttr.xy=lastControlPoint".
+            //
+            // "abs(argsAttr.z=numTotalEdges)" tells the shader the literal number of edges in the
+            // triangle strip being rendered (i.e., it should be vertexCount/2). If numTotalEdges is
+            // negative and the join type is "kRound", it also instructs the shader to only allocate
+            // one segment to the preceding round join.
+            fAttribs.emplace_back("argsAttr", kFloat3_GrVertexAttribType, kFloat3_GrSLType);
+        }
+        if (fShaderFlags & ShaderFlags::kDynamicStroke) {
+            fAttribs.emplace_back("dynamicStrokeAttr", kFloat2_GrVertexAttribType,
+                                  kFloat2_GrSLType);
+        }
+        if (fMode == Mode::kTessellation) {
+            this->setVertexAttributes(fAttribs.data(), fAttribs.count());
+            SkASSERT(this->vertexStride() == PatchStride(fShaderFlags));
+        } else {
+            this->setInstanceAttributes(fAttribs.data(), fAttribs.count());
+            SkASSERT(this->instanceStride() == IndirectInstanceStride(fShaderFlags));
         }
     }
 
+    bool hasConics() const { return fShaderFlags & ShaderFlags::kHasConics; }
+    bool hasDynamicStroke() const { return fShaderFlags & ShaderFlags::kDynamicStroke; }
+
 private:
     const char* name() const override { return "GrStrokeTessellateShader"; }
     void getGLSLProcessorKey(const GrShaderCaps&, GrProcessorKeyBuilder* b) const override;
@@ -180,12 +234,15 @@
                                          const GrShaderCaps&) const override;
 
     const Mode fMode;
-    const bool fHasConics;
+    const ShaderFlags fShaderFlags;
     const SkStrokeRec fStroke;
     const SkPMColor4f fColor;
+    SkSTArray<4, Attribute> fAttribs;
 
     class TessellationImpl;
     class IndirectImpl;
 };
 
+GR_MAKE_BITFIELD_CLASS_OPS(GrStrokeTessellateShader::ShaderFlags);
+
 #endif