Initial clipShader implementation for SkClipStack and GPU

Change-Id: I0af800900a7fbd9d16af0058ee0754358ebc3875
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/293562
Commit-Queue: Michael Ludwig <michaelludwig@google.com>
Reviewed-by: Brian Salomon <bsalomon@google.com>
diff --git a/src/core/SkClipStack.cpp b/src/core/SkClipStack.cpp
index 349fdc6..fed7bf3 100644
--- a/src/core/SkClipStack.cpp
+++ b/src/core/SkClipStack.cpp
@@ -9,6 +9,9 @@
 #include "include/core/SkPath.h"
 #include "src/core/SkClipOpPriv.h"
 #include "src/core/SkClipStack.h"
+#include "src/core/SkRectPriv.h"
+#include "src/shaders/SkShaderBase.h"
+
 #include <atomic>
 #include <new>
 
@@ -21,15 +24,22 @@
         case DeviceSpaceType::kEmpty:
             fDeviceSpaceRRect.setEmpty();
             fDeviceSpacePath.reset();
+            fShader.reset();
             break;
         case DeviceSpaceType::kRect:  // Rect uses rrect
         case DeviceSpaceType::kRRect:
             fDeviceSpacePath.reset();
+            fShader.reset();
             fDeviceSpaceRRect = that.fDeviceSpaceRRect;
             break;
         case DeviceSpaceType::kPath:
+            fShader.reset();
             fDeviceSpacePath.set(that.getDeviceSpacePath());
             break;
+        case DeviceSpaceType::kShader:
+            fDeviceSpacePath.reset();
+            fShader = that.fShader;
+            break;
     }
 
     fSaveCount = that.fSaveCount;
@@ -60,6 +70,8 @@
         return false;
     }
     switch (fDeviceSpaceType) {
+        case DeviceSpaceType::kShader:
+            return this->getShader() == element.getShader();
         case DeviceSpaceType::kPath:
             return this->getDeviceSpacePath() == element.getDeviceSpacePath();
         case DeviceSpaceType::kRRect:
@@ -76,12 +88,18 @@
 
 const SkRect& SkClipStack::Element::getBounds() const {
     static const SkRect kEmpty = {0, 0, 0, 0};
+    static const SkRect kInfinite = SkRectPriv::MakeLargeS32();
     switch (fDeviceSpaceType) {
         case DeviceSpaceType::kRect:  // fallthrough
         case DeviceSpaceType::kRRect:
             return fDeviceSpaceRRect.getBounds();
         case DeviceSpaceType::kPath:
             return fDeviceSpacePath.get()->getBounds();
+        case DeviceSpaceType::kShader:
+            // Shaders have infinite bounds since any pixel could have clipped or full coverage
+            // (which is different from wide-open, where every pixel has 1.0 coverage, or empty
+            //  where every pixel has 0.0 coverage).
+            return kInfinite;
         case DeviceSpaceType::kEmpty:
             return kEmpty;
         default:
@@ -99,6 +117,7 @@
         case DeviceSpaceType::kPath:
             return fDeviceSpacePath.get()->conservativelyContainsRect(rect);
         case DeviceSpaceType::kEmpty:
+        case DeviceSpaceType::kShader:
             return false;
         default:
             SkDEBUGFAIL("Unexpected type.");
@@ -116,6 +135,7 @@
         case DeviceSpaceType::kPath:
             return fDeviceSpacePath.get()->conservativelyContainsRect(rrect.getBounds());
         case DeviceSpaceType::kEmpty:
+        case DeviceSpaceType::kShader:
             return false;
         default:
             SkDEBUGFAIL("Unexpected type.");
@@ -140,6 +160,9 @@
         case DeviceSpaceType::kPath:
             fDeviceSpacePath.get()->toggleInverseFillType();
             break;
+        case DeviceSpaceType::kShader:
+            fShader = as_SB(fShader)->makeInvertAlpha();
+            break;
         case DeviceSpaceType::kEmpty:
             // Should this set to an empty, inverse filled path?
             break;
@@ -219,6 +242,13 @@
     this->initCommon(saveCount, op, doAA);
 }
 
+void SkClipStack::Element::initShader(int saveCount, sk_sp<SkShader> shader) {
+    SkASSERT(shader);
+    fDeviceSpaceType = DeviceSpaceType::kShader;
+    fShader = std::move(shader);
+    this->initCommon(saveCount, SkClipOp::kIntersect, false);
+}
+
 void SkClipStack::Element::asDeviceSpacePath(SkPath* path) const {
     switch (fDeviceSpaceType) {
         case DeviceSpaceType::kEmpty:
@@ -235,6 +265,10 @@
         case DeviceSpaceType::kPath:
             *path = *fDeviceSpacePath.get();
             break;
+        case DeviceSpaceType::kShader:
+            path->reset();
+            path->addRect(SkRectPriv::MakeLargeS32());
+            break;
     }
     path->setIsVolatile(true);
 }
@@ -246,6 +280,7 @@
     fIsIntersectionOfRects = false;
     fDeviceSpaceRRect.setEmpty();
     fDeviceSpacePath.reset();
+    fShader.reset();
     fGenID = kEmptyGenID;
     SkDEBUGCODE(this->checkEmpty();)
 }
@@ -257,6 +292,7 @@
     SkASSERT(kEmptyGenID == fGenID);
     SkASSERT(fDeviceSpaceRRect.isEmpty());
     SkASSERT(!fDeviceSpacePath.isValid());
+    SkASSERT(!fShader);
 }
 
 bool SkClipStack::Element::canBeIntersectedInPlace(int saveCount, SkClipOp op) const {
@@ -501,6 +537,14 @@
                 fFiniteBoundType = kNormal_BoundsType;
             }
             break;
+        case DeviceSpaceType::kShader:
+            // A shader is infinite. We don't act as wide-open here (which is an empty bounds with
+            // the inside out type). This is because when the bounds is empty and inside-out, we
+            // know there's full coverage everywhere. With a shader, there's *unknown* coverage
+            // everywhere.
+            fFiniteBound = SkRectPriv::MakeLargeS32();
+            fFiniteBoundType = kNormal_BoundsType;
+            break;
         case DeviceSpaceType::kEmpty:
             SkDEBUGFAIL("We shouldn't get here with an empty element.");
             break;
@@ -758,6 +802,15 @@
                 case Element::DeviceSpaceType::kEmpty:
                     SkDEBUGCODE(prior->checkEmpty();)
                     return;
+                case Element::DeviceSpaceType::kShader:
+                    if (Element::DeviceSpaceType::kShader == element.getDeviceSpaceType()) {
+                        prior->fShader = SkShaders::Blend(SkBlendMode::kSrcIn,
+                                                          element.fShader, prior->fShader);
+                        Element* priorPrior = (Element*) iter.prev();
+                        prior->updateBoundAndGenID(priorPrior);
+                        return;
+                    }
+                    break;
                 case Element::DeviceSpaceType::kRect:
                     if (Element::DeviceSpaceType::kRect == element.getDeviceSpaceType()) {
                         if (prior->rectRectIntersectAllowed(element.getDeviceSpaceRect(),
@@ -827,6 +880,14 @@
     }
 }
 
+void SkClipStack::clipShader(sk_sp<SkShader> shader) {
+    Element element(fSaveCount, std::move(shader));
+    this->pushElement(element);
+    // clipShader should not be used with expanding clip ops, so we shouldn't need to worry about
+    // the clip restriction rect either.
+    SkASSERT(fClipRestrictionRect.isEmpty());
+}
+
 void SkClipStack::clipEmpty() {
     Element* element = (Element*) fDeque.back();
 
@@ -1002,7 +1063,8 @@
     }
 
     const Element* back = static_cast<const Element*>(fDeque.back());
-    if (kInsideOut_BoundsType == back->fFiniteBoundType && back->fFiniteBound.isEmpty()) {
+    if (kInsideOut_BoundsType == back->fFiniteBoundType && back->fFiniteBound.isEmpty() &&
+        Element::DeviceSpaceType::kShader != back->fDeviceSpaceType) {
         return kWideOpenGenID;
     }
 
@@ -1015,12 +1077,14 @@
         "empty",
         "rect",
         "rrect",
-        "path"
+        "path",
+        "shader"
     };
     static_assert(0 == static_cast<int>(DeviceSpaceType::kEmpty), "enum mismatch");
     static_assert(1 == static_cast<int>(DeviceSpaceType::kRect), "enum mismatch");
     static_assert(2 == static_cast<int>(DeviceSpaceType::kRRect), "enum mismatch");
     static_assert(3 == static_cast<int>(DeviceSpaceType::kPath), "enum mismatch");
+    static_assert(4 == static_cast<int>(DeviceSpaceType::kShader), "enum mismatch");
     static_assert(SK_ARRAY_COUNT(kTypeStrings) == kTypeCnt, "enum mismatch");
 
     static const char* kOpStrings[] = {
@@ -1056,6 +1120,9 @@
         case DeviceSpaceType::kPath:
             this->getDeviceSpacePath().dump(nullptr, true, false);
             break;
+        case DeviceSpaceType::kShader:
+            // SkShaders don't provide much introspection that's worthwhile.
+            break;
     }
 }
 
diff --git a/src/core/SkClipStack.h b/src/core/SkClipStack.h
index 6a12c2f..fdbc59b 100644
--- a/src/core/SkClipStack.h
+++ b/src/core/SkClipStack.h
@@ -13,6 +13,7 @@
 #include "include/core/SkRRect.h"
 #include "include/core/SkRect.h"
 #include "include/core/SkRegion.h"
+#include "include/core/SkShader.h"
 #include "include/private/SkDeque.h"
 #include "src/core/SkClipOpPriv.h"
 #include "src/core/SkMessageBus.h"
@@ -59,8 +60,10 @@
             kRRect,
             //!< This element combines a device space path with the current clip.
             kPath,
+            //!< This element does not have geometry, but applies a shader to the clip
+            kShader,
 
-            kLastType = kPath
+            kLastType = kShader
         };
         static const int kTypeCnt = (int)DeviceSpaceType::kLastType + 1;
 
@@ -83,6 +86,10 @@
             this->initPath(0, path, m, op, doAA);
         }
 
+        Element(sk_sp<SkShader> shader) {
+            this->initShader(0, std::move(shader));
+        }
+
         ~Element();
 
         bool operator== (const Element& element) const;
@@ -113,6 +120,14 @@
             return fDeviceSpaceRRect.getBounds();
         }
 
+        //!< Call if getDeviceSpaceType() is kShader to get a reference to the clip shader.
+        sk_sp<SkShader> refShader() const {
+            return fShader;
+        }
+        const SkShader* getShader() const {
+            return fShader.get();
+        }
+
         //!< Call if getDeviceSpaceType() is not kEmpty to get the set operation used to combine
         //!< this element.
         SkClipOp getOp() const { return fOp; }
@@ -195,6 +210,7 @@
 
         SkTLazy<SkPath> fDeviceSpacePath;
         SkRRect fDeviceSpaceRRect;
+        sk_sp<SkShader> fShader;
         int fSaveCount;  // save count of stack when this element was added.
         SkClipOp fOp;
         DeviceSpaceType fDeviceSpaceType;
@@ -239,11 +255,16 @@
             this->initPath(saveCount, path, m, op, doAA);
         }
 
+        Element(int saveCount, sk_sp<SkShader> shader) {
+            this->initShader(saveCount, std::move(shader));
+        }
+
         void initCommon(int saveCount, SkClipOp op, bool doAA);
         void initRect(int saveCount, const SkRect&, const SkMatrix&, SkClipOp, bool doAA);
         void initRRect(int saveCount, const SkRRect&, const SkMatrix&, SkClipOp, bool doAA);
         void initPath(int saveCount, const SkPath&, const SkMatrix&, SkClipOp, bool doAA);
         void initAsPath(int saveCount, const SkPath&, const SkMatrix&, SkClipOp, bool doAA);
+        void initShader(int saveCount, sk_sp<SkShader>);
 
         void setEmpty();
 
@@ -345,6 +366,7 @@
     void clipRect(const SkRect&, const SkMatrix& matrix, SkClipOp, bool doAA);
     void clipRRect(const SkRRect&, const SkMatrix& matrix, SkClipOp, bool doAA);
     void clipPath(const SkPath&, const SkMatrix& matrix, SkClipOp, bool doAA);
+    void clipShader(sk_sp<SkShader>);
     // An optimized version of clipDevRect(emptyRect, kIntersect, ...)
     void clipEmpty();
     void setDeviceClipRestriction(const SkIRect& rect) {
@@ -513,4 +535,3 @@
 };
 
 #endif
-
diff --git a/src/core/SkClipStackDevice.cpp b/src/core/SkClipStackDevice.cpp
index bbcec4f..2021852 100644
--- a/src/core/SkClipStackDevice.cpp
+++ b/src/core/SkClipStackDevice.cpp
@@ -39,6 +39,10 @@
     fClipStack.clipPath(path, this->localToDevice(), op, aa);
 }
 
+void SkClipStackDevice::onClipShader(sk_sp<SkShader> shader) {
+    fClipStack.clipShader(std::move(shader));
+}
+
 void SkClipStackDevice::onClipRegion(const SkRegion& rgn, SkClipOp op) {
     SkIPoint origin = this->getOrigin();
     SkRegion tmp;
diff --git a/src/core/SkClipStackDevice.h b/src/core/SkClipStackDevice.h
index e7fb96a..7cbf213 100644
--- a/src/core/SkClipStackDevice.h
+++ b/src/core/SkClipStackDevice.h
@@ -27,6 +27,7 @@
     void onClipRect(const SkRect& rect, SkClipOp, bool aa) override;
     void onClipRRect(const SkRRect& rrect, SkClipOp, bool aa) override;
     void onClipPath(const SkPath& path, SkClipOp, bool aa) override;
+    void onClipShader(sk_sp<SkShader>) override;
     void onClipRegion(const SkRegion& deviceRgn, SkClipOp) override;
     void onSetDeviceClipRestriction(SkIRect* mutableClipRestriction) override;
     bool onClipIsAA() const override;
diff --git a/src/gpu/GrClipStackClip.cpp b/src/gpu/GrClipStackClip.cpp
index f0f31a6..b8d60b5 100644
--- a/src/gpu/GrClipStackClip.cpp
+++ b/src/gpu/GrClipStackClip.cpp
@@ -253,7 +253,8 @@
     // The opsTask ID must not be looked up until AFTER producing the clip mask (if any). That step
     // can cause a flush or otherwise change which opstask our draw is going into.
     uint32_t opsTaskID = renderTargetContext->getOpsTask()->uniqueID();
-    if (auto clipFPs = reducedClip.finishAndDetachAnalyticFPs(ccpr, opsTaskID)) {
+    if (auto clipFPs = reducedClip.finishAndDetachAnalyticFPs(context, *fMatrixProvider, ccpr,
+                                                              opsTaskID)) {
         out->addCoverageFP(std::move(clipFPs));
     }
 
diff --git a/src/gpu/GrClipStackClip.h b/src/gpu/GrClipStackClip.h
index 90ca867..7bef90b 100644
--- a/src/gpu/GrClipStackClip.h
+++ b/src/gpu/GrClipStackClip.h
@@ -20,9 +20,15 @@
  */
 class GrClipStackClip final : public GrClip {
 public:
-    GrClipStackClip(const SkClipStack* stack = nullptr) { this->reset(stack); }
+    GrClipStackClip(const SkClipStack* stack = nullptr,
+                    const SkMatrixProvider* matrixProvider = nullptr) {
+        this->reset(stack, matrixProvider);
+    }
 
-    void reset(const SkClipStack* stack) { fStack = stack; }
+    void reset(const SkClipStack* stack, const SkMatrixProvider* matrixProvider) {
+        fStack = stack;
+        fMatrixProvider = matrixProvider;
+    }
 
     bool quickContains(const SkRect&) const final;
     bool quickContains(const SkRRect&) const final;
@@ -61,7 +67,8 @@
                               const GrRenderTargetContext*,
                               const GrReducedClip&);
 
-    const SkClipStack*  fStack;
+    const SkClipStack*      fStack;
+    const SkMatrixProvider* fMatrixProvider; // for applying clip shaders
 };
 
 #endif // GrClipStackClip_DEFINED
diff --git a/src/gpu/GrFPArgs.h b/src/gpu/GrFPArgs.h
index 3058f86..6b21e32 100644
--- a/src/gpu/GrFPArgs.h
+++ b/src/gpu/GrFPArgs.h
@@ -28,7 +28,13 @@
     }
 
     class WithPreLocalMatrix;
-    class WithPostLocalMatrix;
+
+    GrFPArgs withNewMatrixProvider(const SkMatrixProvider& provider) const {
+        GrFPArgs newArgs(fContext, provider, fFilterQuality, fDstColorInfo);
+        newArgs.fInputColorIsOpaque = fInputColorIsOpaque;
+        newArgs.fPreLocalMatrix = fPreLocalMatrix;
+        return newArgs;
+    }
 
     GrRecordingContext* fContext;
     const SkMatrixProvider& fMatrixProvider;
@@ -65,4 +71,3 @@
 };
 
 #endif
-
diff --git a/src/gpu/GrReducedClip.cpp b/src/gpu/GrReducedClip.cpp
index 93c01b6..d1b1041 100644
--- a/src/gpu/GrReducedClip.cpp
+++ b/src/gpu/GrReducedClip.cpp
@@ -25,7 +25,9 @@
 #include "src/gpu/effects/GrConvexPolyEffect.h"
 #include "src/gpu/effects/GrRRectEffect.h"
 #include "src/gpu/effects/generated/GrAARectEffect.h"
+#include "src/gpu/effects/generated/GrDeviceSpaceEffect.h"
 #include "src/gpu/geometry/GrStyledShape.h"
+#include "src/shaders/SkShaderBase.h"
 
 /**
  * There are plenty of optimizations that could be added here. Maybe flips could be folded into
@@ -180,6 +182,18 @@
             break;
         }
 
+        if (element->getDeviceSpaceType() == Element::DeviceSpaceType::kShader) {
+            if (fShader) {
+                // Combine multiple shaders together with src-in blending. This works because all
+                // shaders are effectively intersections (difference ops have been modified to be
+                // 1 - alpha already).
+                fShader = SkShaders::Blend(SkBlendMode::kSrcIn, element->refShader(), fShader);
+            } else {
+                fShader = element->refShader();
+            }
+            continue;
+        }
+
         bool skippable = false;
         bool isFlip = false; // does this op just flip the in/out state of every point in the bounds
 
@@ -487,6 +501,8 @@
 }
 
 GrReducedClip::ClipResult GrReducedClip::clipInsideElement(const Element* element) {
+    SkASSERT(element->getDeviceSpaceType() != Element::DeviceSpaceType::kShader);
+
     SkIRect elementIBounds;
     if (!element->isAA()) {
         element->getBounds().round(&elementIBounds);
@@ -530,12 +546,17 @@
         case Element::DeviceSpaceType::kPath:
             return this->addAnalyticFP(element->getDeviceSpacePath(),
                                        Invert(element->isInverseFilled()), GrAA(element->isAA()));
+
+        case Element::DeviceSpaceType::kShader:
+            SkUNREACHABLE;
     }
 
     SK_ABORT("Unexpected DeviceSpaceType");
 }
 
 GrReducedClip::ClipResult GrReducedClip::clipOutsideElement(const Element* element) {
+    SkASSERT(element->getDeviceSpaceType() != Element::DeviceSpaceType::kShader);
+
     switch (element->getDeviceSpaceType()) {
         case Element::DeviceSpaceType::kEmpty:
             return ClipResult::kMadeEmpty;
@@ -596,6 +617,9 @@
         case Element::DeviceSpaceType::kPath:
             return this->addAnalyticFP(element->getDeviceSpacePath(),
                                        Invert(!element->isInverseFilled()), GrAA(element->isAA()));
+
+        case Element::DeviceSpaceType::kShader:
+            SkUNREACHABLE;
     }
 
     SK_ABORT("Unexpected DeviceSpaceType");
@@ -680,6 +704,7 @@
     fAAClipRectGenID = SK_InvalidGenID;
     fWindowRects.reset();
     fMaskElements.reset();
+    fShader.reset();
     fInitialState = InitialState::kAllOut;
 }
 
@@ -858,6 +883,7 @@
 }
 
 std::unique_ptr<GrFragmentProcessor> GrReducedClip::finishAndDetachAnalyticFPs(
+        GrRecordingContext* context, const SkMatrixProvider& matrixProvider,
         GrCoverageCountingPathRenderer* ccpr, uint32_t opsTaskID) {
     // Make sure finishAndDetachAnalyticFPs hasn't been called already.
     SkDEBUGCODE(for (const auto& fp : fAnalyticFPs) { SkASSERT(fp); })
@@ -873,5 +899,17 @@
         fCCPRClipPaths.reset();
     }
 
+    static const GrColorInfo kCoverageColorInfo = GrColorInfo(GrColorType::kUnknown,
+                                                              kPremul_SkAlphaType,
+                                                              nullptr);
+    if (fShader) {
+        GrFPArgs args(context, matrixProvider, kNone_SkFilterQuality, &kCoverageColorInfo);
+        auto fp = as_SB(fShader)->asFragmentProcessor(args);
+        if (fp) {
+            fp = GrFragmentProcessor::SwizzleOutput(std::move(fp), GrSwizzle::AAAA());
+            fAnalyticFPs.push_back(std::move(fp));
+        }
+    }
+
     return GrFragmentProcessor::RunInSeries(fAnalyticFPs.begin(), fAnalyticFPs.count());
 }
diff --git a/src/gpu/GrReducedClip.h b/src/gpu/GrReducedClip.h
index ca95bd0..b7f55a0 100644
--- a/src/gpu/GrReducedClip.h
+++ b/src/gpu/GrReducedClip.h
@@ -53,6 +53,13 @@
     bool hasScissor() const { return fHasScissor; }
 
     /**
+     * Indicates if there is a clip shader, representing the merge of all shader elements of the
+     * original stack.
+     */
+    bool hasShader() const { return SkToBool(fShader); }
+    sk_sp<SkShader> shader() const { SkASSERT(fShader); return fShader; }
+
+    /**
      * If nonempty, the clip mask is not valid inside these windows and the caller must clip them
      * out using the window rectangles GPU extension.
      */
@@ -97,6 +104,7 @@
      * may cause flushes or otherwise change which opsTask the actual draw is going into.
      */
     std::unique_ptr<GrFragmentProcessor> finishAndDetachAnalyticFPs(
+            GrRecordingContext*, const SkMatrixProvider& matrixProvider,
             GrCoverageCountingPathRenderer*, uint32_t opsTaskID);
 
 private:
@@ -146,6 +154,9 @@
     bool fMaskRequiresAA;
     SkSTArray<4, std::unique_ptr<GrFragmentProcessor>> fAnalyticFPs;
     SkSTArray<4, SkPath> fCCPRClipPaths; // Will convert to FPs once we have an opsTask ID for CCPR.
+    // Will be the combination of all kShader elements or null if there's no clip shader.
+    // Does not count against the analytic FP limit.
+    sk_sp<SkShader> fShader;
 };
 
 #endif
diff --git a/src/gpu/SkGpuDevice.cpp b/src/gpu/SkGpuDevice.cpp
index 573225c..dd13b97 100644
--- a/src/gpu/SkGpuDevice.cpp
+++ b/src/gpu/SkGpuDevice.cpp
@@ -125,7 +125,7 @@
                     renderTargetContext->surfaceProps())
         , fContext(SkRef(context))
         , fRenderTargetContext(std::move(renderTargetContext))
-        , fClip(&this->cs()) {
+        , fClip(&this->cs(), &this->asMatrixProvider()) {
     if (flags & kNeedClear_Flag) {
         this->clearAll();
     }
diff --git a/src/gpu/effects/GrDeviceSpaceEffect.fp b/src/gpu/effects/GrDeviceSpaceEffect.fp
index 879c7c4..8dfb2c4 100644
--- a/src/gpu/effects/GrDeviceSpaceEffect.fp
+++ b/src/gpu/effects/GrDeviceSpaceEffect.fp
@@ -7,8 +7,18 @@
 
 in fragmentProcessor fp;
 
+in uniform float3x3 matrix;
+
 void main() {
-     sk_OutColor = sample(fp, sk_InColor, sk_FragCoord.xy);
+    float3 p = matrix * (sk_FragCoord.xy1);
+    sk_OutColor = sample(fp, sk_InColor, p.xy / p.z);
+}
+
+@make{
+    static std::unique_ptr<GrFragmentProcessor> Make(std::unique_ptr<GrFragmentProcessor> fp,
+                                                     const SkMatrix& matrix = SkMatrix::I()) {
+        return std::unique_ptr<GrFragmentProcessor>(new GrDeviceSpaceEffect(std::move(fp), matrix));
+    }
 }
 
 @test(d) {
diff --git a/src/gpu/effects/generated/GrDeviceSpaceEffect.cpp b/src/gpu/effects/generated/GrDeviceSpaceEffect.cpp
index be9e92c..cf0032d 100644
--- a/src/gpu/effects/generated/GrDeviceSpaceEffect.cpp
+++ b/src/gpu/effects/generated/GrDeviceSpaceEffect.cpp
@@ -23,17 +23,27 @@
         GrGLSLFPFragmentBuilder* fragBuilder = args.fFragBuilder;
         const GrDeviceSpaceEffect& _outer = args.fFp.cast<GrDeviceSpaceEffect>();
         (void)_outer;
-        SkString _input204 = SkStringPrintf("%s", args.fInputColor);
-        SkString _sample204;
-        SkString _coords204("sk_FragCoord.xy");
-        _sample204 =
-                this->invokeChild(_outer.fp_index, _input204.c_str(), args, _coords204.c_str());
-        fragBuilder->codeAppendf("%s = %s;\n", args.fOutputColor, _sample204.c_str());
+        auto matrix = _outer.matrix;
+        (void)matrix;
+        matrixVar = args.fUniformHandler->addUniform(&_outer, kFragment_GrShaderFlag,
+                                                     kFloat3x3_GrSLType, "matrix");
+        fragBuilder->codeAppendf("float3 p = %s * float3(sk_FragCoord.xy, 1);",
+                                 args.fUniformHandler->getUniformCStr(matrixVar));
+        SkString _input276 = SkStringPrintf("%s", args.fInputColor);
+        SkString _sample276;
+        SkString _coords276("p.xy / p.z");
+        _sample276 =
+                this->invokeChild(_outer.fp_index, _input276.c_str(), args, _coords276.c_str());
+        fragBuilder->codeAppendf("\n%s = %s;\n", args.fOutputColor, _sample276.c_str());
     }
 
 private:
     void onSetData(const GrGLSLProgramDataManager& pdman,
-                   const GrFragmentProcessor& _proc) override {}
+                   const GrFragmentProcessor& _proc) override {
+        const GrDeviceSpaceEffect& _outer = _proc.cast<GrDeviceSpaceEffect>();
+        { pdman.setSkMatrix(matrixVar, (_outer.matrix)); }
+    }
+    UniformHandle matrixVar;
 };
 GrGLSLFragmentProcessor* GrDeviceSpaceEffect::onCreateGLSLInstance() const {
     return new GrGLSLDeviceSpaceEffect();
@@ -43,10 +53,13 @@
 bool GrDeviceSpaceEffect::onIsEqual(const GrFragmentProcessor& other) const {
     const GrDeviceSpaceEffect& that = other.cast<GrDeviceSpaceEffect>();
     (void)that;
+    if (matrix != that.matrix) return false;
     return true;
 }
 GrDeviceSpaceEffect::GrDeviceSpaceEffect(const GrDeviceSpaceEffect& src)
-        : INHERITED(kGrDeviceSpaceEffect_ClassID, src.optimizationFlags()), fp_index(src.fp_index) {
+        : INHERITED(kGrDeviceSpaceEffect_ClassID, src.optimizationFlags())
+        , fp_index(src.fp_index)
+        , matrix(src.matrix) {
     {
         auto clone = src.childProcessor(fp_index).clone();
         if (src.childProcessor(fp_index).isSampledWithExplicitCoords()) {
diff --git a/src/gpu/effects/generated/GrDeviceSpaceEffect.h b/src/gpu/effects/generated/GrDeviceSpaceEffect.h
index e08d29d..3717431 100644
--- a/src/gpu/effects/generated/GrDeviceSpaceEffect.h
+++ b/src/gpu/effects/generated/GrDeviceSpaceEffect.h
@@ -17,17 +17,19 @@
 #include "src/gpu/GrFragmentProcessor.h"
 class GrDeviceSpaceEffect : public GrFragmentProcessor {
 public:
-    static std::unique_ptr<GrFragmentProcessor> Make(std::unique_ptr<GrFragmentProcessor> fp) {
-        return std::unique_ptr<GrFragmentProcessor>(new GrDeviceSpaceEffect(std::move(fp)));
+    static std::unique_ptr<GrFragmentProcessor> Make(std::unique_ptr<GrFragmentProcessor> fp,
+                                                     const SkMatrix& matrix = SkMatrix::I()) {
+        return std::unique_ptr<GrFragmentProcessor>(new GrDeviceSpaceEffect(std::move(fp), matrix));
     }
     GrDeviceSpaceEffect(const GrDeviceSpaceEffect& src);
     std::unique_ptr<GrFragmentProcessor> clone() const override;
     const char* name() const override { return "DeviceSpaceEffect"; }
     int fp_index = -1;
+    SkMatrix matrix;
 
 private:
-    GrDeviceSpaceEffect(std::unique_ptr<GrFragmentProcessor> fp)
-            : INHERITED(kGrDeviceSpaceEffect_ClassID, kNone_OptimizationFlags) {
+    GrDeviceSpaceEffect(std::unique_ptr<GrFragmentProcessor> fp, SkMatrix matrix)
+            : INHERITED(kGrDeviceSpaceEffect_ClassID, kNone_OptimizationFlags), matrix(matrix) {
         SkASSERT(fp);
         fp_index = this->numChildProcessors();
         fp->setSampledWithExplicitCoords();
diff --git a/src/shaders/SkLocalMatrixShader.cpp b/src/shaders/SkLocalMatrixShader.cpp
index 66ee52b..cc95a30 100644
--- a/src/shaders/SkLocalMatrixShader.cpp
+++ b/src/shaders/SkLocalMatrixShader.cpp
@@ -12,6 +12,7 @@
 
 #if SK_SUPPORT_GPU
 #include "src/gpu/GrFragmentProcessor.h"
+#include "src/gpu/effects/generated/GrDeviceSpaceEffect.h"
 #endif
 
 #if SK_SUPPORT_GPU
@@ -172,8 +173,23 @@
 #if SK_SUPPORT_GPU
 std::unique_ptr<GrFragmentProcessor> SkCTMShader::asFragmentProcessor(
         const GrFPArgs& args) const {
-    return as_SB(fProxyShader)->asFragmentProcessor(
-        GrFPArgs::WithPreLocalMatrix(args, this->getLocalMatrix()));
+    SkMatrix ctmInv;
+    if (!fCTM.invert(&ctmInv)) {
+        return nullptr;
+    }
+
+    auto ctmProvider = SkOverrideDeviceMatrixProvider(args.fMatrixProvider, fCTM);
+    auto base = as_SB(fProxyShader)->asFragmentProcessor(
+        GrFPArgs::WithPreLocalMatrix(args.withNewMatrixProvider(ctmProvider),
+                                     this->getLocalMatrix()));
+    if (!base) {
+        return nullptr;
+    }
+
+    // In order for the shader to be evaluated with the original CTM, we explicitly evaluate it
+    // at sk_FragCoord, and pass that through the inverse of the original CTM. This avoids requiring
+    // local coords for the shader and mapping from the draw's local to device and then back.
+    return GrDeviceSpaceEffect::Make(std::move(base), ctmInv);
 }
 #endif
 
@@ -183,6 +199,5 @@
 }
 
 sk_sp<SkShader> SkShaderBase::makeWithCTM(const SkMatrix& postM) const {
-    return postM.isIdentity() ? sk_ref_sp(this)
-                              : sk_sp<SkShader>(new SkCTMShader(sk_ref_sp(this), postM));
+    return sk_sp<SkShader>(new SkCTMShader(sk_ref_sp(this), postM));
 }
diff --git a/src/svg/SkSVGDevice.cpp b/src/svg/SkSVGDevice.cpp
index a6cb77d..3fbe412 100644
--- a/src/svg/SkSVGDevice.cpp
+++ b/src/svg/SkSVGDevice.cpp
@@ -760,6 +760,9 @@
                 path.addAttribute("clip-rule", "evenodd");
             }
         } break;
+        case SkClipStack::Element::DeviceSpaceType::kShader:
+            // TODO: handle shader clipping, perhaps rasterize and apply as a mask image?
+            break;
         }
 
         return cid;
diff --git a/src/utils/SkClipStackUtils.cpp b/src/utils/SkClipStackUtils.cpp
index 00b9336..fade198 100644
--- a/src/utils/SkClipStackUtils.cpp
+++ b/src/utils/SkClipStackUtils.cpp
@@ -14,6 +14,11 @@
 
     SkClipStack::Iter iter(cs, SkClipStack::Iter::kBottom_IterStart);
     while (const SkClipStack::Element* element = iter.next()) {
+        if (element->getDeviceSpaceType() == SkClipStack::Element::DeviceSpaceType::kShader) {
+            // TODO: Handle DeviceSpaceType::kShader somehow; it can't be turned into an SkPath
+            // but perhaps the pdf backend can apply shaders in another way.
+            continue;
+        }
         SkPath operand;
         if (element->getDeviceSpaceType() != SkClipStack::Element::DeviceSpaceType::kEmpty) {
             element->asDeviceSpacePath(&operand);