This is a reland of GrDomainEffect with significant changes:

1) It correctly handles GPs that have a local matrix

2) It applies its rectangle after the child FP's coord transform. It makes
the child's transform be a no-op and the builder will no longer insert code
or uniforms for the child transform. The domain effect adds its own coord
transform with the same settings as the child's original transform. The result
is that the generated code only has one coord transform matrix and that matrix
is applied in the vertex shader. The previous version of this effect applied
the transform in the fragment shader.

Bug: skia:9570

Change-Id: I514e959414aebe240e9f99e30f13265d8751b656
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/257054
Commit-Queue: Brian Salomon <bsalomon@google.com>
Reviewed-by: Michael Ludwig <michaelludwig@google.com>
diff --git a/gm/texturedomaineffect.cpp b/gm/texturedomaineffect.cpp
index 924e3b5..e7d08cd 100644
--- a/gm/texturedomaineffect.cpp
+++ b/gm/texturedomaineffect.cpp
@@ -33,7 +33,7 @@
 
 namespace skiagm {
 /**
- * This GM directly exercises GrTextureDomainEffect.
+ * This GM directly exercises GrDomainEffect.
  */
 class TextureDomainEffect : public GpuGM {
 public:
@@ -124,9 +124,11 @@
                         // Repeat mode doesn't produce correct results with bilerp filtering
                         continue;
                     }
-                    SkRect domainRect = GrTextureDomain::MakeTexelDomain(texelDomains[d], mode);
-                    auto fp1 = GrTextureDomainEffect::Make(
-                            proxy, fBitmap.alphaType(), textureMatrices[tm], domainRect, mode, fFilter);
+                    auto fp1 = GrSimpleTextureEffect::Make(proxy, fBitmap.alphaType(),
+                                                           textureMatrices[tm], fFilter);
+                    fp1 = GrDomainEffect::Make(
+                            std::move(fp1), GrTextureDomain::MakeTexelDomain(texelDomains[d], mode),
+                            mode, fFilter);
                     if (!fp1) {
                         continue;
                     }
diff --git a/src/core/SkGpuBlurUtils.cpp b/src/core/SkGpuBlurUtils.cpp
index c5ee947..6245ecf 100644
--- a/src/core/SkGpuBlurUtils.cpp
+++ b/src/core/SkGpuBlurUtils.cpp
@@ -320,8 +320,10 @@
         }
 
         GrPaint paint;
+        auto fp = GrSimpleTextureEffect::Make(std::move(srcProxy), srcAlphaType, SkMatrix::I(),
+                                              GrSamplerState::Filter::kBilerp);
         if (GrTextureDomain::kIgnore_Mode != mode && i == 1) {
-            // GrTextureDomainEffect does not support kRepeat_Mode with GrSamplerState::Filter.
+            // GrDomainEffect does not support kRepeat_Mode with GrSamplerState::Filter.
             GrTextureDomain::Mode modeForScaling = GrTextureDomain::kRepeat_Mode == mode
                                                                 ? GrTextureDomain::kDecal_Mode
                                                                 : mode;
@@ -337,21 +339,13 @@
                 domain.fTop = domain.fBottom = SkScalarAve(domain.fTop, domain.fBottom);
             }
             domain.offset(proxyOffset.x(), proxyOffset.y());
-            auto fp = GrTextureDomainEffect::Make(std::move(srcProxy),
-                                                  srcAlphaType,
-                                                  SkMatrix::I(),
-                                                  domain,
-                                                  modeForScaling,
-                                                  GrSamplerState::Filter::kBilerp);
-            paint.addColorFragmentProcessor(std::move(fp));
+            fp = GrDomainEffect::Make(std::move(fp), domain, modeForScaling, true);
             srcRect.offset(-(*srcOffset));
             // TODO: consume the srcOffset in both first draws and always set it to zero
             // back in GaussianBlur
             srcOffset->set(0, 0);
-        } else {
-            paint.addColorTextureProcessor(std::move(srcProxy), srcAlphaType, SkMatrix::I(),
-                                           GrSamplerState::ClampBilerp());
         }
+        paint.addColorFragmentProcessor(std::move(fp));
         paint.setPorterDuffXPFactory(SkBlendMode::kSrc);
 
         dstRenderTargetContext->fillRectToRect(GrFixedClip::Disabled(), std::move(paint), GrAA::kNo,
@@ -407,9 +401,9 @@
     GrPaint paint;
     SkRect domain = GrTextureDomain::MakeTexelDomain(localSrcBounds, GrTextureDomain::kClamp_Mode,
                                                      GrTextureDomain::kClamp_Mode);
-    auto fp = GrTextureDomainEffect::Make(std::move(srcProxy), srcAlphaType, SkMatrix::I(), domain,
-                                          GrTextureDomain::kClamp_Mode,
+    auto fp = GrSimpleTextureEffect::Make(std::move(srcProxy), srcAlphaType, SkMatrix::I(),
                                           GrSamplerState::Filter::kBilerp);
+    fp = GrDomainEffect::Make(std::move(fp), domain, GrTextureDomain::kClamp_Mode, true);
     paint.addColorFragmentProcessor(std::move(fp));
     paint.setPorterDuffXPFactory(SkBlendMode::kSrc);
     GrFixedClip clip(SkIRect::MakeWH(finalW, finalH));
diff --git a/src/effects/imagefilters/SkArithmeticImageFilter.cpp b/src/effects/imagefilters/SkArithmeticImageFilter.cpp
index a4306bd..42c62be 100644
--- a/src/effects/imagefilters/SkArithmeticImageFilter.cpp
+++ b/src/effects/imagefilters/SkArithmeticImageFilter.cpp
@@ -353,10 +353,12 @@
         SkMatrix backgroundMatrix = SkMatrix::MakeTrans(
                 SkIntToScalar(bgSubset.left() - backgroundOffset.fX),
                 SkIntToScalar(bgSubset.top()  - backgroundOffset.fY));
-        bgFP = GrTextureDomainEffect::Make(
-                std::move(backgroundProxy), background->alphaType(), backgroundMatrix,
+        bgFP = GrSimpleTextureEffect::Make(std::move(backgroundProxy), background->alphaType(),
+                                           backgroundMatrix, GrSamplerState::Filter::kNearest);
+        bgFP = GrDomainEffect::Make(
+                std::move(bgFP),
                 GrTextureDomain::MakeTexelDomain(bgSubset, GrTextureDomain::kDecal_Mode),
-                GrTextureDomain::kDecal_Mode, GrSamplerState::Filter::kNearest);
+                GrTextureDomain::kDecal_Mode, false);
         bgFP = GrColorSpaceXformEffect::Make(std::move(bgFP), background->getColorSpace(),
                                              background->alphaType(),
                                              ctx.colorSpace());
@@ -370,10 +372,13 @@
         SkMatrix foregroundMatrix = SkMatrix::MakeTrans(
                 SkIntToScalar(fgSubset.left() - foregroundOffset.fX),
                 SkIntToScalar(fgSubset.top()  - foregroundOffset.fY));
-        auto foregroundFP = GrTextureDomainEffect::Make(
-                std::move(foregroundProxy), foreground->alphaType(), foregroundMatrix,
+        auto foregroundFP =
+                GrSimpleTextureEffect::Make(std::move(foregroundProxy), foreground->alphaType(),
+                                            foregroundMatrix, GrSamplerState::Filter::kNearest);
+        foregroundFP = GrDomainEffect::Make(
+                std::move(foregroundFP),
                 GrTextureDomain::MakeTexelDomain(fgSubset, GrTextureDomain::kDecal_Mode),
-                GrTextureDomain::kDecal_Mode, GrSamplerState::Filter::kNearest);
+                GrTextureDomain::kDecal_Mode, false);
         foregroundFP = GrColorSpaceXformEffect::Make(std::move(foregroundFP),
                                                      foreground->getColorSpace(),
                                                      foreground->alphaType(),
diff --git a/src/effects/imagefilters/SkXfermodeImageFilter.cpp b/src/effects/imagefilters/SkXfermodeImageFilter.cpp
index df4847e..b0b3d6b 100644
--- a/src/effects/imagefilters/SkXfermodeImageFilter.cpp
+++ b/src/effects/imagefilters/SkXfermodeImageFilter.cpp
@@ -266,10 +266,12 @@
         SkMatrix bgMatrix = SkMatrix::MakeTrans(
                 SkIntToScalar(bgSubset.left() - backgroundOffset.fX),
                 SkIntToScalar(bgSubset.top()  - backgroundOffset.fY));
-        bgFP = GrTextureDomainEffect::Make(
-                std::move(backgroundProxy), background->alphaType(), bgMatrix,
+        bgFP = GrSimpleTextureEffect::Make(std::move(backgroundProxy), background->alphaType(),
+                                           bgMatrix, GrSamplerState::Filter::kNearest);
+        bgFP = GrDomainEffect::Make(
+                std::move(bgFP),
                 GrTextureDomain::MakeTexelDomain(bgSubset, GrTextureDomain::kDecal_Mode),
-                GrTextureDomain::kDecal_Mode, GrSamplerState::Filter::kNearest);
+                GrTextureDomain::kDecal_Mode, false);
         bgFP = GrColorSpaceXformEffect::Make(std::move(bgFP), background->getColorSpace(),
                                              background->alphaType(),
                                              ctx.colorSpace());
@@ -283,10 +285,13 @@
         SkMatrix fgMatrix = SkMatrix::MakeTrans(
                 SkIntToScalar(fgSubset.left() - foregroundOffset.fX),
                 SkIntToScalar(fgSubset.top()  - foregroundOffset.fY));
-        auto foregroundFP = GrTextureDomainEffect::Make(
-                std::move(foregroundProxy), foreground->alphaType(), fgMatrix,
+        auto foregroundFP =
+                GrSimpleTextureEffect::Make(std::move(foregroundProxy), foreground->alphaType(),
+                                            fgMatrix, GrSamplerState::Filter::kNearest);
+        foregroundFP = GrDomainEffect::Make(
+                std::move(foregroundFP),
                 GrTextureDomain::MakeTexelDomain(fgSubset, GrTextureDomain::kDecal_Mode),
-                GrTextureDomain::kDecal_Mode, GrSamplerState::Filter::kNearest);
+                GrTextureDomain::kDecal_Mode, false);
         foregroundFP = GrColorSpaceXformEffect::Make(std::move(foregroundFP),
                                                      foreground->getColorSpace(),
                                                      foreground->alphaType(),
diff --git a/src/gpu/GrCoordTransform.h b/src/gpu/GrCoordTransform.h
index 6201ce1..ba85fb1 100644
--- a/src/gpu/GrCoordTransform.h
+++ b/src/gpu/GrCoordTransform.h
@@ -75,6 +75,7 @@
         return fProxy && fProxy->backendFormat().textureType() != GrTextureType::kRectangle;
     }
     bool reverseY() const { return fProxy && fProxy->origin() == kBottomLeft_GrSurfaceOrigin; }
+    bool isNoOp() const { return fMatrix.isIdentity() && !this->normalize() && !this->reverseY(); }
 
     // This should only ever be called at flush time after the backing texture has been
     // successfully instantiated
diff --git a/src/gpu/GrFragmentProcessor.cpp b/src/gpu/GrFragmentProcessor.cpp
index 23836f8..5299c07 100644
--- a/src/gpu/GrFragmentProcessor.cpp
+++ b/src/gpu/GrFragmentProcessor.cpp
@@ -386,7 +386,7 @@
 
 //////////////////////////////////////////////////////////////////////////////
 
-GrFragmentProcessor::Iter::Iter(const GrPaint& paint) {
+GrFragmentProcessor::CIter::CIter(const GrPaint& paint) {
     for (int i = paint.numCoverageFragmentProcessors() - 1; i >= 0; --i) {
         fFPStack.push_back(paint.getCoverageFragmentProcessor(i));
     }
@@ -395,7 +395,7 @@
     }
 }
 
-GrFragmentProcessor::Iter::Iter(const GrProcessorSet& set) {
+GrFragmentProcessor::CIter::CIter(const GrProcessorSet& set) {
     for (int i = set.numCoverageFragmentProcessors() - 1; i >= 0; --i) {
         fFPStack.push_back(set.coverageFragmentProcessor(i));
     }
@@ -404,25 +404,12 @@
     }
 }
 
-GrFragmentProcessor::Iter::Iter(const GrPipeline& pipeline) {
+GrFragmentProcessor::CIter::CIter(const GrPipeline& pipeline) {
     for (int i = pipeline.numFragmentProcessors() - 1; i >= 0; --i) {
         fFPStack.push_back(&pipeline.getFragmentProcessor(i));
     }
 }
 
-const GrFragmentProcessor& GrFragmentProcessor::Iter::operator*() const { return *fFPStack.back(); }
-const GrFragmentProcessor* GrFragmentProcessor::Iter::operator->() const { return fFPStack.back(); }
-
-GrFragmentProcessor::Iter& GrFragmentProcessor::Iter::operator++() {
-    SkASSERT(!fFPStack.empty());
-    const GrFragmentProcessor* back = fFPStack.back();
-    fFPStack.pop_back();
-    for (int i = back->numChildProcessors() - 1; i >= 0; --i) {
-        fFPStack.push_back(&back->childProcessor(i));
-    }
-    return *this;
-}
-
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 
 GrFragmentProcessor::TextureSampler::TextureSampler(sk_sp<GrSurfaceProxy> proxy,
diff --git a/src/gpu/GrFragmentProcessor.h b/src/gpu/GrFragmentProcessor.h
index b65db7a..03d22ae 100644
--- a/src/gpu/GrFragmentProcessor.h
+++ b/src/gpu/GrFragmentProcessor.h
@@ -111,8 +111,9 @@
     int numCoordTransforms() const { return fCoordTransforms.count(); }
 
     /** Returns the coordinate transformation at index. index must be valid according to
-        numTransforms(). */
+        numCoordTransforms(). */
     const GrCoordTransform& coordTransform(int index) const { return *fCoordTransforms[index]; }
+    GrCoordTransform& coordTransform(int index) { return *fCoordTransforms[index]; }
 
     const SkTArray<GrCoordTransform*, true>& coordTransforms() const {
         return fCoordTransforms;
@@ -120,6 +121,7 @@
 
     int numChildProcessors() const { return fChildProcessors.count(); }
 
+    GrFragmentProcessor& childProcessor(int index) { return *fChildProcessors[index]; }
     const GrFragmentProcessor& childProcessor(int index) const { return *fChildProcessors[index]; }
 
     SkDEBUGCODE(bool isInstantiated() const;)
@@ -201,35 +203,54 @@
     // hierarchies rooted in a GrPaint, GrProcessorSet, or GrPipeline. For these collections it
     // iterates the tree rooted at each color FP and then each coverage FP.
     //
+    // Iter is the non-const version and CIter is the const version.
+    //
     // An iterator is constructed from one of the srcs and used like this:
     //   for (GrFragmentProcessor::Iter iter(pipeline); iter; ++iter) {
-    //       const GrFragmentProcessor& fp = *iter;
+    //       GrFragmentProcessor& fp = *iter;
     //   }
     // The exit test for the loop is using Iter's operator bool().
-    // To use a range-for loop instead see IterRange below.
+    // To use a range-for loop instead see CIterRange below.
     class Iter;
+    class CIter;
 
-    // Used to implement a range-for loop using Iter. Src is one of GrFragmentProcessor, GrPaint,
-    // GrProcessorSet, or GrPipeline. Type aliases for these defined below.
+    // Used to implement a range-for loop using CIter. Src is one of GrFragmentProcessor,
+    // GrPaint, GrProcessorSet, or GrPipeline. Type aliases for these defined below.
     // Example usage:
     //   for (const auto& fp : GrFragmentProcessor::PaintRange(paint)) {
     //       if (fp.usesLocalCoords()) {
     //       ...
     //       }
     //   }
-    template <typename Src> class IterRange;
+    template <typename Src> class CIterRange;
+    // Like CIterRange but non-const and only constructible from GrFragmentProcessor. This could
+    // support GrPaint as it owns non-const FPs but no need for it as of now.
+    //   for (auto& fp0 : GrFragmentProcessor::IterRange(fp)) {
+    //       ...
+    //   }
+    class IterRange;
 
-    // We would use template deduction guides for Iter but for:
+    // We would use template deduction guides for Iter/CIter but for:
     // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=79501
     // Instead we use these specialized type aliases to make it prettier
     // to construct Iters for particular sources of FPs.
-    using FPRange           = IterRange<GrFragmentProcessor>;
-    using PaintRange        = IterRange<GrPaint>;
+    using FPCRange = CIterRange<GrFragmentProcessor>;
+    using PaintCRange = CIterRange<GrPaint>;
 
+    // Implementation details for iterators that walk an array of Items owned by a set of FPs.
     using CountFn = int (GrFragmentProcessor::*)() const;
-    template <typename Item> using GetFn = const Item& (GrFragmentProcessor::*)(int) const;
-
-    // Implementation detail for iterators that walk an array of things owned by a set of FPs.
+    // Define GetFn to be a member function that returns an Item by index. The function itself is
+    // const if Item is a const type and non-const if Item is non-const.
+    template <typename Item, bool IsConst = std::is_const<Item>::value> struct GetT;
+    template <typename Item> struct GetT<Item, false> {
+        using GetFn = Item& (GrFragmentProcessor::*)(int);
+    };
+    template <typename Item> struct GetT<Item, true> {
+        using GetFn = const Item& (GrFragmentProcessor::*)(int) const;
+    };
+    template <typename Item> using GetFn = typename GetT<Item>::GetFn;
+    // This is an iterator over the Items owned by a (collection of) FP. CountFn is a FP member that
+    // gets the number of Items owned by each FP and GetFn is a member that gets them by index.
     template <typename Item, CountFn Count, GetFn<Item> Get> class FPItemIter;
 
     // Loops over all the GrCoordTransforms owned by GrFragmentProcessors. The possible sources for
@@ -241,7 +262,7 @@
     //       ...
     //   }
     // See the ranges below to make this simpler a la range-for loops.
-    using CoordTransformIter = FPItemIter<GrCoordTransform,
+    using CoordTransformIter = FPItemIter<const GrCoordTransform,
                                           &GrFragmentProcessor::numCoordTransforms,
                                           &GrFragmentProcessor::coordTransform>;
     // Same as CoordTransformIter but for TextureSamplers:
@@ -252,7 +273,7 @@
     //       ...
     //   }
     // See the ranges below to make this simpler a la range-for loops.
-    using TextureSamplerIter = FPItemIter<TextureSampler,
+    using TextureSamplerIter = FPItemIter<const TextureSampler,
                                           &GrFragmentProcessor::numTextureSamplers,
                                           &GrFragmentProcessor::textureSampler>;
 
@@ -268,10 +289,17 @@
     // to add more as they become useful. Maybe someday we'll have template argument deduction
     // with guides for type aliases and the sources can be removed from the type aliases:
     // http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2019/p1021r5.html
-    using PipelineCoordTransformRange     = FPItemRange<GrPipeline,          CoordTransformIter>;
-    using PipelineTextureSamplerRange     = FPItemRange<GrPipeline,          TextureSamplerIter>;
-    using FPTextureSamplerRange           = FPItemRange<GrFragmentProcessor, TextureSamplerIter>;
-    using ProcessorSetTextureSamplerRange = FPItemRange<GrProcessorSet,      TextureSamplerIter>;
+    using PipelineCoordTransformRange = FPItemRange<const GrPipeline, CoordTransformIter>;
+    using PipelineTextureSamplerRange = FPItemRange<const GrPipeline, TextureSamplerIter>;
+    using FPTextureSamplerRange = FPItemRange<const GrFragmentProcessor, TextureSamplerIter>;
+    using ProcessorSetTextureSamplerRange = FPItemRange<const GrProcessorSet, TextureSamplerIter>;
+
+    // Not used directly.
+    using NonConstCoordTransformIter =
+            FPItemIter<GrCoordTransform, &GrFragmentProcessor::numCoordTransforms,
+                       &GrFragmentProcessor::coordTransform>;
+    // Iterator over non-const GrCoordTransforms owned by FP and its descendants.
+    using FPCoordTransformRange = FPItemRange<GrFragmentProcessor, NonConstCoordTransformIter>;
 
     // Sentinel type for range-for using Iter.
     class EndIter {};
@@ -391,6 +419,9 @@
     inline static const TextureSampler& IthTextureSampler(int i);
 
 private:
+    // Implementation details of Iter and CIter.
+    template <typename> class IterBase;
+
     virtual SkPMColor4f constantOutputForConstantInput(const SkPMColor4f& /* inputColor */) const {
         SK_ABORT("Subclass must override this if advertising this optimization.");
     }
@@ -492,33 +523,66 @@
 
 //////////////////////////////////////////////////////////////////////////////
 
-class GrFragmentProcessor::Iter {
+template <typename FP> class GrFragmentProcessor::IterBase {
 public:
-    explicit Iter(const GrFragmentProcessor& fp) { fFPStack.push_back(&fp); }
-    explicit Iter(const GrPaint&);
-    explicit Iter(const GrProcessorSet&);
-    explicit Iter(const GrPipeline&);
-
-    const GrFragmentProcessor& operator*() const;
-    const GrFragmentProcessor* operator->() const;
-    Iter& operator++();
+    FP& operator*() const { return *fFPStack.back(); }
+    FP* operator->() const { return fFPStack.back(); }
     operator bool() const { return !fFPStack.empty(); }
     bool operator!=(const EndIter&) { return (bool)*this; }
 
     // Because each iterator carries a stack we want to avoid copies.
-    Iter(const Iter&) = delete;
-    Iter& operator=(const Iter&) = delete;
+    IterBase(const IterBase&) = delete;
+    IterBase& operator=(const IterBase&) = delete;
 
-private:
-    SkSTArray<4, const GrFragmentProcessor*, true> fFPStack;
+protected:
+    void increment();
+
+    IterBase() = default;
+    explicit IterBase(FP& fp) { fFPStack.push_back(&fp); }
+
+    SkSTArray<4, FP*, true> fFPStack;
+};
+
+template <typename FP> void GrFragmentProcessor::IterBase<FP>::increment() {
+    SkASSERT(!fFPStack.empty());
+    FP* back = fFPStack.back();
+    fFPStack.pop_back();
+    for (int i = back->numChildProcessors() - 1; i >= 0; --i) {
+        fFPStack.push_back(&back->childProcessor(i));
+    }
+}
+
+//////////////////////////////////////////////////////////////////////////////
+
+class GrFragmentProcessor::Iter : public IterBase<GrFragmentProcessor> {
+public:
+    explicit Iter(GrFragmentProcessor& fp) : IterBase(fp) {}
+    Iter& operator++() {
+        this->increment();
+        return *this;
+    }
 };
 
 //////////////////////////////////////////////////////////////////////////////
 
-template <typename Src> class GrFragmentProcessor::IterRange {
+class GrFragmentProcessor::CIter : public IterBase<const GrFragmentProcessor> {
 public:
-    explicit IterRange(const Src& t) : fT(t) {}
-    Iter begin() const { return Iter(fT); }
+    explicit CIter(const GrFragmentProcessor& fp) : IterBase(fp) {}
+    explicit CIter(const GrPaint&);
+    explicit CIter(const GrProcessorSet&);
+    explicit CIter(const GrPipeline&);
+    CIter& operator++() {
+        this->increment();
+        return *this;
+    }
+};
+
+//////////////////////////////////////////////////////////////////////////////
+
+template <typename Src> class GrFragmentProcessor::CIterRange {
+public:
+    explicit CIterRange(const Src& t) : fT(t) {}
+    CIter begin() const { return CIter(fT); }
     EndIter end() const { return EndIter(); }
 
 private:
@@ -530,9 +594,9 @@
 template <typename Item, GrFragmentProcessor::CountFn Count, GrFragmentProcessor::GetFn<Item> Get>
 class GrFragmentProcessor::FPItemIter {
 public:
-    template <typename Src> explicit FPItemIter(const Src& s);
+    template <typename Src> explicit FPItemIter(Src& s);
 
-    std::pair<const Item&, const GrFragmentProcessor&> operator*() const {
+    std::pair<Item&, const GrFragmentProcessor&> operator*() const {
         return {(*fFPIter.*Get)(fIndex), *fFPIter};
     }
     FPItemIter& operator++();
@@ -543,28 +607,13 @@
     FPItemIter& operator=(const FPItemIter&) = delete;
 
 private:
-    Iter fFPIter;
+    typename std::conditional<std::is_const<Item>::value, CIter, Iter>::type fFPIter;
     int fIndex;
 };
 
-//////////////////////////////////////////////////////////////////////////////
-
-template <typename Src, typename ItemIter> class GrFragmentProcessor::FPItemRange {
-public:
-    FPItemRange(const Src& src) : fSrc(src) {}
-    ItemIter begin() const { return ItemIter(fSrc); }
-    FPItemEndIter end() const { return FPItemEndIter(); }
-
-private:
-    const Src& fSrc;
-};
-
-//////////////////////////////////////////////////////////////////////////////
-
 template <typename Item, GrFragmentProcessor::CountFn Count, GrFragmentProcessor::GetFn<Item> Get>
 template <typename Src>
-GrFragmentProcessor::FPItemIter<Item, Count, Get>::FPItemIter(const Src& s)
-        : fFPIter(s), fIndex(-1) {
+GrFragmentProcessor::FPItemIter<Item, Count, Get>::FPItemIter(Src& s) : fFPIter(s), fIndex(-1) {
     if (fFPIter) {
         ++*this;
     }
@@ -582,4 +631,16 @@
     return *this;
 }
 
+//////////////////////////////////////////////////////////////////////////////
+
+template <typename Src, typename ItemIter> class GrFragmentProcessor::FPItemRange {
+public:
+    FPItemRange(Src& src) : fSrc(src) {}
+    ItemIter begin() const { return ItemIter(fSrc); }
+    FPItemEndIter end() const { return FPItemEndIter(); }
+
+private:
+    Src& fSrc;
+};
+
 #endif
diff --git a/src/gpu/GrPathProcessor.cpp b/src/gpu/GrPathProcessor.cpp
index 667a80f..f944e23 100644
--- a/src/gpu/GrPathProcessor.cpp
+++ b/src/gpu/GrPathProcessor.cpp
@@ -93,7 +93,12 @@
         int t = 0;
         for (auto [transform, fp] : transformRange) {
             SkASSERT(fInstalledTransforms[t].fHandle.isValid());
-            const SkMatrix& m = GetTransformMatrix(pathProc.localMatrix(), transform);
+            SkMatrix m;
+            if (fp.coordTransformsApplyToLocalCoords()) {
+                m = GetTransformMatrix(transform, pathProc.localMatrix());
+            } else {
+                m = GetTransformMatrix(transform, SkMatrix::I());
+            }
             if (fInstalledTransforms[t].fCurrentValue.cheapEqualTo(m)) {
                 continue;
             }
diff --git a/src/gpu/GrPrimitiveProcessor.cpp b/src/gpu/GrPrimitiveProcessor.cpp
index cf7b56b..270a14c 100644
--- a/src/gpu/GrPrimitiveProcessor.cpp
+++ b/src/gpu/GrPrimitiveProcessor.cpp
@@ -8,13 +8,15 @@
 #include "src/gpu/GrPrimitiveProcessor.h"
 
 #include "src/gpu/GrCoordTransform.h"
+#include "src/gpu/GrFragmentProcessor.h"
 
 /**
  * We specialize the vertex code for each of these matrix types.
  */
 enum MatrixType {
-    kNoPersp_MatrixType  = 0,
-    kGeneral_MatrixType  = 1,
+    kNone_MatrixType     = 0,
+    kNoPersp_MatrixType  = 1,
+    kGeneral_MatrixType  = 2,
 };
 
 GrPrimitiveProcessor::GrPrimitiveProcessor(ClassID classID) : GrProcessor(classID) {}
@@ -24,19 +26,24 @@
     return this->onTextureSampler(i);
 }
 
-uint32_t
-GrPrimitiveProcessor::getTransformKey(const SkTArray<GrCoordTransform*, true>& coords,
-                                      int numCoords) const {
+uint32_t GrPrimitiveProcessor::computeCoordTransformsKey(const GrFragmentProcessor& fp) const {
+    // This is highly coupled with the code in GrGLSLGeometryProcessor::emitTransforms().
+    SkASSERT(fp.numCoordTransforms() * 2 <= 32);
     uint32_t totalKey = 0;
-    for (int t = 0; t < numCoords; ++t) {
+    for (int t = 0; t < fp.numCoordTransforms(); ++t) {
         uint32_t key = 0;
-        const GrCoordTransform* coordTransform = coords[t];
-        if (coordTransform->matrix().hasPerspective()) {
-            key |= kGeneral_MatrixType;
+        const GrCoordTransform& coordTransform = fp.coordTransform(t);
+        if (!fp.coordTransformsApplyToLocalCoords() && coordTransform.isNoOp()) {
+            key = kNone_MatrixType;
+        } else if (coordTransform.matrix().hasPerspective()) {
+            // Note that we can also have homogeneous varyings as a result of a GP local matrix or
+            // homogeneous local coords generated by GP. We're relying on the GP to include any
+            // variability in those in its key.
+            key = kGeneral_MatrixType;
         } else {
-            key |= kNoPersp_MatrixType;
+            key = kNoPersp_MatrixType;
         }
-        key <<= t;
+        key <<= 2*t;
         SkASSERT(0 == (totalKey & key)); // keys for each transform ought not to overlap
         totalKey |= key;
     }
diff --git a/src/gpu/GrPrimitiveProcessor.h b/src/gpu/GrPrimitiveProcessor.h
index ba9d581..4a0bd72 100644
--- a/src/gpu/GrPrimitiveProcessor.h
+++ b/src/gpu/GrPrimitiveProcessor.h
@@ -169,13 +169,10 @@
     virtual bool willUseGeoShader() const = 0;
 
     /**
-     * Computes a transformKey from an array of coord transforms. Will only look at the first
-     * <numCoords> transforms in the array.
-     *
-     * TODO: A better name for this function  would be "compute" instead of "get".
+     * Computes a key for the transforms owned by an FP based on the shader code that will be
+     * emitted by the primitive processor to implement them.
      */
-    uint32_t getTransformKey(const SkTArray<GrCoordTransform*, true>& coords,
-                             int numCoords) const;
+    uint32_t computeCoordTransformsKey(const GrFragmentProcessor& fp) const;
 
     /**
      * Sets a unique key on the GrProcessorKeyBuilder that is directly associated with this geometry
diff --git a/src/gpu/GrProcessor.h b/src/gpu/GrProcessor.h
index 46a3f8e..60b89ab 100644
--- a/src/gpu/GrProcessor.h
+++ b/src/gpu/GrProcessor.h
@@ -105,6 +105,7 @@
         kGrDistanceFieldA8TextGeoProc_ClassID,
         kGrDistanceFieldLCDTextGeoProc_ClassID,
         kGrDistanceFieldPathGeoProc_ClassID,
+        kGrDomainEffect_ClassID,
         kGrDualIntervalGradientColorizer_ClassID,
         kGrEllipseEffect_ClassID,
         kGrFillRRectOp_Processor_ClassID,
@@ -138,7 +139,6 @@
         kGrSampleMaskProcessor_ClassID,
         kGrSaturateProcessor_ClassID,
         kGrSweepGradientLayout_ClassID,
-        kGrTextureDomainEffect_ClassID,
         kGrTextureGradientColorizer_ClassID,
         kGrTiledGradientEffect_ClassID,
         kGrTwoPointConicalGradientLayout_ClassID,
diff --git a/src/gpu/GrProgramDesc.cpp b/src/gpu/GrProgramDesc.cpp
index c2e1cca..38161c6 100644
--- a/src/gpu/GrProgramDesc.cpp
+++ b/src/gpu/GrProgramDesc.cpp
@@ -167,8 +167,7 @@
 
     fp.getGLSLProcessorKey(*caps.shaderCaps(), b);
 
-    return gen_fp_meta_key(fp, caps, primProc.getTransformKey(fp.coordTransforms(),
-                                                              fp.numCoordTransforms()), b);
+    return gen_fp_meta_key(fp, caps, primProc.computeCoordTransformsKey(fp), b);
 }
 
 bool GrProgramDesc::Build(GrProgramDesc* desc, const GrRenderTarget* renderTarget,
diff --git a/src/gpu/GrTextureProducer.cpp b/src/gpu/GrTextureProducer.cpp
index 4118287..711edb3 100644
--- a/src/gpu/GrTextureProducer.cpp
+++ b/src/gpu/GrTextureProducer.cpp
@@ -58,19 +58,17 @@
 
     GrPaint paint;
 
+    auto fp = GrSimpleTextureEffect::Make(std::move(inputProxy), kUnknown_SkAlphaType,
+                                          SkMatrix::I(), copyParams.fFilter);
     if (needsDomain) {
         const SkRect domain = localRect.makeInset(0.5f, 0.5f);
         // This would cause us to read values from outside the subset. Surely, the caller knows
         // better!
         SkASSERT(copyParams.fFilter != GrSamplerState::Filter::kMipMap);
-        paint.addColorFragmentProcessor(GrTextureDomainEffect::Make(
-                std::move(inputProxy), kUnknown_SkAlphaType, SkMatrix::I(), domain,
-                GrTextureDomain::kClamp_Mode, copyParams.fFilter));
-    } else {
-        GrSamplerState samplerState(GrSamplerState::WrapMode::kClamp, copyParams.fFilter);
-        paint.addColorTextureProcessor(std::move(inputProxy), kUnknown_SkAlphaType, SkMatrix::I(),
-                                       samplerState);
+        fp = GrDomainEffect::Make(std::move(fp), domain, GrTextureDomain::kClamp_Mode,
+                                  copyParams.fFilter);
     }
+    paint.addColorFragmentProcessor(std::move(fp));
     paint.setPorterDuffXPFactory(SkBlendMode::kSrc);
 
     copyRTC->fillRectToRect(GrNoClip(), std::move(paint), GrAA::kNo, SkMatrix::I(), dstRect,
@@ -203,19 +201,18 @@
     bool clampToBorderSupport = fContext->priv().caps()->clampToBorderSupport();
     SkAlphaType srcAlphaType = this->alphaType();
     if (filterOrNullForBicubic) {
+        GrSamplerState::WrapMode wrapMode = fDomainNeedsDecal && clampToBorderSupport
+                                                    ? GrSamplerState::WrapMode::kClampToBorder
+                                                    : GrSamplerState::WrapMode::kClamp;
+        GrSamplerState samplerState(wrapMode, *filterOrNullForBicubic);
+        auto fp = GrSimpleTextureEffect::Make(std::move(proxy), srcAlphaType, textureMatrix,
+                                              samplerState);
         if (kDomain_DomainMode == domainMode || (fDomainNeedsDecal && !clampToBorderSupport)) {
             GrTextureDomain::Mode wrapMode = fDomainNeedsDecal ? GrTextureDomain::kDecal_Mode
                                                                : GrTextureDomain::kClamp_Mode;
-            return GrTextureDomainEffect::Make(std::move(proxy), srcAlphaType, textureMatrix,
-                                               domain, wrapMode, *filterOrNullForBicubic);
-        } else {
-            GrSamplerState::WrapMode wrapMode =
-                    fDomainNeedsDecal ? GrSamplerState::WrapMode::kClampToBorder
-                                      : GrSamplerState::WrapMode::kClamp;
-            GrSamplerState samplerState(wrapMode, *filterOrNullForBicubic);
-            return GrSimpleTextureEffect::Make(std::move(proxy), srcAlphaType, textureMatrix,
-                                               samplerState);
+            return GrDomainEffect::Make(std::move(fp), domain, wrapMode, *filterOrNullForBicubic);
         }
+        return fp;
     } else {
         static const GrSamplerState::WrapMode kClampClamp[] = {
                 GrSamplerState::WrapMode::kClamp, GrSamplerState::WrapMode::kClamp};
diff --git a/src/gpu/SkGpuDevice.cpp b/src/gpu/SkGpuDevice.cpp
index 9428f18..6d6f1bb 100644
--- a/src/gpu/SkGpuDevice.cpp
+++ b/src/gpu/SkGpuDevice.cpp
@@ -956,8 +956,10 @@
             static constexpr auto kDir = GrBicubicEffect::Direction::kXY;
             fp = GrBicubicEffect::Make(std::move(proxy), texMatrix, domain, kDir, srcAlphaType);
         } else {
-            fp = GrTextureDomainEffect::Make(std::move(proxy), srcAlphaType, texMatrix, domain,
-                                             GrTextureDomain::kClamp_Mode, samplerState.filter());
+            fp = GrSimpleTextureEffect::Make(std::move(proxy), srcAlphaType, texMatrix,
+                                             samplerState);
+            fp = GrDomainEffect::Make(std::move(fp), domain, GrTextureDomain::kClamp_Mode,
+                                      samplerState.filter());
         }
     } else if (bicubic) {
         SkASSERT(GrSamplerState::Filter::kNearest == samplerState.filter());
diff --git a/src/gpu/ccpr/GrCCDrawPathsOp.cpp b/src/gpu/ccpr/GrCCDrawPathsOp.cpp
index aa99e35..2aa6b59 100644
--- a/src/gpu/ccpr/GrCCDrawPathsOp.cpp
+++ b/src/gpu/ccpr/GrCCDrawPathsOp.cpp
@@ -17,7 +17,7 @@
 #include "src/gpu/ccpr/GrOctoBounds.h"
 
 static bool has_coord_transforms(const GrPaint& paint) {
-    for (const auto& fp : GrFragmentProcessor::PaintRange(paint)) {
+    for (const auto& fp : GrFragmentProcessor::PaintCRange(paint)) {
         if (!fp.coordTransforms().empty()) {
             return true;
         }
diff --git a/src/gpu/dawn/GrDawnProgramBuilder.cpp b/src/gpu/dawn/GrDawnProgramBuilder.cpp
index 86ea295..d7bf830 100644
--- a/src/gpu/dawn/GrDawnProgramBuilder.cpp
+++ b/src/gpu/dawn/GrDawnProgramBuilder.cpp
@@ -519,7 +519,7 @@
     const GrPrimitiveProcessor& primProc = programInfo.primProc();
     GrFragmentProcessor::PipelineCoordTransformRange transformRange(pipeline);
     fGeometryProcessor->setData(fDataManager, primProc, transformRange);
-    GrFragmentProcessor::Iter fpIter(pipeline);
+    GrFragmentProcessor::CIter fpIter(pipeline);
     GrGLSLFragmentProcessor::Iter glslIter(fFragmentProcessors.get(), fFragmentProcessorCnt);
     for (; fpIter && glslIter; ++fpIter, ++glslIter) {
         glslIter->setData(fDataManager, *fpIter);
diff --git a/src/gpu/effects/GrTextureDomain.cpp b/src/gpu/effects/GrTextureDomain.cpp
index 80907fe..8a0014a 100644
--- a/src/gpu/effects/GrTextureDomain.cpp
+++ b/src/gpu/effects/GrTextureDomain.cpp
@@ -37,8 +37,7 @@
     // We don't currently handle domains that are empty or don't intersect the texture.
     // It is OK if the domain rect is a line or point, but it should not be inverted. We do not
     // handle rects that do not intersect the [0..1]x[0..1] rect.
-    SkASSERT(domain.fLeft <= domain.fRight);
-    SkASSERT(domain.fTop <= domain.fBottom);
+    SkASSERT(domain.isSorted());
     fDomain.fLeft = SkScalarPin(domain.fLeft, 0.0f, kFullRect.fRight);
     fDomain.fRight = SkScalarPin(domain.fRight, fDomain.fLeft, kFullRect.fRight);
     fDomain.fTop = SkScalarPin(domain.fTop, 0.0f, kFullRect.fBottom);
@@ -47,6 +46,13 @@
     SkASSERT(fDomain.fTop <= fDomain.fBottom);
 }
 
+GrTextureDomain::GrTextureDomain(const SkRect& domain, Mode modeX, Mode modeY, int index)
+        : fDomain(domain), fModeX(modeX), fModeY(modeY), fIndex(index) {
+    // We don't currently handle domains that are empty or don't intersect the texture.
+    // It is OK if the domain rect is a line or point, but it should not be inverted.
+    SkASSERT(domain.isSorted());
+}
+
 //////////////////////////////////////////////////////////////////////////////
 
 static SkString clamp_expression(GrTextureDomain::Mode mode, const char* inCoord,
@@ -76,6 +82,22 @@
     return clampedExpr;
 }
 
+void GrTextureDomain::GLDomain::sampleProcessor(const GrTextureDomain& textureDomain,
+                                                const char* inColor,
+                                                const char* outColor,
+                                                const SkString& inCoords,
+                                                GrGLSLFragmentProcessor* parent,
+                                                GrGLSLFragmentProcessor::EmitArgs& args,
+                                                int childIndex) {
+    auto appendProcessorSample = [parent, &args, childIndex, inColor](const char* coord) {
+        SkString outColor("childColor");
+        parent->invokeChild(childIndex, inColor, &outColor, args, coord);
+        return outColor;
+    };
+    this->sample(args.fFragBuilder, args.fUniformHandler, textureDomain, outColor, inCoords,
+                 appendProcessorSample);
+}
+
 void GrTextureDomain::GLDomain::sampleTexture(GrGLSLShaderBuilder* builder,
                                               GrGLSLUniformHandler* uniformHandler,
                                               const GrShaderCaps* shaderCaps,
@@ -84,6 +106,21 @@
                                               const SkString& inCoords,
                                               GrGLSLFragmentProcessor::SamplerHandle sampler,
                                               const char* inModulateColor) {
+    auto appendTextureSample = [&sampler, inModulateColor, builder](const char* coord) {
+        builder->codeAppend("half4 textureColor = ");
+        builder->appendTextureLookupAndModulate(inModulateColor, sampler, coord);
+        builder->codeAppend(";");
+        return SkString("textureColor");
+    };
+    this->sample(builder, uniformHandler, textureDomain, outColor, inCoords, appendTextureSample);
+}
+
+void GrTextureDomain::GLDomain::sample(GrGLSLShaderBuilder* builder,
+                                       GrGLSLUniformHandler* uniformHandler,
+                                       const GrTextureDomain& textureDomain,
+                                       const char* outColor,
+                                       const SkString& inCoords,
+                                       const std::function<AppendSample>& appendSample) {
     SkASSERT(!fHasMode || (textureDomain.modeX() == fModeX && textureDomain.modeY() == fModeY));
     SkDEBUGCODE(fModeX = textureDomain.modeX();)
     SkDEBUGCODE(fModeY = textureDomain.modeY();)
@@ -139,11 +176,8 @@
     }
     builder->codeAppend(";");
 
-    // Look up the texture sample at the clamped coordinate location
-    builder->codeAppend("half4 inside = ");
-    builder->appendTextureLookupAndModulate(inModulateColor, sampler, "clampedCoord",
-                                            kFloat2_GrSLType);
-    builder->codeAppend(";");
+    // Sample 'appendSample' at the clamped coordinate location.
+    SkString color = appendSample("clampedCoord");
 
     // Apply decal mode's transparency interpolation if needed
     if (decalX || decalY) {
@@ -170,174 +204,232 @@
         // is set to 1 and it becomes a simple linear blend between texture and transparent.
         builder->codeAppendf("if (err > %s.z) { err = 1.0; } else if (%s.z < 1) { err = 0.0; }",
                              fDecalName.c_str(), fDecalName.c_str());
-        builder->codeAppendf("%s = mix(inside, half4(0, 0, 0, 0), err);", outColor);
+        builder->codeAppendf("%s = mix(%s, half4(0, 0, 0, 0), err);", outColor, color.c_str());
     } else {
         // A simple look up
-        builder->codeAppendf("%s = inside;", outColor);
+        builder->codeAppendf("%s = %s;", outColor, color.c_str());
     }
 }
 
 void GrTextureDomain::GLDomain::setData(const GrGLSLProgramDataManager& pdman,
                                         const GrTextureDomain& textureDomain,
-                                        GrSurfaceProxy* proxy,
-                                        const GrSamplerState& sampler) {
-    GrTexture* tex = proxy->peekTexture();
-    SkASSERT(fHasMode && textureDomain.modeX() == fModeX && textureDomain.modeY() == fModeY);
-    if (kIgnore_Mode != textureDomain.modeX() || kIgnore_Mode != textureDomain.modeY()) {
-        bool sendDecalData = textureDomain.modeX() == kDecal_Mode ||
-                             textureDomain.modeY() == kDecal_Mode;
+                                        const GrSurfaceProxy* proxy,
+                                        const GrSamplerState& state) {
+    // We want a hard transition from texture content to trans-black in nearest mode.
+    bool filterDecal = state.filter() != GrSamplerState::Filter::kNearest;
+    this->setData(pdman, textureDomain, proxy, filterDecal);
+}
 
-        // If the texture is using nearest filtering, then the decal filter weight should step from
-        // 0 (texture) to 1 (transparent) one half pixel away from the domain. When doing any other
-        // form of filtering, the weight should be 1.0 so that it smoothly interpolates between the
-        // texture and transparent.
-        SkScalar decalFilterWeight = sampler.filter() == GrSamplerState::Filter::kNearest ?
-                SK_ScalarHalf : 1.0f;
+void GrTextureDomain::GLDomain::setData(const GrGLSLProgramDataManager& pdman,
+                                        const GrTextureDomain& textureDomain,
+                                        bool filterIfDecal) {
+    this->setData(pdman, textureDomain, nullptr, filterIfDecal);
+}
+
+void GrTextureDomain::GLDomain::setData(const GrGLSLProgramDataManager& pdman,
+                                        const GrTextureDomain& textureDomain,
+                                        const GrSurfaceProxy* proxy,
+                                        bool filterIfDecal) {
+    SkASSERT(fHasMode && textureDomain.modeX() == fModeX && textureDomain.modeY() == fModeY);
+    if (kIgnore_Mode == textureDomain.modeX() && kIgnore_Mode == textureDomain.modeY()) {
+        return;
+    }
+    // If the texture is using nearest filtering, then the decal filter weight should step from
+    // 0 (texture) to 1 (transparent) one half pixel away from the domain. When doing any other
+    // form of filtering, the weight should be 1.0 so that it smoothly interpolates between the
+    // texture and transparent.
+    // Start off assuming we're in pixel units and later adjust if we have to deal with normalized
+    // texture coords.
+    float decalFilterWeights[3] = {1.f, 1.f, filterIfDecal ? 1.f : 0.5f};
+    bool sendDecalData = textureDomain.modeX() == kDecal_Mode ||
+                         textureDomain.modeY() == kDecal_Mode;
+    float tempDomainValues[4];
+    const float* values;
+    if (proxy) {
         SkScalar wInv, hInv, h;
+        GrTexture* tex = proxy->peekTexture();
         if (proxy->backendFormat().textureType() == GrTextureType::kRectangle) {
             wInv = hInv = 1.f;
             h = tex->height();
-
-            // Don't do any scaling by texture size for decal filter rate, it's already in pixels
-            if (sendDecalData) {
-                pdman.set3f(fDecalUni, 1.f, 1.f, decalFilterWeight);
-            }
+            // Don't do any scaling by texture size for decal filter rate, it's already in
+            // pixels
         } else {
             wInv = SK_Scalar1 / tex->width();
             hInv = SK_Scalar1 / tex->height();
             h = 1.f;
 
-            if (sendDecalData) {
-                pdman.set3f(fDecalUni, tex->width(), tex->height(), decalFilterWeight);
-            }
+            // Account for texture coord normalization in decal filter weights.
+            decalFilterWeights[0] = tex->width();
+            decalFilterWeights[1] = tex->height();
         }
 
-        float values[kPrevDomainCount] = {
-            SkScalarToFloat(textureDomain.domain().fLeft * wInv),
-            SkScalarToFloat(textureDomain.domain().fTop * hInv),
-            SkScalarToFloat(textureDomain.domain().fRight * wInv),
-            SkScalarToFloat(textureDomain.domain().fBottom * hInv)
-        };
+        tempDomainValues[0] = SkScalarToFloat(textureDomain.domain().fLeft * wInv);
+        tempDomainValues[1] = SkScalarToFloat(textureDomain.domain().fTop * hInv);
+        tempDomainValues[2] = SkScalarToFloat(textureDomain.domain().fRight * wInv);
+        tempDomainValues[3] = SkScalarToFloat(textureDomain.domain().fBottom * hInv);
 
         if (proxy->backendFormat().textureType() == GrTextureType::kRectangle) {
-            SkASSERT(values[0] >= 0.0f && values[0] <= proxy->width());
-            SkASSERT(values[1] >= 0.0f && values[1] <= proxy->height());
-            SkASSERT(values[2] >= 0.0f && values[2] <= proxy->width());
-            SkASSERT(values[3] >= 0.0f && values[3] <= proxy->height());
+            SkASSERT(tempDomainValues[0] >= 0.0f && tempDomainValues[0] <= proxy->width());
+            SkASSERT(tempDomainValues[1] >= 0.0f && tempDomainValues[1] <= proxy->height());
+            SkASSERT(tempDomainValues[2] >= 0.0f && tempDomainValues[2] <= proxy->width());
+            SkASSERT(tempDomainValues[3] >= 0.0f && tempDomainValues[3] <= proxy->height());
         } else {
-            SkASSERT(values[0] >= 0.0f && values[0] <= 1.0f);
-            SkASSERT(values[1] >= 0.0f && values[1] <= 1.0f);
-            SkASSERT(values[2] >= 0.0f && values[2] <= 1.0f);
-            SkASSERT(values[3] >= 0.0f && values[3] <= 1.0f);
+            SkASSERT(tempDomainValues[0] >= 0.0f && tempDomainValues[0] <= 1.0f);
+            SkASSERT(tempDomainValues[1] >= 0.0f && tempDomainValues[1] <= 1.0f);
+            SkASSERT(tempDomainValues[2] >= 0.0f && tempDomainValues[2] <= 1.0f);
+            SkASSERT(tempDomainValues[3] >= 0.0f && tempDomainValues[3] <= 1.0f);
         }
 
         // vertical flip if necessary
         if (kBottomLeft_GrSurfaceOrigin == proxy->origin()) {
-            values[1] = h - values[1];
-            values[3] = h - values[3];
+            tempDomainValues[1] = h - tempDomainValues[1];
+            tempDomainValues[3] = h - tempDomainValues[3];
 
             // The top and bottom were just flipped, so correct the ordering
             // of elements so that values = (l, t, r, b).
             using std::swap;
-            swap(values[1], values[3]);
+            swap(tempDomainValues[1], tempDomainValues[3]);
         }
-        if (0 != memcmp(values, fPrevDomain, kPrevDomainCount * sizeof(float))) {
-            pdman.set4fv(fDomainUni, 1, values);
-            memcpy(fPrevDomain, values, kPrevDomainCount * sizeof(float));
-        }
+        values = tempDomainValues;
+    } else {
+        values = textureDomain.domain().asScalars();
+    }
+    if (!std::equal(values, values + 4, fPrevDomain)) {
+        pdman.set4fv(fDomainUni, 1, values);
+        std::copy_n(values, 4, fPrevDomain);
+    }
+    if (sendDecalData &&
+        !std::equal(decalFilterWeights, decalFilterWeights + 3, fPrevDeclFilterWeights)) {
+        pdman.set3fv(fDecalUni, 1, decalFilterWeights);
+        std::copy_n(decalFilterWeights, 3, fPrevDeclFilterWeights);
     }
 }
 
 ///////////////////////////////////////////////////////////////////////////////
 
-std::unique_ptr<GrFragmentProcessor> GrTextureDomainEffect::Make(
-        sk_sp<GrSurfaceProxy> proxy,
-        SkAlphaType srcAlphaType,
-        const SkMatrix& matrix,
-        const SkRect& domain,
-        GrTextureDomain::Mode mode,
-        GrSamplerState::Filter filterMode) {
-    return Make(std::move(proxy), srcAlphaType, matrix, domain, mode, mode,
-                GrSamplerState(GrSamplerState::WrapMode::kClamp, filterMode));
+std::unique_ptr<GrFragmentProcessor> GrDomainEffect::Make(std::unique_ptr<GrFragmentProcessor> fp,
+                                                          const SkRect& domain,
+                                                          GrTextureDomain::Mode mode,
+                                                          bool decalIsFiltered) {
+    return Make(std::move(fp), domain, mode, mode, decalIsFiltered);
 }
 
-std::unique_ptr<GrFragmentProcessor> GrTextureDomainEffect::Make(sk_sp<GrSurfaceProxy> proxy,
-                                                                 SkAlphaType srcAlphaType,
-                                                                 const SkMatrix& matrix,
-                                                                 const SkRect& domain,
-                                                                 GrTextureDomain::Mode modeX,
-                                                                 GrTextureDomain::Mode modeY,
-                                                                 const GrSamplerState& sampler) {
+std::unique_ptr<GrFragmentProcessor> GrDomainEffect::Make(std::unique_ptr<GrFragmentProcessor> fp,
+                                                          const SkRect& domain,
+                                                          GrTextureDomain::Mode modeX,
+                                                          GrTextureDomain::Mode modeY,
+                                                          bool decalIsFiltered) {
+    if (modeX == GrTextureDomain::kIgnore_Mode && modeY == GrTextureDomain::kIgnore_Mode) {
+        return fp;
+    }
+    int count = 0;
+    GrCoordTransform* coordTransform = nullptr;
+    for (auto [transform, ignored] : GrFragmentProcessor::FPCoordTransformRange(*fp)) {
+        ++count;
+        coordTransform = &transform;
+    }
+    // If there are no coord transforms on the passed FP or its children then there's no need to
+    // enforce a domain.
+    // We have a limitation that only one coord transform is supported when overriding local coords.
+    // If that limit were relaxed we would need to add a coord transform for each descendent FP
+    // transform and possibly have multiple domain rects to account for different proxy
+    // normalization and y-reversals.
+    if (count != 1) {
+        return fp;
+    }
+    GrCoordTransform transformCopy = *coordTransform;
+    // Reset the child FP's coord transform.
+    *coordTransform = {};
     // If both domain modes happen to be ignore, it would be faster to just drop the domain logic
-    // entirely Technically, we could also use the simple texture effect if the domain modes agree
-    // with the sampler modes and the proxy is the same size as the domain. It's a lot easier for
-    // calling code to detect these cases and handle it themselves.
-    return std::unique_ptr<GrFragmentProcessor>(new GrTextureDomainEffect(
-            std::move(proxy), srcAlphaType, matrix, domain, modeX, modeY, sampler));
+    // entirely and return the original FP. We'd need a GrMatrixProcessor if the matrix is not
+    // identity, though.
+    return std::unique_ptr<GrFragmentProcessor>(new GrDomainEffect(
+            std::move(fp), transformCopy, domain, modeX, modeY, decalIsFiltered));
 }
 
-GrTextureDomainEffect::GrTextureDomainEffect(sk_sp<GrSurfaceProxy> proxy,
-                                             SkAlphaType srcAlphaType,
-                                             const SkMatrix& matrix,
-                                             const SkRect& domain,
-                                             GrTextureDomain::Mode modeX,
-                                             GrTextureDomain::Mode modeY,
-                                             const GrSamplerState& sampler)
-        : INHERITED(kGrTextureDomainEffect_ClassID,
-                    ModulateForSamplerOptFlags(
-                            srcAlphaType, GrTextureDomain::IsDecalSampled(sampler, modeX, modeY)))
-        , fCoordTransform(matrix, proxy.get())
-        , fTextureDomain(proxy.get(), domain, modeX, modeY)
-        , fTextureSampler(std::move(proxy), sampler) {
-    SkASSERT((modeX != GrTextureDomain::kRepeat_Mode && modeY != GrTextureDomain::kRepeat_Mode) ||
-             sampler.filter() == GrSamplerState::Filter::kNearest);
+std::unique_ptr<GrFragmentProcessor> GrDomainEffect::Make(std::unique_ptr<GrFragmentProcessor> fp,
+                                                          const SkRect& domain,
+                                                          GrTextureDomain::Mode mode,
+                                                          GrSamplerState::Filter filter) {
+    bool filterIfDecal = filter != GrSamplerState::Filter::kNearest;
+    return Make(std::move(fp), domain, mode, filterIfDecal);
+}
+
+std::unique_ptr<GrFragmentProcessor> GrDomainEffect::Make(std::unique_ptr<GrFragmentProcessor> fp,
+                                                          const SkRect& domain,
+                                                          GrTextureDomain::Mode modeX,
+                                                          GrTextureDomain::Mode modeY,
+                                                          GrSamplerState::Filter filter) {
+    bool filterIfDecal = filter != GrSamplerState::Filter::kNearest;
+    return Make(std::move(fp), domain, modeX, modeY, filterIfDecal);
+}
+GrFragmentProcessor::OptimizationFlags GrDomainEffect::Flags(GrFragmentProcessor* fp,
+                                                             GrTextureDomain::Mode modeX,
+                                                             GrTextureDomain::Mode modeY) {
+    auto fpFlags = GrFragmentProcessor::ProcessorOptimizationFlags(fp);
+    if (modeX == GrTextureDomain::kDecal_Mode || modeY == GrTextureDomain::kDecal_Mode) {
+        return fpFlags & ~kPreservesOpaqueInput_OptimizationFlag;
+    }
+    return fpFlags;
+}
+
+GrDomainEffect::GrDomainEffect(std::unique_ptr<GrFragmentProcessor> fp,
+                               const GrCoordTransform& coordTransform,
+                               const SkRect& domain,
+                               GrTextureDomain::Mode modeX,
+                               GrTextureDomain::Mode modeY,
+                               bool decalIsFiltered)
+        : INHERITED(kGrDomainEffect_ClassID, Flags(fp.get(), modeX, modeY))
+        , fCoordTransform(coordTransform)
+        , fDomain(domain, modeX, modeY)
+        , fDecalIsFiltered(decalIsFiltered) {
+    SkASSERT(fp);
+    fp->setSampledWithExplicitCoords(true);
+    this->registerChildProcessor(std::move(fp));
     this->addCoordTransform(&fCoordTransform);
-    this->setTextureSamplerCnt(1);
+    if (fDomain.modeX() != GrTextureDomain::kDecal_Mode &&
+        fDomain.modeY() != GrTextureDomain::kDecal_Mode) {
+        // Canonicalize this don't-care value so we don't have to worry about it elsewhere.
+        fDecalIsFiltered = false;
+    }
 }
 
-GrTextureDomainEffect::GrTextureDomainEffect(const GrTextureDomainEffect& that)
-        : INHERITED(kGrTextureDomainEffect_ClassID, that.optimizationFlags())
+GrDomainEffect::GrDomainEffect(const GrDomainEffect& that)
+        : INHERITED(kGrDomainEffect_ClassID, that.optimizationFlags())
         , fCoordTransform(that.fCoordTransform)
-        , fTextureDomain(that.fTextureDomain)
-        , fTextureSampler(that.fTextureSampler) {
+        , fDomain(that.fDomain)
+        , fDecalIsFiltered(that.fDecalIsFiltered) {
+    auto child = that.childProcessor(0).clone();
+    child->setSampledWithExplicitCoords(true);
+    this->registerChildProcessor(std::move(child));
     this->addCoordTransform(&fCoordTransform);
-    this->setTextureSamplerCnt(1);
 }
 
-void GrTextureDomainEffect::onGetGLSLProcessorKey(const GrShaderCaps& caps,
-                                                  GrProcessorKeyBuilder* b) const {
-    b->add32(GrTextureDomain::GLDomain::DomainKey(fTextureDomain));
+void GrDomainEffect::onGetGLSLProcessorKey(const GrShaderCaps& caps,
+                                           GrProcessorKeyBuilder* b) const {
+    b->add32(GrTextureDomain::GLDomain::DomainKey(fDomain));
 }
 
-GrGLSLFragmentProcessor* GrTextureDomainEffect::onCreateGLSLInstance() const  {
+GrGLSLFragmentProcessor* GrDomainEffect::onCreateGLSLInstance() const {
     class GLSLProcessor : public GrGLSLFragmentProcessor {
     public:
         void emitCode(EmitArgs& args) override {
-            const GrTextureDomainEffect& tde = args.fFp.cast<GrTextureDomainEffect>();
-            const GrTextureDomain& domain = tde.fTextureDomain;
+            const GrDomainEffect& de = args.fFp.cast<GrDomainEffect>();
+            const GrTextureDomain& domain = de.fDomain;
 
-            GrGLSLFPFragmentBuilder* fragBuilder = args.fFragBuilder;
             SkString coords2D =
-                              fragBuilder->ensureCoords2D(args.fTransformedCoords[0].fVaryingPoint);
+                    args.fFragBuilder->ensureCoords2D(args.fTransformedCoords[0].fVaryingPoint);
 
-            fGLDomain.sampleTexture(fragBuilder,
-                                    args.fUniformHandler,
-                                    args.fShaderCaps,
-                                    domain,
-                                    args.fOutputColor,
-                                    coords2D,
-                                    args.fTexSamplers[0],
-                                    args.fInputColor);
+            fGLDomain.sampleProcessor(domain, args.fInputColor, args.fOutputColor, coords2D, this,
+                                      args, 0);
         }
 
     protected:
         void onSetData(const GrGLSLProgramDataManager& pdman,
                        const GrFragmentProcessor& fp) override {
-            const GrTextureDomainEffect& tde = fp.cast<GrTextureDomainEffect>();
-            const GrTextureDomain& domain = tde.fTextureDomain;
-            GrSurfaceProxy* proxy = tde.textureSampler(0).proxy();
-
-            fGLDomain.setData(pdman, domain, proxy, tde.textureSampler(0).samplerState());
+            const GrDomainEffect& de = fp.cast<GrDomainEffect>();
+            const GrTextureDomain& domain = de.fDomain;
+            fGLDomain.setData(pdman, domain, de.fCoordTransform.proxy(), de.fDecalIsFiltered);
         }
 
     private:
@@ -347,43 +439,51 @@
     return new GLSLProcessor;
 }
 
-bool GrTextureDomainEffect::onIsEqual(const GrFragmentProcessor& sBase) const {
-    const GrTextureDomainEffect& s = sBase.cast<GrTextureDomainEffect>();
-    return this->fTextureDomain == s.fTextureDomain;
+bool GrDomainEffect::onIsEqual(const GrFragmentProcessor& sBase) const {
+    auto& td = sBase.cast<GrDomainEffect>();
+    return fDomain == td.fDomain && fDecalIsFiltered == td.fDecalIsFiltered;
 }
 
 ///////////////////////////////////////////////////////////////////////////////
 
-GR_DEFINE_FRAGMENT_PROCESSOR_TEST(GrTextureDomainEffect);
+GR_DEFINE_FRAGMENT_PROCESSOR_TEST(GrDomainEffect);
 
 #if GR_TEST_UTILS
-std::unique_ptr<GrFragmentProcessor> GrTextureDomainEffect::TestCreate(GrProcessorTestData* d) {
-    int texIdx = d->fRandom->nextBool() ? GrProcessorUnitTest::kSkiaPMTextureIdx
-                                        : GrProcessorUnitTest::kAlphaTextureIdx;
-    sk_sp<GrTextureProxy> proxy = d->textureProxy(texIdx);
-    SkRect domain;
-    domain.fLeft = d->fRandom->nextRangeScalar(0, proxy->width());
-    domain.fRight = d->fRandom->nextRangeScalar(domain.fLeft, proxy->width());
-    domain.fTop = d->fRandom->nextRangeScalar(0, proxy->height());
-    domain.fBottom = d->fRandom->nextRangeScalar(domain.fTop, proxy->height());
-    GrTextureDomain::Mode modeX =
-        (GrTextureDomain::Mode) d->fRandom->nextULessThan(GrTextureDomain::kModeCount);
-    GrTextureDomain::Mode modeY =
-        (GrTextureDomain::Mode) d->fRandom->nextULessThan(GrTextureDomain::kModeCount);
-    const SkMatrix& matrix = GrTest::TestMatrix(d->fRandom);
-    bool bilerp = modeX != GrTextureDomain::kRepeat_Mode && modeY != GrTextureDomain::kRepeat_Mode ?
-            d->fRandom->nextBool() : false;
-    auto alphaType = static_cast<SkAlphaType>(
-            d->fRandom->nextRangeU(kUnknown_SkAlphaType + 1, kLastEnum_SkAlphaType));
-    return GrTextureDomainEffect::Make(std::move(proxy),
-                                       alphaType,
-                                       matrix,
-                                       domain,
-                                       modeX,
-                                       modeY,
-                                       GrSamplerState(GrSamplerState::WrapMode::kClamp,
-                                                      bilerp ? GrSamplerState::Filter::kBilerp
-                                                             : GrSamplerState::Filter::kNearest));
+std::unique_ptr<GrFragmentProcessor> GrDomainEffect::TestCreate(GrProcessorTestData* d) {
+    do {
+        GrTextureDomain::Mode modeX =
+                (GrTextureDomain::Mode)d->fRandom->nextULessThan(GrTextureDomain::kModeCount);
+        GrTextureDomain::Mode modeY =
+                (GrTextureDomain::Mode)d->fRandom->nextULessThan(GrTextureDomain::kModeCount);
+        auto child = GrProcessorUnitTest::MakeChildFP(d);
+        const auto* childPtr = child.get();
+        SkRect domain;
+        // We assert if the child's coord transform has a proxy and the domain rect is outside its
+        // bounds.
+        GrFragmentProcessor::CoordTransformIter ctIter(*child);
+        if (!ctIter) {
+            continue;
+        }
+        auto [transform, fp] = *ctIter;
+        if (auto proxy = transform.proxy()) {
+            auto [w, h] = proxy->backingStoreDimensions();
+            domain.fLeft   = d->fRandom->nextRangeScalar(0, w);
+            domain.fRight  = d->fRandom->nextRangeScalar(0, w);
+            domain.fTop    = d->fRandom->nextRangeScalar(0, h);
+            domain.fBottom = d->fRandom->nextRangeScalar(0, h);
+        } else {
+            domain.fLeft   = d->fRandom->nextRangeScalar(-100.f, 100.f);
+            domain.fRight  = d->fRandom->nextRangeScalar(-100.f, 100.f);
+            domain.fTop    = d->fRandom->nextRangeScalar(-100.f, 100.f);
+            domain.fBottom = d->fRandom->nextRangeScalar(-100.f, 100.f);
+        }
+        domain.sort();
+        bool filterIfDecal = d->fRandom->nextBool();
+        auto result = GrDomainEffect::Make(std::move(child), domain, modeX, modeY, filterIfDecal);
+        if (result && result.get() != childPtr) {
+            return result;
+        }
+    } while (true);
 }
 #endif
 
diff --git a/src/gpu/effects/GrTextureDomain.h b/src/gpu/effects/GrTextureDomain.h
index 1361c4d..073a15b 100644
--- a/src/gpu/effects/GrTextureDomain.h
+++ b/src/gpu/effects/GrTextureDomain.h
@@ -5,8 +5,8 @@
  * found in the LICENSE file.
  */
 
-#ifndef GrTextureDomainEffect_DEFINED
-#define GrTextureDomainEffect_DEFINED
+#ifndef GrTextureDomain_DEFINED
+#define GrTextureDomain_DEFINED
 
 #include "src/gpu/GrCoordTransform.h"
 #include "src/gpu/GrFragmentProcessor.h"
@@ -50,6 +50,16 @@
     }
 
     /**
+     * Construct a domain used to sample a GrFragmentProcessor.
+     *
+     * @param index     Pass a value >= 0 if using multiple texture domains in the same effect.
+     *                  It is used to keep inserted variables from causing name collisions.
+     */
+    GrTextureDomain(const SkRect& domain, Mode modeX, Mode modeY, int index = -1);
+
+    /**
+     * Construct a domain used to directly sample a texture.
+     *
      * @param index     Pass a value >= 0 if using multiple texture domains in the same effect.
      *                  It is used to keep inserted variables from causing name collisions.
      */
@@ -104,6 +114,7 @@
                (kIgnore_Mode == fModeY || (fDomain.fTop == that.fDomain.fTop &&
                                            fDomain.fBottom == that.fDomain.fBottom));
     }
+    bool operator!=(const GrTextureDomain& that) const { return !(*this == that); }
 
     /**
      * A GrGLSLFragmentProcessor subclass that corresponds to a GrProcessor subclass that uses
@@ -113,19 +124,32 @@
      */
     class GLDomain {
     public:
-        GLDomain() {
-            for (int i = 0; i < kPrevDomainCount; i++) {
-                fPrevDomain[i] = SK_FloatNaN;
-            }
-        }
+        GLDomain() = default;
 
         /**
-         * Call this from GrGLSLFragmentProcessor::emitCode() to sample the texture W.R.T. the
+         * Call this from GrGLSLFragmentProcessor::emitCode() to sample a child processor WRT the
          * domain and mode.
          *
          * @param outcolor  name of half4 variable to hold the sampled color.
-         * @param inCoords  name of float2 variable containing the coords to be used with the domain.
-         *                  It is assumed that this is a variable and not an expression.
+         * @param inCoords  name of float2 variable containing the coords to be used with the
+         *                  domain.
+         * @param inColor   color passed to the child processor.
+         */
+        void sampleProcessor(const GrTextureDomain& textureDomain,
+                             const char* inColor,
+                             const char* outColor,
+                             const SkString& inCoords,
+                             GrGLSLFragmentProcessor* parent,
+                             GrGLSLFragmentProcessor::EmitArgs& args,
+                             int childIndex);
+
+        /**
+         * Call this from GrGLSLFragmentProcessor::emitCode() to sample the texture WRT the domain
+         * and mode.
+         *
+         * @param outColor  name of half4 variable to hold the sampled color.
+         * @param inCoords  name of float2 variable containing the coords to be used with the
+         *                  domain.
          * @param inModulateColor   if non-nullptr the sampled color will be modulated with this
          *                          expression before being written to outColor.
          */
@@ -140,11 +164,23 @@
 
         /**
          * Call this from GrGLSLFragmentProcessor::setData() to upload uniforms necessary for the
-         * texture domain. The rectangle is automatically adjusted to account for the texture's
-         * origin.
+         * domain. 'filterIfDecal' determines whether the transition to transparent black at the
+         * edge of domain is linearly interpolated over a unit interval or is "hard" when
+         * kDecal_Mode is used.
          */
-        void setData(const GrGLSLProgramDataManager&, const GrTextureDomain&, GrSurfaceProxy*,
-                     const GrSamplerState& sampler);
+        void setData(const GrGLSLProgramDataManager&, const GrTextureDomain&, bool filterIfDecal);
+
+        /**
+         * Call this from GrGLSLFragmentProcessor::setData() to upload uniforms necessary for the
+         * texture domain used with a texture proxy. The rectangle is automatically adjusted to
+         * account for the texture's origin. Filtering at the edge of the domain is inferred from
+         * the GrSamplerState's filter mode.
+         */
+        void setData(const GrGLSLProgramDataManager&, const GrTextureDomain&, const GrSurfaceProxy*,
+                     const GrSamplerState& state);
+        /** Same as above but with direct control over decal filtering. */
+        void setData(const GrGLSLProgramDataManager&, const GrTextureDomain&, const GrSurfaceProxy*,
+                     bool filterIfDecal);
 
         enum {
             kModeBits = 2, // See DomainKey().
@@ -161,7 +197,17 @@
         }
 
     private:
-        static const int kPrevDomainCount = 4;
+        // Takes a coord and returns a string that is an expression that evaluates to a half4
+        // color.
+        using AppendSample = SkString(const char* coord);
+
+        void sample(GrGLSLShaderBuilder* builder,
+                    GrGLSLUniformHandler* uniformHandler,
+                    const GrTextureDomain& textureDomain,
+                    const char* outColor,
+                    const SkString& inCoords,
+                    const std::function<AppendSample>& color);
+
         SkDEBUGCODE(Mode                        fModeX;)
         SkDEBUGCODE(Mode                        fModeY;)
         SkDEBUGCODE(bool                        fHasMode = false;)
@@ -172,67 +218,84 @@
         GrGLSLProgramDataManager::UniformHandle fDecalUni;
         SkString                                fDecalName;
 
-        float                                   fPrevDomain[kPrevDomainCount];
+        float                                   fPrevDomain[4] = {SK_FloatNaN};
+        float                                   fPrevDeclFilterWeights[3] = {SK_FloatNaN};
     };
 
 protected:
+    SkRect  fDomain;
     Mode    fModeX;
     Mode    fModeY;
-    SkRect  fDomain;
     int     fIndex;
 };
 
 /**
- * A basic texture effect that uses GrTextureDomain.
+ * This effect applies a domain rectangle with an edge "mode" to the result of the child FP's coord
+ * transform. Currently the passed FP (including its descendants) must have exactly 1 coord
+ * transform (due to internal program builder restrictions). Also, it's important to note that the
+ * domain rectangle is applied AFTER the coord transform. This allows us to continue to lift the
+ * coord transform to the vertex shader. It might make this nicer for some use cases to add a
+ * pre-coord transform option and try to adjust the domain rect internally to convert to
+ * post-coord transform and keep everything in the vertex shader for simple use cases.
  */
-class GrTextureDomainEffect : public GrFragmentProcessor {
+class GrDomainEffect : public GrFragmentProcessor {
 public:
-    static std::unique_ptr<GrFragmentProcessor> Make(sk_sp<GrSurfaceProxy>,
-                                                     SkAlphaType srcAlphaType,
-                                                     const SkMatrix&,
+    static std::unique_ptr<GrFragmentProcessor> Make(std::unique_ptr<GrFragmentProcessor>,
                                                      const SkRect& domain,
-                                                     GrTextureDomain::Mode mode,
-                                                     GrSamplerState::Filter filterMode);
+                                                     GrTextureDomain::Mode,
+                                                     bool decalIsFiltered);
 
-    static std::unique_ptr<GrFragmentProcessor> Make(sk_sp<GrSurfaceProxy>,
-                                                     SkAlphaType srcAlphaType,
-                                                     const SkMatrix&,
+    static std::unique_ptr<GrFragmentProcessor> Make(std::unique_ptr<GrFragmentProcessor>,
                                                      const SkRect& domain,
                                                      GrTextureDomain::Mode modeX,
                                                      GrTextureDomain::Mode modeY,
-                                                     const GrSamplerState& sampler);
+                                                     bool decalIsFiltered);
 
-    const char* name() const override { return "TextureDomain"; }
+    // These variants infer decalIsFiltered from the Filter mode (true if not kNearest).
+    static std::unique_ptr<GrFragmentProcessor> Make(std::unique_ptr<GrFragmentProcessor>,
+                                                     const SkRect& domain,
+                                                     GrTextureDomain::Mode,
+                                                     GrSamplerState::Filter);
+
+    static std::unique_ptr<GrFragmentProcessor> Make(std::unique_ptr<GrFragmentProcessor>,
+                                                     const SkRect& domain,
+                                                     GrTextureDomain::Mode modeX,
+                                                     GrTextureDomain::Mode modeY,
+                                                     GrSamplerState::Filter);
+
+    const char* name() const override { return "Domain"; }
 
     std::unique_ptr<GrFragmentProcessor> clone() const override {
-        return std::unique_ptr<GrFragmentProcessor>(new GrTextureDomainEffect(*this));
+        return std::unique_ptr<GrFragmentProcessor>(new GrDomainEffect(*this));
     }
 
 #ifdef SK_DEBUG
     SkString dumpInfo() const override {
         SkString str;
-        str.appendf("Domain: [L: %.2f, T: %.2f, R: %.2f, B: %.2f]",
-                    fTextureDomain.domain().fLeft, fTextureDomain.domain().fTop,
-                    fTextureDomain.domain().fRight, fTextureDomain.domain().fBottom);
+        str.appendf("Domain: [L: %.2f, T: %.2f, R: %.2f, B: %.2f], filterDecal: %d",
+                    fDomain.domain().fLeft, fDomain.domain().fTop, fDomain.domain().fRight,
+                    fDomain.domain().fBottom, fDecalIsFiltered);
         str.append(INHERITED::dumpInfo());
         return str;
     }
 #endif
 
 private:
+    GrFragmentProcessor::OptimizationFlags Flags(GrFragmentProcessor*, GrTextureDomain::Mode,
+                                                 GrTextureDomain::Mode);
+
     GrCoordTransform fCoordTransform;
-    GrTextureDomain fTextureDomain;
-    TextureSampler fTextureSampler;
+    GrTextureDomain fDomain;
+    bool fDecalIsFiltered;
 
-    GrTextureDomainEffect(sk_sp<GrSurfaceProxy>,
-                          SkAlphaType srcAlphaType,
-                          const SkMatrix&,
-                          const SkRect& domain,
-                          GrTextureDomain::Mode modeX,
-                          GrTextureDomain::Mode modeY,
-                          const GrSamplerState&);
+    GrDomainEffect(std::unique_ptr<GrFragmentProcessor>,
+                   const GrCoordTransform& transform,
+                   const SkRect& domain,
+                   GrTextureDomain::Mode modeX,
+                   GrTextureDomain::Mode modeY,
+                   bool decalIsFiltered);
 
-    explicit GrTextureDomainEffect(const GrTextureDomainEffect&);
+    explicit GrDomainEffect(const GrDomainEffect&);
 
     GrGLSLFragmentProcessor* onCreateGLSLInstance() const override;
 
@@ -240,8 +303,6 @@
 
     bool onIsEqual(const GrFragmentProcessor&) const override;
 
-    const TextureSampler& onTextureSampler(int) const override { return fTextureSampler; }
-
     GR_DECLARE_FRAGMENT_PROCESSOR_TEST
 
     typedef GrFragmentProcessor INHERITED;
diff --git a/src/gpu/gl/GrGLProgram.cpp b/src/gpu/gl/GrGLProgram.cpp
index 7914d77..53760e6 100644
--- a/src/gpu/gl/GrGLProgram.cpp
+++ b/src/gpu/gl/GrGLProgram.cpp
@@ -118,7 +118,7 @@
 }
 
 void GrGLProgram::setFragmentData(const GrPipeline& pipeline, int* nextTexSamplerIdx) {
-    GrFragmentProcessor::Iter fpIter(pipeline);
+    GrFragmentProcessor::CIter fpIter(pipeline);
     GrGLSLFragmentProcessor::Iter glslIter(fFragmentProcessors.get(), fFragmentProcessorCnt);
     for (; fpIter && glslIter; ++fpIter, ++glslIter) {
         glslIter->setData(fProgramDataManager, *fpIter);
diff --git a/src/gpu/glsl/GrGLSLFragmentProcessor.h b/src/gpu/glsl/GrGLSLFragmentProcessor.h
index df501c2..a1947d1 100644
--- a/src/gpu/glsl/GrGLSLFragmentProcessor.h
+++ b/src/gpu/glsl/GrGLSLFragmentProcessor.h
@@ -55,7 +55,7 @@
         BuilderInputProvider childInputs(int childIdx) const {
             const GrFragmentProcessor* child = &fFP->childProcessor(childIdx);
             int numToSkip = 0;
-            for (const auto& fp : GrFragmentProcessor::FPRange(*fFP)) {
+            for (const auto& fp : GrFragmentProcessor::FPCRange(*fFP)) {
                 if (&fp == child) {
                     return BuilderInputProvider(child, fTs + numToSkip);
                 }
diff --git a/src/gpu/glsl/GrGLSLFragmentShaderBuilder.cpp b/src/gpu/glsl/GrGLSLFragmentShaderBuilder.cpp
index 5347ab3..5976330 100644
--- a/src/gpu/glsl/GrGLSLFragmentShaderBuilder.cpp
+++ b/src/gpu/glsl/GrGLSLFragmentShaderBuilder.cpp
@@ -170,7 +170,7 @@
                                                           args.fTransformedCoords[0].fUniformMatrix;
         if (mat.isValid()) {
             args.fUniformHandler->updateUniformVisibility(mat, kFragment_GrShaderFlag);
-            this->codeAppendf("_coords = (float3(_coords, 1) * %s).xy;\n",
+            this->codeAppendf("_coords = (%s * float3(_coords, 1)).xy;\n",
                               args.fTransformedCoords[0].fMatrixCode.c_str());
         }
     }
diff --git a/src/gpu/glsl/GrGLSLGeometryProcessor.cpp b/src/gpu/glsl/GrGLSLGeometryProcessor.cpp
index feaa162..26288b9 100644
--- a/src/gpu/glsl/GrGLSLGeometryProcessor.cpp
+++ b/src/gpu/glsl/GrGLSLGeometryProcessor.cpp
@@ -69,33 +69,41 @@
     }
     for (int i = 0; *handler; ++*handler, ++i) {
         auto [coordTransform, fp] = handler->get();
-        SkString strUniName;
-        strUniName.printf("CoordTransformMatrix_%d", i);
-        const char* uniName;
-        fInstalledTransforms.push_back().fHandle = uniformHandler->addUniform(kVertex_GrShaderFlag,
-                                                                              kFloat3x3_GrSLType,
-                                                                              strUniName.c_str(),
-                                                                              &uniName).toIndex();
-        GrSLType varyingType = kFloat2_GrSLType;
-        if (localMatrix.hasPerspective() || coordTransform.matrix().hasPerspective() ||
-            threeComponentLocalCoords) {
-            varyingType = kFloat3_GrSLType;
-        }
-        SkString strVaryingName;
-        strVaryingName.printf("TransformedCoords_%d", i);
-        GrGLSLVarying v(varyingType);
-        if (fp.coordTransformsApplyToLocalCoords()) {
-            varyingHandler->addVarying(strVaryingName.c_str(), &v);
-
-            if (kFloat2_GrSLType == varyingType) {
-                vb->codeAppendf("%s = (%s * %s).xy;", v.vsOut(), uniName, localCoords.c_str());
-            } else {
-                vb->codeAppendf("%s = %s * %s;", v.vsOut(), uniName, localCoords.c_str());
+        if (coordTransform.isNoOp() && !fp.coordTransformsApplyToLocalCoords()) {
+            handler->omitCoordsForCurrCoordTransform();
+            fInstalledTransforms.push_back();
+        } else {
+            SkString strUniName;
+            strUniName.printf("CoordTransformMatrix_%d", i);
+            const char* uniName;
+            fInstalledTransforms.push_back().fHandle = uniformHandler
+                                                               ->addUniform(kVertex_GrShaderFlag,
+                                                                            kFloat3x3_GrSLType,
+                                                                            strUniName.c_str(),
+                                                                            &uniName)
+                                                               .toIndex();
+            GrSLType varyingType = kFloat2_GrSLType;
+            if (localMatrix.hasPerspective() || coordTransform.matrix().hasPerspective() ||
+                threeComponentLocalCoords) {
+                varyingType = kFloat3_GrSLType;
             }
+            SkString strVaryingName;
+            strVaryingName.printf("TransformedCoords_%d", i);
+            GrGLSLVarying v(varyingType);
+            if (fp.coordTransformsApplyToLocalCoords()) {
+                varyingHandler->addVarying(strVaryingName.c_str(), &v);
+
+                if (kFloat2_GrSLType == varyingType) {
+                    vb->codeAppendf("%s = (%s * %s).xy;", v.vsOut(), uniName, localCoords.c_str());
+                } else {
+                    vb->codeAppendf("%s = %s * %s;", v.vsOut(), uniName, localCoords.c_str());
+                }
+            }
+            handler->specifyCoordsForCurrCoordTransform(
+                    SkString(uniName),
+                    fInstalledTransforms.back().fHandle,
+                    GrShaderVar(SkString(v.fsIn()), varyingType));
         }
-        handler->specifyCoordsForCurrCoordTransform(SkString(uniName),
-                                                    fInstalledTransforms.back().fHandle,
-                                                    GrShaderVar(SkString(v.fsIn()), varyingType));
     }
 }
 
@@ -104,10 +112,17 @@
                                                      const CoordTransformRange& transformRange) {
     int i = 0;
     for (auto [transform, fp] : transformRange) {
-        const SkMatrix& m = GetTransformMatrix(localMatrix, transform);
-        if (!fInstalledTransforms[i].fCurrentValue.cheapEqualTo(m)) {
-            pdman.setSkMatrix(fInstalledTransforms[i].fHandle.toIndex(), m);
-            fInstalledTransforms[i].fCurrentValue = m;
+        if (fInstalledTransforms[i].fHandle.isValid()) {
+            SkMatrix m;
+            if (fp.coordTransformsApplyToLocalCoords()) {
+                m = GetTransformMatrix(transform, localMatrix);
+            } else {
+                m = GetTransformMatrix(transform, SkMatrix::I());
+            }
+            if (!fInstalledTransforms[i].fCurrentValue.cheapEqualTo(m)) {
+                pdman.setSkMatrix(fInstalledTransforms[i].fHandle.toIndex(), m);
+                fInstalledTransforms[i].fCurrentValue = m;
+            }
         }
         ++i;
     }
diff --git a/src/gpu/glsl/GrGLSLPrimitiveProcessor.cpp b/src/gpu/glsl/GrGLSLPrimitiveProcessor.cpp
index 983e11e..c28b370 100644
--- a/src/gpu/glsl/GrGLSLPrimitiveProcessor.cpp
+++ b/src/gpu/glsl/GrGLSLPrimitiveProcessor.cpp
@@ -14,10 +14,10 @@
 #include "src/gpu/glsl/GrGLSLUniformHandler.h"
 #include "src/gpu/glsl/GrGLSLVertexGeoBuilder.h"
 
-SkMatrix GrGLSLPrimitiveProcessor::GetTransformMatrix(const SkMatrix& localMatrix,
-                                                      const GrCoordTransform& coordTransform) {
+SkMatrix GrGLSLPrimitiveProcessor::GetTransformMatrix(const GrCoordTransform& coordTransform,
+                                                      const SkMatrix& preMatrix) {
     SkMatrix combined;
-    combined.setConcat(coordTransform.matrix(), localMatrix);
+    combined.setConcat(coordTransform.matrix(), preMatrix);
     if (coordTransform.normalize()) {
         combined.postIDiv(coordTransform.peekTexture()->width(),
                           coordTransform.peekTexture()->height());
diff --git a/src/gpu/glsl/GrGLSLPrimitiveProcessor.h b/src/gpu/glsl/GrGLSLPrimitiveProcessor.h
index 1aae153..f4ca1f3 100644
--- a/src/gpu/glsl/GrGLSLPrimitiveProcessor.h
+++ b/src/gpu/glsl/GrGLSLPrimitiveProcessor.h
@@ -74,6 +74,12 @@
             SkDEBUGCODE(fAddedCoord = true;)
         }
 
+        void omitCoordsForCurrCoordTransform() {
+            SkASSERT(!fAddedCoord);
+            fTransformedCoordVars->push_back();
+            SkDEBUGCODE(fAddedCoord = true;)
+        }
+
     private:
         GrFragmentProcessor::CoordTransformIter fIter;
         SkDEBUGCODE(bool                        fAddedCoord = false;)
@@ -140,7 +146,7 @@
     virtual void setData(const GrGLSLProgramDataManager&, const GrPrimitiveProcessor&,
                          const CoordTransformRange&) = 0;
 
-    static SkMatrix GetTransformMatrix(const SkMatrix& localMatrix, const GrCoordTransform&);
+    static SkMatrix GetTransformMatrix(const GrCoordTransform&, const SkMatrix& preMatrix);
 
 protected:
     void setupUniformColor(GrGLSLFPFragmentBuilder* fragBuilder,
diff --git a/src/gpu/glsl/GrGLSLProgramBuilder.cpp b/src/gpu/glsl/GrGLSLProgramBuilder.cpp
index 3474380..77f7aee 100644
--- a/src/gpu/glsl/GrGLSLProgramBuilder.cpp
+++ b/src/gpu/glsl/GrGLSLProgramBuilder.cpp
@@ -150,7 +150,7 @@
         const GrFragmentProcessor& fp = this->pipeline().getFragmentProcessor(i);
         output = this->emitAndInstallFragProc(fp, i, transformedCoordVarsIdx, **inOut, output,
                                               &glslFragmentProcessors);
-        for (const auto& subFP : GrFragmentProcessor::FPRange(fp)) {
+        for (const auto& subFP : GrFragmentProcessor::FPCRange(fp)) {
             transformedCoordVarsIdx += subFP.numCoordTransforms();
         }
         **inOut = output;
@@ -185,7 +185,7 @@
 
     SkSTArray<4, SamplerHandle> texSamplers;
     int samplerIdx = 0;
-    for (const auto& subFP : GrFragmentProcessor::FPRange(fp)) {
+    for (const auto& subFP : GrFragmentProcessor::FPCRange(fp)) {
         for (int i = 0; i < subFP.numTextureSamplers(); ++i) {
             SkString name;
             name.printf("TextureSampler_%d", samplerIdx++);
diff --git a/src/gpu/mtl/GrMtlPipelineState.mm b/src/gpu/mtl/GrMtlPipelineState.mm
index ecc6366..b596309 100644
--- a/src/gpu/mtl/GrMtlPipelineState.mm
+++ b/src/gpu/mtl/GrMtlPipelineState.mm
@@ -89,7 +89,7 @@
         fSamplerBindings.emplace_back(sampler.samplerState(), texture, fGpu);
     }
 
-    GrFragmentProcessor::Iter fpIter(programInfo.pipeline());
+    GrFragmentProcessor::CIter fpIter(programInfo.pipeline());
     GrGLSLFragmentProcessor::Iter glslIter(fFragmentProcessors.get(), fFragmentProcessorCnt);
     for (; fpIter && glslIter; ++fpIter, ++glslIter) {
         glslIter->setData(fDataManager, *fpIter);
diff --git a/src/gpu/ops/GrTextureOp.cpp b/src/gpu/ops/GrTextureOp.cpp
index a1b6c41..b6f5a02 100644
--- a/src/gpu/ops/GrTextureOp.cpp
+++ b/src/gpu/ops/GrTextureOp.cpp
@@ -976,14 +976,13 @@
 
         GrSurfaceProxy* proxy = proxyView.proxy();
         std::unique_ptr<GrFragmentProcessor> fp;
+        fp = GrSimpleTextureEffect::Make(sk_ref_sp(proxy), alphaType, SkMatrix::I(), filter);
         if (domain) {
             // Update domain to match what GrTextureOp would do for bilerp, but don't do any
             // normalization since GrTextureDomainEffect handles that and the origin.
             SkRect correctedDomain = normalize_domain(filter, {1.f, 1.f, 0.f}, domain);
-            fp = GrTextureDomainEffect::Make(sk_ref_sp(proxy), alphaType, SkMatrix::I(),
-                                             correctedDomain, GrTextureDomain::kClamp_Mode, filter);
-        } else {
-            fp = GrSimpleTextureEffect::Make(sk_ref_sp(proxy), alphaType, SkMatrix::I(), filter);
+            fp = GrDomainEffect::Make(std::move(fp), correctedDomain, GrTextureDomain::kClamp_Mode,
+                                      filter);
         }
         fp = GrColorSpaceXformEffect::Make(std::move(fp), std::move(textureXform));
         paint.addColorFragmentProcessor(std::move(fp));
diff --git a/src/gpu/vk/GrVkPipelineState.cpp b/src/gpu/vk/GrVkPipelineState.cpp
index e2a6fe5..7bcd342 100644
--- a/src/gpu/vk/GrVkPipelineState.cpp
+++ b/src/gpu/vk/GrVkPipelineState.cpp
@@ -105,7 +105,7 @@
 
     GrFragmentProcessor::PipelineCoordTransformRange transformRange(programInfo.pipeline());
     fGeometryProcessor->setData(fDataManager, programInfo.primProc(), transformRange);
-    GrFragmentProcessor::Iter fpIter(programInfo.pipeline());
+    GrFragmentProcessor::CIter fpIter(programInfo.pipeline());
     GrGLSLFragmentProcessor::Iter glslIter(fFragmentProcessors.get(), fFragmentProcessorCnt);
     for (; fpIter && glslIter; ++fpIter, ++glslIter) {
         glslIter->setData(fDataManager, *fpIter);
@@ -164,7 +164,7 @@
         samplerBindings[currTextureBinding++] = {sampler.samplerState(), texture};
     }
 
-    GrFragmentProcessor::Iter fpIter(pipeline);
+    GrFragmentProcessor::CIter fpIter(pipeline);
     GrGLSLFragmentProcessor::Iter glslIter(fFragmentProcessors.get(), fFragmentProcessorCnt);
     for (; fpIter && glslIter; ++fpIter, ++glslIter) {
         for (int i = 0; i < fpIter->numTextureSamplers(); ++i) {
diff --git a/src/shaders/SkImageShader.cpp b/src/shaders/SkImageShader.cpp
index 3f3cd0c..4cebcb4 100755
--- a/src/shaders/SkImageShader.cpp
+++ b/src/shaders/SkImageShader.cpp
@@ -247,14 +247,14 @@
         inner = GrBicubicEffect::Make(std::move(proxy), lmInverse, wrapModes, domainX, domainY,
                                       kDir, srcAlphaType);
     } else {
+        auto dimensions = proxy->dimensions();
+        inner = GrSimpleTextureEffect::Make(std::move(proxy), srcAlphaType, lmInverse,
+                                            samplerState);
         if (domainX != GrTextureDomain::kIgnore_Mode || domainY != GrTextureDomain::kIgnore_Mode) {
-            SkRect domain = GrTextureDomain::MakeTexelDomain(SkIRect::MakeSize(proxy->dimensions()),
+            SkRect domain = GrTextureDomain::MakeTexelDomain(SkIRect::MakeSize(dimensions),
                                                              domainX, domainY);
-            inner = GrTextureDomainEffect::Make(std::move(proxy), srcAlphaType, lmInverse, domain,
-                                                domainX, domainY, samplerState);
-        } else {
-            inner = GrSimpleTextureEffect::Make(std::move(proxy), srcAlphaType, lmInverse,
-                                                samplerState);
+            inner = GrDomainEffect::Make(std::move(inner), domain, domainX, domainY,
+                                         samplerState.filter());
         }
     }
     inner = GrColorSpaceXformEffect::Make(std::move(inner), fImage->colorSpace(), srcAlphaType,