Fold analytic clip FPs into GrReducedClip

Perf results on a Pixel phone (sorted by impact):

  GEOMEAN                               6.73 -> 6.49 ms   [96% ]

  top25desk_pinterest.skp               0.45 -> 0.49 ms   [107%]
  desk_pokemonwiki.skp                  14.6 -> 15.9 ms   [106%]
  keymobi_pinterest.skp                 0.47 -> 0.49 ms   [104%]
  ...
  keymobi_androidpolice_com_2012_.skp   3.69 -> 3.09 ms   [83% ]
  keymobi_shop_mobileweb_ebay_com.skp   2.90 -> 2.29 ms   [78% ]
  keymobi_boingboing_net.skp            2.95 -> 2.29 ms   [76% ]
  desk_jsfiddlebigcar.skp               1.79 -> 1.29 ms   [71% ]
  keymobi_m_youtube_com_watch_v_9.skp   12.9 -> 9.09 ms   [70% ]
  keymobi_blogger.skp                   3.80 -> 2.69 ms   [70% ]
  keymobi_sfgate_com_.skp               8.16 -> 5.69 ms   [69% ]

Net effect: cleaner code, large wins on several skps (up to ~30% faster),
small regressions on a few (up to 7% slower), and a slightly better
geometric mean time.

Pixel C is mostly unaffected, presumably because it uses window
rectangles.

Bug: skia:7190
Change-Id: I9c7f3512ca57e1d1afcd42865357b63ffcc192ce
Reviewed-on: https://skia-review.googlesource.com/66280
Reviewed-by: Brian Salomon <bsalomon@google.com>
Commit-Queue: Chris Dalton <csmartdalton@google.com>
diff --git a/src/gpu/GrClipStackClip.cpp b/src/gpu/GrClipStackClip.cpp
index aa2b9fd..b18cb16 100644
--- a/src/gpu/GrClipStackClip.cpp
+++ b/src/gpu/GrClipStackClip.cpp
@@ -30,7 +30,15 @@
 typedef GrReducedClip::InitialState InitialState;
 typedef GrReducedClip::ElementList ElementList;
 
+// An element count of 4 was chosen because of the common pattern in Blink of:
+//   isect RR
+//   diff  RR
+//   isect convex_poly
+//   isect convex_poly
+// when drawing rounded div borders. This could probably be tuned based on a configuration's
+// relative costs of switching RTs to generate a mask vs longer shaders.
 static const int kMaxAnalyticElements = 4;
+
 const char GrClipStackClip::kMaskTestTag[] = "clip_mask";
 
 bool GrClipStackClip::quickContains(const SkRect& rect) const {
@@ -176,80 +184,6 @@
     return false;
 }
 
-static bool get_analytic_clip_processor(const ElementList& elements,
-                                        bool abortIfAA,
-                                        const SkRect& drawDevBounds,
-                                        std::unique_ptr<GrFragmentProcessor>* resultFP) {
-    SkASSERT(elements.count() <= kMaxAnalyticElements);
-    SkSTArray<kMaxAnalyticElements, std::unique_ptr<GrFragmentProcessor>> fps;
-    ElementList::Iter iter(elements);
-    while (iter.get()) {
-        SkClipOp op = iter.get()->getOp();
-        bool invert;
-        bool skip = false;
-        switch (op) {
-            case kReplace_SkClipOp:
-                SkASSERT(iter.get() == elements.head());
-                // Fallthrough, handled same as intersect.
-            case kIntersect_SkClipOp:
-                invert = false;
-                if (iter.get()->contains(drawDevBounds)) {
-                    skip = true;
-                }
-                break;
-            case kDifference_SkClipOp:
-                invert = true;
-                // We don't currently have a cheap test for whether a rect is fully outside an
-                // element's primitive, so don't attempt to set skip.
-                break;
-            default:
-                return false;
-        }
-        if (!skip) {
-            GrPrimitiveEdgeType edgeType;
-            if (iter.get()->isAA()) {
-                if (abortIfAA) {
-                    return false;
-                }
-                edgeType =
-                    invert ? kInverseFillAA_GrProcessorEdgeType : kFillAA_GrProcessorEdgeType;
-            } else {
-                edgeType =
-                    invert ? kInverseFillBW_GrProcessorEdgeType : kFillBW_GrProcessorEdgeType;
-            }
-
-            switch (iter.get()->getDeviceSpaceType()) {
-                case SkClipStack::Element::DeviceSpaceType::kPath:
-                    fps.emplace_back(
-                            GrConvexPolyEffect::Make(edgeType, iter.get()->getDeviceSpacePath()));
-                    break;
-                case SkClipStack::Element::DeviceSpaceType::kRRect: {
-                    fps.emplace_back(
-                            GrRRectEffect::Make(edgeType, iter.get()->getDeviceSpaceRRect()));
-                    break;
-                }
-                case SkClipStack::Element::DeviceSpaceType::kRect: {
-                    fps.emplace_back(
-                            GrConvexPolyEffect::Make(edgeType, iter.get()->getDeviceSpaceRect()));
-                    break;
-                }
-                default:
-                    break;
-            }
-            if (!fps.back()) {
-                return false;
-            }
-        }
-        iter.next();
-    }
-
-    *resultFP = nullptr;
-    if (fps.count()) {
-        *resultFP = GrFragmentProcessor::RunInSeries(fps.begin(), fps.count());
-    }
-    return true;
-}
-
 ////////////////////////////////////////////////////////////////////////////////
 // sort out what kind of clip mask needs to be created: alpha, stencil,
 // scissor, or entirely software
@@ -265,8 +199,19 @@
         return true;
     }
 
-    const GrReducedClip reducedClip(*fStack, devBounds,
-                                    renderTargetContext->priv().maxWindowRectangles());
+    int maxAnalyticFPs = kMaxAnalyticElements;
+    if (GrFSAAType::kNone != renderTargetContext->fsaaType()) {
+        // With mixed samples (non-msaa color buffer), any coverage info is lost from color once it
+        // hits the color buffer anyway, so we may as well use coverage AA if nothing else in the
+        // pipe is multisampled.
+        if (renderTargetContext->numColorSamples() > 0 || useHWAA || hasUserStencilSettings) {
+            maxAnalyticFPs = 0;
+        }
+        SkASSERT(!context->caps()->avoidStencilBuffers()); // We disable MSAA when avoiding stencil.
+    }
+
+    GrReducedClip reducedClip(*fStack, devBounds, renderTargetContext->priv().maxWindowRectangles(),
+                              maxAnalyticFPs);
 
     if (reducedClip.hasScissor() && !GrClip::IsInsideClip(reducedClip.scissor(), devBounds)) {
         out->addScissor(reducedClip.scissor(), bounds);
@@ -277,6 +222,10 @@
                                  GrWindowRectsState::Mode::kExclusive);
     }
 
+    if (std::unique_ptr<GrFragmentProcessor> clipFPs = reducedClip.detachAnalyticFPs()) {
+        out->addCoverageFP(std::move(clipFPs));
+    }
+
     if (reducedClip.maskElements().isEmpty()) {
         return InitialState::kAllIn == reducedClip.initialState();
     }
@@ -289,41 +238,9 @@
     SkASSERT(rtIBounds.contains(scissor)); // Mask shouldn't be larger than the RT.
 #endif
 
-    bool avoidStencilBuffers = context->caps()->avoidStencilBuffers();
-
-    // An element count of 4 was chosen because of the common pattern in Blink of:
-    //   isect RR
-    //   diff  RR
-    //   isect convex_poly
-    //   isect convex_poly
-    // when drawing rounded div borders. This could probably be tuned based on a
-    // configuration's relative costs of switching RTs to generate a mask vs
-    // longer shaders.
-    if (reducedClip.maskElements().count() <= kMaxAnalyticElements) {
-        // When there are multiple samples we want to do per-sample clipping, not compute a
-        // fractional pixel coverage.
-        bool disallowAnalyticAA =
-                GrFSAAType::kNone != renderTargetContext->fsaaType() && !avoidStencilBuffers;
-        if (disallowAnalyticAA && !renderTargetContext->numColorSamples()) {
-            // With a single color sample, any coverage info is lost from color once it hits the
-            // color buffer anyway, so we may as well use coverage AA if nothing else in the pipe
-            // is multisampled.
-            disallowAnalyticAA = useHWAA || hasUserStencilSettings;
-        }
-        std::unique_ptr<GrFragmentProcessor> clipFP;
-        if ((reducedClip.maskRequiresAA() || avoidStencilBuffers) &&
-            get_analytic_clip_processor(reducedClip.maskElements(), disallowAnalyticAA, devBounds,
-                                        &clipFP)) {
-            if (clipFP) {
-                out->addCoverageFP(std::move(clipFP));
-            }
-            return true;
-        }
-    }
-
     // If the stencil buffer is multisampled we can use it to do everything.
     if ((GrFSAAType::kNone == renderTargetContext->fsaaType() && reducedClip.maskRequiresAA()) ||
-        avoidStencilBuffers) {
+        context->caps()->avoidStencilBuffers()) {
         sk_sp<GrTextureProxy> result;
         if (UseSWOnlyPath(context, hasUserStencilSettings, renderTargetContext, reducedClip)) {
             // The clip geometry is complex enough that it will be more efficient to create it
@@ -343,7 +260,8 @@
         // If alpha or software clip mask creation fails, fall through to the stencil code paths,
         // unless stencils are disallowed.
         if (context->caps()->avoidStencilBuffers()) {
-            SkDebugf("WARNING: Clip mask requires stencil, but stencil unavailable. Clip will be ignored.\n");
+            SkDebugf("WARNING: Clip mask requires stencil, but stencil unavailable. "
+                     "Clip will be ignored.\n");
             return false;
         }
     }
@@ -353,11 +271,14 @@
     // This relies on the property that a reduced sub-rect of the last clip will contain all the
     // relevant window rectangles that were in the last clip. This subtle requirement will go away
     // after clipping is overhauled.
-    if (renderTargetContext->priv().mustRenderClip(reducedClip.maskGenID(),
-                                                   reducedClip.scissor())) {
+    if (renderTargetContext->priv().mustRenderClip(reducedClip.maskGenID(), reducedClip.scissor(),
+                                                   reducedClip.numAnalyticFPs())) {
         reducedClip.drawStencilClipMask(context, renderTargetContext);
-        renderTargetContext->priv().setLastClip(reducedClip.maskGenID(), reducedClip.scissor());
+        renderTargetContext->priv().setLastClip(reducedClip.maskGenID(), reducedClip.scissor(),
+                                                reducedClip.numAnalyticFPs());
     }
+    // GrAppliedClip doesn't need to figure numAnalyticFPs into its key (used by operator==) because
+    // it verifies the FPs are also equal.
     out->addStencilClip(reducedClip.maskGenID());
     return true;
 }
@@ -365,14 +286,16 @@
 ////////////////////////////////////////////////////////////////////////////////
 // Create a 8-bit clip mask in alpha
 
-static void create_clip_mask_key(uint32_t clipGenID, const SkIRect& bounds, GrUniqueKey* key) {
+static void create_clip_mask_key(uint32_t clipGenID, const SkIRect& bounds, int numAnalyticFPs,
+                                 GrUniqueKey* key) {
     static const GrUniqueKey::Domain kDomain = GrUniqueKey::GenerateDomain();
-    GrUniqueKey::Builder builder(key, kDomain, 3, GrClipStackClip::kMaskTestTag);
+    GrUniqueKey::Builder builder(key, kDomain, 4, GrClipStackClip::kMaskTestTag);
     builder[0] = clipGenID;
     // SkToS16 because image filters outset layers to a size indicated by the filter, which can
     // sometimes result in negative coordinates from device space.
     builder[1] = SkToS16(bounds.fLeft) | (SkToS16(bounds.fRight) << 16);
     builder[2] = SkToS16(bounds.fTop) | (SkToS16(bounds.fBottom) << 16);
+    builder[3] = numAnalyticFPs;
 }
 
 static void add_invalidate_on_pop_message(const SkClipStack& stack, uint32_t clipGenID,
@@ -393,7 +316,8 @@
                                                            const GrReducedClip& reducedClip) const {
     GrResourceProvider* resourceProvider = context->resourceProvider();
     GrUniqueKey key;
-    create_clip_mask_key(reducedClip.maskGenID(), reducedClip.scissor(), &key);
+    create_clip_mask_key(reducedClip.maskGenID(), reducedClip.scissor(),
+                         reducedClip.numAnalyticFPs(), &key);
 
     sk_sp<GrTextureProxy> proxy(resourceProvider->findOrCreateProxyByUniqueKey(
                                                                 key, kBottomLeft_GrSurfaceOrigin));
@@ -505,7 +429,8 @@
         GrContext* context, const GrReducedClip& reducedClip,
         GrRenderTargetContext* renderTargetContext) const {
     GrUniqueKey key;
-    create_clip_mask_key(reducedClip.maskGenID(), reducedClip.scissor(), &key);
+    create_clip_mask_key(reducedClip.maskGenID(), reducedClip.scissor(),
+                         reducedClip.numAnalyticFPs(), &key);
 
     sk_sp<GrTextureProxy> proxy(context->resourceProvider()->findOrCreateProxyByUniqueKey(
                                                                   key, kTopLeft_GrSurfaceOrigin));
diff --git a/src/gpu/GrReducedClip.cpp b/src/gpu/GrReducedClip.cpp
index bc3286b..8d49af0 100644
--- a/src/gpu/GrReducedClip.cpp
+++ b/src/gpu/GrReducedClip.cpp
@@ -20,6 +20,8 @@
 #include "GrStyle.h"
 #include "GrUserStencilSettings.h"
 #include "SkClipOpPriv.h"
+#include "effects/GrConvexPolyEffect.h"
+#include "effects/GrRRectEffect.h"
 
 /**
  * There are plenty of optimizations that could be added here. Maybe flips could be folded into
@@ -29,8 +31,11 @@
  * take a rect in case the caller knows a bound on what is to be drawn through this clip.
  */
 GrReducedClip::GrReducedClip(const SkClipStack& stack, const SkRect& queryBounds,
-                             int maxWindowRectangles) {
+                             int maxWindowRectangles, int maxAnalyticFPs)
+        : fMaxWindowRectangles(maxWindowRectangles)
+        , fMaxAnalyticFPs(maxAnalyticFPs) {
     SkASSERT(!queryBounds.isEmpty());
+    SkASSERT(fMaxWindowRectangles <= GrWindowRectangles::kMaxWindows);
     fHasScissor = false;
     fAAClipRectGenID = SK_InvalidGenID;
 
@@ -96,12 +101,13 @@
         }
         fHasScissor = true;
 
-        // Now that we have determined the bounds to use and filtered out the trivial cases, call the
-        // helper that actually walks the stack.
-        this->walkStack(stack, tighterQuery, maxWindowRectangles);
+        // Now that we have determined the bounds to use and filtered out the trivial cases, call
+        // the helper that actually walks the stack.
+        this->walkStack(stack, tighterQuery);
     }
 
-    if (SK_InvalidGenID != fAAClipRectGenID) { // Is there an AA clip rect?
+    if (SK_InvalidGenID != fAAClipRectGenID && // Is there an AA clip rect?
+        ClipResult::kNotClipped == this->addAnalyticFP(fAAClipRect, Invert::kNo, true)) {
         if (fMaskElements.isEmpty()) {
             // Use a replace since it is faster than intersect.
             fMaskElements.addToHead(fAAClipRect, SkMatrix::I(), kReplace_SkClipOp, true /*doAA*/);
@@ -111,12 +117,10 @@
         }
         fMaskRequiresAA = true;
         fMaskGenID = fAAClipRectGenID;
-        fAAClipRectGenID = SK_InvalidGenID;
     }
 }
 
-void GrReducedClip::walkStack(const SkClipStack& stack, const SkRect& queryBounds,
-                              int maxWindowRectangles) {
+void GrReducedClip::walkStack(const SkClipStack& stack, const SkRect& queryBounds) {
     // walk backwards until we get to:
     //  a) the beginning
     //  b) an operation that is known to make the bounds all inside/outside
@@ -179,7 +183,7 @@
                     } else if (GrClip::IsOutsideClip(element->getBounds(), queryBounds)) {
                         skippable = true;
                     } else if (!embiggens) {
-                        ClipResult result = this->clipOutsideElement(element, maxWindowRectangles);
+                        ClipResult result = this->clipOutsideElement(element);
                         if (ClipResult::kMadeEmpty == result) {
                             return;
                         }
@@ -479,34 +483,43 @@
             return ClipResult::kClipped;
 
         case Element::DeviceSpaceType::kRRect:
+            return this->addAnalyticFP(element->getDeviceSpaceRRect(), Invert::kNo,
+                                       element->isAA());
+
         case Element::DeviceSpaceType::kPath:
-            return ClipResult::kNotClipped;
+            return this->addAnalyticFP(element->getDeviceSpacePath(), Invert::kNo, element->isAA());
     }
 
     SK_ABORT("Unexpected DeviceSpaceType");
     return ClipResult::kNotClipped;
 }
 
-GrReducedClip::ClipResult GrReducedClip::clipOutsideElement(const Element* element,
-                                                            int maxWindowRectangles) {
-    if (fWindowRects.count() >= maxWindowRectangles) {
-        return ClipResult::kNotClipped;
-    }
-
+GrReducedClip::ClipResult GrReducedClip::clipOutsideElement(const Element* element) {
     switch (element->getDeviceSpaceType()) {
         case Element::DeviceSpaceType::kEmpty:
             return ClipResult::kMadeEmpty;
 
         case Element::DeviceSpaceType::kRect:
-            // Clip out the inside of every rect. We won't be able to entirely skip the AA ones, but
-            // it saves processing time.
-            this->addWindowRectangle(element->getDeviceSpaceRect(), element->isAA());
-            return !element->isAA() ? ClipResult::kClipped : ClipResult::kNotClipped;
+            if (fWindowRects.count() < fMaxWindowRectangles) {
+                // Clip out the inside of every rect. We won't be able to entirely skip the AA ones,
+                // but it saves processing time.
+                this->addWindowRectangle(element->getDeviceSpaceRect(), element->isAA());
+                if (!element->isAA()) {
+                    return ClipResult::kClipped;
+                }
+            }
+            return this->addAnalyticFP(element->getDeviceSpaceRect(), Invert::kYes,
+                                       element->isAA());
 
         case Element::DeviceSpaceType::kRRect: {
-            // Clip out the interiors of round rects with two window rectangles in the shape of a
-            // plus. It doesn't allow us to skip the clip element, but still saves processing time.
             const SkRRect& clipRRect = element->getDeviceSpaceRRect();
+            ClipResult clipResult = this->addAnalyticFP(clipRRect, Invert::kYes, element->isAA());
+            if (fWindowRects.count() >= fMaxWindowRectangles) {
+                return clipResult;
+            }
+
+            // Clip out the interiors of round rects with two window rectangles in the shape of a
+            // "plus". This doesn't let us skip the clip element, but still saves processing time.
             SkVector insetTL = clipRRect.radii(SkRRect::kUpperLeft_Corner);
             SkVector insetBR = clipRRect.radii(SkRRect::kLowerRight_Corner);
             if (SkRRect::kComplex_Type == clipRRect.getType()) {
@@ -520,24 +533,25 @@
             const SkRect& bounds = clipRRect.getBounds();
             if (insetTL.x() + insetBR.x() >= bounds.width() ||
                 insetTL.y() + insetBR.y() >= bounds.height()) {
-                return ClipResult::kNotClipped; // The interior "plus" is empty.
+                return clipResult; // The interior "plus" is empty.
             }
 
             SkRect horzRect = SkRect::MakeLTRB(bounds.left(), bounds.top() + insetTL.y(),
                                                bounds.right(), bounds.bottom() - insetBR.y());
             this->addWindowRectangle(horzRect, element->isAA());
-            if (fWindowRects.count() >= maxWindowRectangles) {
-                return ClipResult::kNotClipped;
+
+            if (fWindowRects.count() < fMaxWindowRectangles) {
+                SkRect vertRect = SkRect::MakeLTRB(bounds.left() + insetTL.x(), bounds.top(),
+                                                   bounds.right() - insetBR.x(), bounds.bottom());
+                this->addWindowRectangle(vertRect, element->isAA());
             }
 
-            SkRect vertRect = SkRect::MakeLTRB(bounds.left() + insetTL.x(), bounds.top(),
-                                               bounds.right() - insetBR.x(), bounds.bottom());
-            this->addWindowRectangle(vertRect, element->isAA());
-            return ClipResult::kNotClipped;
+            return clipResult;
         }
 
         case Element::DeviceSpaceType::kPath:
-            return ClipResult::kNotClipped;
+            return this->addAnalyticFP(element->getDeviceSpacePath(), Invert::kYes,
+                                       element->isAA());
     }
 
     SK_ABORT("Unexpected DeviceSpaceType");
@@ -556,6 +570,43 @@
     }
 }
 
+std::unique_ptr<GrFragmentProcessor> make_analytic_clip_fp(GrPrimitiveEdgeType edgeType,
+                                                           const SkRect& deviceSpaceRect) {
+    return GrConvexPolyEffect::Make(edgeType, deviceSpaceRect);
+}
+
+std::unique_ptr<GrFragmentProcessor> make_analytic_clip_fp(GrPrimitiveEdgeType edgeType,
+                                                           const SkRRect& deviceSpaceRRect) {
+    return GrRRectEffect::Make(edgeType, deviceSpaceRRect);
+}
+
+std::unique_ptr<GrFragmentProcessor> make_analytic_clip_fp(GrPrimitiveEdgeType edgeType,
+                                                           const SkPath& deviceSpacePath) {
+    return GrConvexPolyEffect::Make(edgeType, deviceSpacePath);
+}
+
+template<typename T>
+inline GrReducedClip::ClipResult GrReducedClip::addAnalyticFP(const T& deviceSpaceShape,
+                                                              Invert invert, bool aa) {
+    if (fAnalyticFPs.count() >= fMaxAnalyticFPs) {
+        return ClipResult::kNotClipped;
+    }
+
+    GrPrimitiveEdgeType edgeType;
+    if (Invert::kNo == invert) {
+        edgeType = aa ? kFillAA_GrProcessorEdgeType : kFillBW_GrProcessorEdgeType;
+    } else {
+        edgeType = aa ? kInverseFillAA_GrProcessorEdgeType : kInverseFillBW_GrProcessorEdgeType;
+    }
+
+    if (auto fp = make_analytic_clip_fp(edgeType, deviceSpaceShape)) {
+        fAnalyticFPs.push_back(std::move(fp));
+        return ClipResult::kClipped;
+    }
+
+    return ClipResult::kNotClipped;
+}
+
 void GrReducedClip::makeEmpty() {
     fHasScissor = false;
     fAAClipRectGenID = SK_InvalidGenID;
diff --git a/src/gpu/GrReducedClip.h b/src/gpu/GrReducedClip.h
index 0746439..ca13355 100644
--- a/src/gpu/GrReducedClip.h
+++ b/src/gpu/GrReducedClip.h
@@ -8,6 +8,7 @@
 #ifndef GrReducedClip_DEFINED
 #define GrReducedClip_DEFINED
 
+#include "GrFragmentProcessor.h"
 #include "GrWindowRectangles.h"
 #include "SkClipStack.h"
 #include "SkTLList.h"
@@ -24,7 +25,8 @@
     using Element = SkClipStack::Element;
     using ElementList = SkTLList<SkClipStack::Element, 16>;
 
-    GrReducedClip(const SkClipStack&, const SkRect& queryBounds, int maxWindowRectangles = 0);
+    GrReducedClip(const SkClipStack&, const SkRect& queryBounds,
+                  int maxWindowRectangles = 0, int maxAnalyticFPs = 0);
 
     /**
      * If hasScissor() is true, the clip mask is not valid outside this rect and the caller must
@@ -48,6 +50,13 @@
      */
     const GrWindowRectangles& windowRectangles() const { return fWindowRects; }
 
+    int numAnalyticFPs() const { return fAnalyticFPs.count(); }
+
+    std::unique_ptr<GrFragmentProcessor> detachAnalyticFPs() {
+        SkDEBUGCODE(for (const auto& fp : fAnalyticFPs) { SkASSERT(fp); })
+        return GrFragmentProcessor::RunInSeries(fAnalyticFPs.begin(), fAnalyticFPs.count());
+    }
+
     /**
      * An ordered list of clip elements that could not be skipped or implemented by other means. If
      * nonempty, the caller must create an alpha and/or stencil mask for these elements and apply it
@@ -81,7 +90,7 @@
     bool drawStencilClipMask(GrContext*, GrRenderTargetContext*) const;
 
 private:
-    void walkStack(const SkClipStack&, const SkRect& queryBounds, int maxWindowRectangles);
+    void walkStack(const SkClipStack&, const SkRect& queryBounds);
 
     enum class ClipResult {
         kNotClipped,
@@ -95,16 +104,27 @@
 
     // Clips the the given element's exterior out of the final clip.
     // NOTE: do not call for elements followed by ops that can grow the clip.
-    ClipResult clipOutsideElement(const Element* element, int maxWindowRectangles);
+    ClipResult clipOutsideElement(const Element* element);
 
     void addWindowRectangle(const SkRect& elementInteriorRect, bool elementIsAA);
+
+    enum class Invert : bool {
+        kNo,
+        kYes
+    };
+
+    template<typename T> ClipResult addAnalyticFP(const T& deviceSpaceShape, Invert, bool aa);
+
     void makeEmpty();
 
+    const int            fMaxWindowRectangles;
+    const int            fMaxAnalyticFPs;
     SkIRect              fScissor;
     bool                 fHasScissor;
     SkRect               fAAClipRect;
     uint32_t             fAAClipRectGenID; // GenID the mask will have if includes the AA clip rect.
     GrWindowRectangles   fWindowRects;
+    SkSTArray<4, std::unique_ptr<GrFragmentProcessor>> fAnalyticFPs;
     ElementList          fMaskElements;
     uint32_t             fMaskGenID;
     bool                 fMaskRequiresAA;
diff --git a/src/gpu/GrRenderTargetContextPriv.h b/src/gpu/GrRenderTargetContextPriv.h
index 2cfd5c8..4b3ceef 100644
--- a/src/gpu/GrRenderTargetContextPriv.h
+++ b/src/gpu/GrRenderTargetContextPriv.h
@@ -28,18 +28,22 @@
 
     // called to note the last clip drawn to the stencil buffer.
     // TODO: remove after clipping overhaul.
-    void setLastClip(uint32_t clipStackGenID, const SkIRect& devClipBounds) {
+    void setLastClip(uint32_t clipStackGenID, const SkIRect& devClipBounds,
+                     int numClipAnalyticFPs) {
         GrRenderTargetOpList* opList = fRenderTargetContext->getRTOpList();
         opList->fLastClipStackGenID = clipStackGenID;
         opList->fLastDevClipBounds = devClipBounds;
+        opList->fLastClipNumAnalyticFPs = numClipAnalyticFPs;
     }
 
     // called to determine if we have to render the clip into SB.
     // TODO: remove after clipping overhaul.
-    bool mustRenderClip(uint32_t clipStackGenID, const SkIRect& devClipBounds) const {
+    bool mustRenderClip(uint32_t clipStackGenID, const SkIRect& devClipBounds,
+                        int numClipAnalyticFPs) const {
         GrRenderTargetOpList* opList = fRenderTargetContext->getRTOpList();
         return opList->fLastClipStackGenID != clipStackGenID ||
-               !opList->fLastDevClipBounds.contains(devClipBounds);
+               !opList->fLastDevClipBounds.contains(devClipBounds) ||
+               opList->fLastClipNumAnalyticFPs != numClipAnalyticFPs;
     }
 
     void clear(const GrFixedClip&, const GrColor, bool canIgnoreClip);
diff --git a/src/gpu/GrRenderTargetOpList.h b/src/gpu/GrRenderTargetOpList.h
index 24125ac..e9797db 100644
--- a/src/gpu/GrRenderTargetOpList.h
+++ b/src/gpu/GrRenderTargetOpList.h
@@ -156,6 +156,7 @@
 
     uint32_t                       fLastClipStackGenID;
     SkIRect                        fLastDevClipBounds;
+    int                            fLastClipNumAnalyticFPs;
 
     // For ops/opList we have mean: 5 stdDev: 28
     SkSTArray<5, RecordedOp, true> fRecordedOps;