Avoid re-rendering stencil clip for every draw with reducable clip stack

Fixes the cases where clip stack reduction would cause clip to be
re-rendered to stencil for each draw call. This causes unneeded
slowdown.

Stencil cache would not be used because the clip stack generation id communicated
by the clip stack element list would be invalid. This happended due to

 a) clip stack reduction creating new elements in the element list.

 b) purging logic removing the generation id, but reduction logic
    selecting already purged element, and thus the generation id, as
    the representative state of the clip.

Cases of a) where reduction would flatten the stack to a single new
element were fixed by assigning the generation id of the top-most
element of the clip stack as the generation id of the new
element. This is not strictly minimal, but enables more caching than
using invalid id.

Cases of a) where reduction would substitute a stack element with a
new element the generation id of the substituted element is used.

The b) part was fixed by removing the purging logic. It was not
exactly correct, as the previously purged states were actually
used. The purging was not used for anything.

Changes SkClipStack API to highlight that invalid generation id is
never returned by SkClipStack. Empty stacks are wide open. Changes the
clients to reflect this.

Fixes a crash when not passing anti-alias out parameter to
GrReducedClip::ReduceClipStack. The crash is not exercised in the
current code.

Committed: http://code.google.com/p/skia/source/detail?r=12084

R=bsalomon@google.com, robertphillips@google.com

Author: kkinnunen@nvidia.com

Review URL: https://codereview.chromium.org/48593003

git-svn-id: http://skia.googlecode.com/svn/trunk@12127 2bbb7eff-a529-9590-31e7-b0007b416f81
diff --git a/include/core/SkClipStack.h b/include/core/SkClipStack.h
index 145552d..0d6cfb2 100644
--- a/include/core/SkClipStack.h
+++ b/include/core/SkClipStack.h
@@ -109,7 +109,7 @@
             stack not to the element itself. That is the same clip path in different stacks will
             have a different ID since the elements produce different clip result in the context of
             their stacks. */
-        int32_t getGenID() const { return fGenID; }
+        int32_t getGenID() const { SkASSERT(kInvalidGenID != fGenID); return fGenID; }
 
         /**
          * Gets the bounds of the clip element, either the rect or path bounds. (Whether the shape
@@ -316,25 +316,12 @@
     bool isWideOpen() const;
 
     /**
-     * Add a callback function that will be called whenever a clip state
-     * is no longer viable. This will occur whenever restore
-     * is called or when a clipDevRect or clipDevPath call updates the
-     * clip within an existing save/restore state. Each clip state is
-     * represented by a unique generation ID.
-     */
-    typedef void (*PFPurgeClipCB)(int genID, void* data);
-    void addPurgeClipCallback(PFPurgeClipCB callback, void* data) const;
-
-    /**
-     * Remove a callback added earlier via addPurgeClipCallback
-     */
-    void removePurgeClipCallback(PFPurgeClipCB callback, void* data) const;
-
-    /**
      * The generation ID has three reserved values to indicate special
      * (potentially ignorable) cases
      */
-    static const int32_t kInvalidGenID = 0;
+    static const int32_t kInvalidGenID = 0;     //!< Invalid id that is never returned by
+                                                //!< SkClipStack. Useful when caching clips
+                                                //!< based on GenID.
     static const int32_t kEmptyGenID = 1;       // no pixels writeable
     static const int32_t kWideOpenGenID = 2;    // all pixels writeable
 
@@ -440,29 +427,12 @@
     // invalid ID.
     static int32_t     gGenID;
 
-    struct ClipCallbackData {
-        PFPurgeClipCB   fCallback;
-        void*           fData;
-
-        friend bool operator==(const ClipCallbackData& a,
-                               const ClipCallbackData& b) {
-            return a.fCallback == b.fCallback && a.fData == b.fData;
-        }
-    };
-
-    mutable SkTDArray<ClipCallbackData> fCallbackData;
-
     /**
      * Restore the stack back to the specified save count.
      */
     void restoreTo(int saveCount);
 
     /**
-     * Invoke all the purge callbacks passing in element's generation ID.
-     */
-    void purgeClip(Element* element);
-
-    /**
      * Return the next unique generation ID.
      */
     static int32_t GetNextGenID();
diff --git a/src/core/SkClipStack.cpp b/src/core/SkClipStack.cpp
index 2c0961a..9e3f7c6 100644
--- a/src/core/SkClipStack.cpp
+++ b/src/core/SkClipStack.cpp
@@ -452,7 +452,6 @@
         if (element->fSaveCount <= saveCount) {
             break;
         }
-        this->purgeClip(element);
         element->~Element();
         fDeque.pop_back();
     }
@@ -540,7 +539,6 @@
                     return;
                 case Element::kRect_Type:
                     if (element->rectRectIntersectAllowed(rect, doAA)) {
-                        this->purgeClip(element);
                         if (!element->fRect.intersect(rect)) {
                             element->setEmpty();
                             return;
@@ -554,7 +552,6 @@
                     break;
                 case Element::kPath_Type:
                     if (!SkRect::Intersects(element->fPath.getBounds(), rect)) {
-                        this->purgeClip(element);
                         element->setEmpty();
                         return;
                     }
@@ -567,10 +564,6 @@
     }
     new (fDeque.push_back()) Element(fSaveCount, rect, op, doAA);
     ((Element*) fDeque.back())->updateBoundAndGenID(element);
-
-    if (element && element->fSaveCount == fSaveCount) {
-        this->purgeClip(element);
-    }
 }
 
 void SkClipStack::clipDevPath(const SkPath& path, SkRegion::Op op, bool doAA) {
@@ -589,14 +582,12 @@
                     return;
                 case Element::kRect_Type:
                     if (!SkRect::Intersects(element->fRect, pathBounds)) {
-                        this->purgeClip(element);
                         element->setEmpty();
                         return;
                     }
                     break;
                 case Element::kPath_Type:
                     if (!SkRect::Intersects(element->fPath.getBounds(), pathBounds)) {
-                        this->purgeClip(element);
                         element->setEmpty();
                         return;
                     }
@@ -609,10 +600,6 @@
     }
     new (fDeque.push_back()) Element(fSaveCount, path, op, doAA);
     ((Element*) fDeque.back())->updateBoundAndGenID(element);
-
-    if (element && element->fSaveCount == fSaveCount) {
-        this->purgeClip(element);
-    }
 }
 
 void SkClipStack::clipEmpty() {
@@ -626,27 +613,17 @@
                 return;
             case Element::kRect_Type:
             case Element::kPath_Type:
-                this->purgeClip(element);
                 element->setEmpty();
                 return;
         }
     }
     new (fDeque.push_back()) Element(fSaveCount);
 
-    if (element && element->fSaveCount == fSaveCount) {
-        this->purgeClip(element);
-    }
     ((Element*)fDeque.back())->fGenID = kEmptyGenID;
 }
 
 bool SkClipStack::isWideOpen() const {
-    if (0 == fDeque.count()) {
-        return true;
-    }
-
-    const Element* back = (const Element*) fDeque.back();
-    return kWideOpenGenID == back->fGenID ||
-           (kInsideOut_BoundsType == back->fFiniteBoundType && back->fFiniteBound.isEmpty());
+    return this->getTopmostGenID() == kWideOpenGenID;
 }
 
 ///////////////////////////////////////////////////////////////////////////////
@@ -740,45 +717,20 @@
     }
 }
 
-void SkClipStack::addPurgeClipCallback(PFPurgeClipCB callback, void* data) const {
-    ClipCallbackData temp = { callback, data };
-    fCallbackData.append(1, &temp);
-}
-
-void SkClipStack::removePurgeClipCallback(PFPurgeClipCB callback, void* data) const {
-    ClipCallbackData temp = { callback, data };
-    int index = fCallbackData.find(temp);
-    if (index >= 0) {
-        fCallbackData.removeShuffle(index);
-    }
-}
-
-// The clip state represented by 'element' will never be used again. Purge it.
-void SkClipStack::purgeClip(Element* element) {
-    SkASSERT(NULL != element);
-    if (element->fGenID >= 0 && element->fGenID < kFirstUnreservedGenID) {
-        return;
-    }
-
-    for (int i = 0; i < fCallbackData.count(); ++i) {
-        (*fCallbackData[i].fCallback)(element->fGenID, fCallbackData[i].fData);
-    }
-
-    // Invalidate element's gen ID so handlers can detect already handled records
-    element->fGenID = kInvalidGenID;
-}
-
 int32_t SkClipStack::GetNextGenID() {
     // TODO: handle overflow.
     return sk_atomic_inc(&gGenID);
 }
 
 int32_t SkClipStack::getTopmostGenID() const {
-
     if (fDeque.empty()) {
-        return kInvalidGenID;
+        return kWideOpenGenID;
     }
 
-    Element* element = (Element*)fDeque.back();
-    return element->fGenID;
+    const Element* back = static_cast<const Element*>(fDeque.back());
+    if (kInsideOut_BoundsType == back->fFiniteBoundType && back->fFiniteBound.isEmpty()) {
+        return kWideOpenGenID;
+    }
+
+    return back->getGenID();
 }
diff --git a/src/gpu/GrClipMaskCache.h b/src/gpu/GrClipMaskCache.h
index 97b4b51..213e282 100644
--- a/src/gpu/GrClipMaskCache.h
+++ b/src/gpu/GrClipMaskCache.h
@@ -36,10 +36,6 @@
         SkASSERT(clipGenID != SkClipStack::kWideOpenGenID);
         SkASSERT(clipGenID != SkClipStack::kEmptyGenID);
 
-        if (SkClipStack::kInvalidGenID == clipGenID) {
-            return false;
-        }
-
         GrClipStackFrame* back = (GrClipStackFrame*) fStack.back();
 
         // We could reuse the mask if bounds is a subset of last bounds. We'd have to communicate
diff --git a/src/gpu/GrClipMaskManager.cpp b/src/gpu/GrClipMaskManager.cpp
index 0f91566..3aef3de 100644
--- a/src/gpu/GrClipMaskManager.cpp
+++ b/src/gpu/GrClipMaskManager.cpp
@@ -113,6 +113,7 @@
     fCurrClipMaskType = kNone_ClipMaskType;
 
     ElementList elements(16);
+    int32_t genID;
     InitialState initialState;
     SkIRect clipSpaceIBounds;
     bool requiresAA;
@@ -132,6 +133,7 @@
         ReduceClipStack(*clipDataIn->fClipStack,
                         clipSpaceRTIBounds,
                         &elements,
+                        &genID,
                         &initialState,
                         &clipSpaceIBounds,
                         &requiresAA);
@@ -156,7 +158,6 @@
 
     // If MSAA is enabled we can do everything in the stencil buffer.
     if (0 == rt->numSamples() && requiresAA) {
-        int32_t genID = clipDataIn->fClipStack->getTopmostGenID();
         GrTexture* result = NULL;
 
         if (this->useSWOnlyPath(elements)) {
@@ -207,7 +208,8 @@
 
     // use the stencil clip if we can't represent the clip as a rectangle.
     SkIPoint clipSpaceToStencilSpaceOffset = -clipDataIn->fOrigin;
-    this->createStencilClipMask(initialState,
+    this->createStencilClipMask(genID,
+                                initialState,
                                 elements,
                                 clipSpaceIBounds,
                                 clipSpaceToStencilSpaceOffset);
@@ -390,11 +392,11 @@
 // Handles caching & allocation (if needed) of a clip alpha-mask texture for both the sw-upload
 // or gpu-rendered cases. Returns true if there is no more work to be done (i.e., we got a cache
 // hit)
-bool GrClipMaskManager::getMaskTexture(int32_t clipStackGenID,
+bool GrClipMaskManager::getMaskTexture(int32_t elementsGenID,
                                        const SkIRect& clipSpaceIBounds,
                                        GrTexture** result,
                                        bool willUpload) {
-    bool cached = fAACache.canReuse(clipStackGenID, clipSpaceIBounds);
+    bool cached = fAACache.canReuse(elementsGenID, clipSpaceIBounds);
     if (!cached) {
 
         // There isn't a suitable entry in the cache so we create a new texture to store the mask.
@@ -412,7 +414,7 @@
             desc.fConfig = kAlpha_8_GrPixelConfig;
         }
 
-        fAACache.acquireMask(clipStackGenID, desc, clipSpaceIBounds);
+        fAACache.acquireMask(elementsGenID, desc, clipSpaceIBounds);
     }
 
     *result = fAACache.getLastMask();
@@ -421,14 +423,14 @@
 
 ////////////////////////////////////////////////////////////////////////////////
 // Create a 8-bit clip mask in alpha
-GrTexture* GrClipMaskManager::createAlphaClipMask(int32_t clipStackGenID,
+GrTexture* GrClipMaskManager::createAlphaClipMask(int32_t elementsGenID,
                                                   InitialState initialState,
                                                   const ElementList& elements,
                                                   const SkIRect& clipSpaceIBounds) {
     SkASSERT(kNone_ClipMaskType == fCurrClipMaskType);
 
     GrTexture* result;
-    if (this->getMaskTexture(clipStackGenID, clipSpaceIBounds, &result, false)) {
+    if (this->getMaskTexture(elementsGenID, clipSpaceIBounds, &result, false)) {
         fCurrClipMaskType = kAlpha_ClipMaskType;
         return result;
     }
@@ -569,7 +571,8 @@
 ////////////////////////////////////////////////////////////////////////////////
 // Create a 1-bit clip mask in the stencil buffer. 'devClipBounds' are in device
 // (as opposed to canvas) coordinates
-bool GrClipMaskManager::createStencilClipMask(InitialState initialState,
+bool GrClipMaskManager::createStencilClipMask(int32_t elementsGenID,
+                                              InitialState initialState,
                                               const ElementList& elements,
                                               const SkIRect& clipSpaceIBounds,
                                               const SkIPoint& clipSpaceToStencilOffset) {
@@ -587,11 +590,10 @@
     if (NULL == stencilBuffer) {
         return false;
     }
-    int32_t genID = elements.tail()->getGenID();
 
-    if (stencilBuffer->mustRenderClip(genID, clipSpaceIBounds, clipSpaceToStencilOffset)) {
+    if (stencilBuffer->mustRenderClip(elementsGenID, clipSpaceIBounds, clipSpaceToStencilOffset)) {
 
-        stencilBuffer->setLastClip(genID, clipSpaceIBounds, clipSpaceToStencilOffset);
+        stencilBuffer->setLastClip(elementsGenID, clipSpaceIBounds, clipSpaceToStencilOffset);
 
         // Set the matrix so that rendered clip elements are transformed from clip to stencil space.
         SkVector translate = {
@@ -921,14 +923,14 @@
 }
 
 ////////////////////////////////////////////////////////////////////////////////
-GrTexture* GrClipMaskManager::createSoftwareClipMask(int32_t clipStackGenID,
+GrTexture* GrClipMaskManager::createSoftwareClipMask(int32_t elementsGenID,
                                                      GrReducedClip::InitialState initialState,
                                                      const GrReducedClip::ElementList& elements,
                                                      const SkIRect& clipSpaceIBounds) {
     SkASSERT(kNone_ClipMaskType == fCurrClipMaskType);
 
     GrTexture* result;
-    if (this->getMaskTexture(clipStackGenID, clipSpaceIBounds, &result, true)) {
+    if (this->getMaskTexture(elementsGenID, clipSpaceIBounds, &result, true)) {
         return result;
     }
 
diff --git a/src/gpu/GrClipMaskManager.h b/src/gpu/GrClipMaskManager.h
index 015c801..f44a8e7 100644
--- a/src/gpu/GrClipMaskManager.h
+++ b/src/gpu/GrClipMaskManager.h
@@ -103,18 +103,19 @@
     GrClipMaskCache fAACache;       // cache for the AA path
 
     // Draws the clip into the stencil buffer
-    bool createStencilClipMask(GrReducedClip::InitialState initialState,
+    bool createStencilClipMask(int32_t elementsGenID,
+                               GrReducedClip::InitialState initialState,
                                const GrReducedClip::ElementList& elements,
                                const SkIRect& clipSpaceIBounds,
                                const SkIPoint& clipSpaceToStencilOffset);
     // Creates an alpha mask of the clip. The mask is a rasterization of elements through the
     // rect specified by clipSpaceIBounds.
-    GrTexture* createAlphaClipMask(int32_t clipStackGenID,
+    GrTexture* createAlphaClipMask(int32_t elementsGenID,
                                    GrReducedClip::InitialState initialState,
                                    const GrReducedClip::ElementList& elements,
                                    const SkIRect& clipSpaceIBounds);
     // Similar to createAlphaClipMask but it rasterizes in SW and uploads to the result texture.
-    GrTexture* createSoftwareClipMask(int32_t clipStackGenID,
+    GrTexture* createSoftwareClipMask(int32_t elementsGenID,
                                       GrReducedClip::InitialState initialState,
                                       const GrReducedClip::ElementList& elements,
                                       const SkIRect& clipSpaceIBounds);
@@ -122,7 +123,7 @@
     // Gets a texture to use for the clip mask. If true is returned then a cached mask was found
     // that already contains the rasterization of the clip stack, otherwise an uninitialized texture
     // is returned. 'willUpload' is set when the alpha mask needs to be uploaded from the CPU.
-    bool getMaskTexture(int32_t clipStackGenID,
+    bool getMaskTexture(int32_t elementsGenID,
                         const SkIRect& clipSpaceIBounds,
                         GrTexture** result,
                         bool willUpload);
diff --git a/src/gpu/GrReducedClip.cpp b/src/gpu/GrReducedClip.cpp
index a5f4519..8480e04 100644
--- a/src/gpu/GrReducedClip.cpp
+++ b/src/gpu/GrReducedClip.cpp
@@ -17,6 +17,7 @@
 void reduced_stack_walker(const SkClipStack& stack,
                           const SkRect& queryBounds,
                           ElementList* result,
+                          int32_t* resultGenID,
                           InitialState* initialState,
                           bool* requiresAA);
 
@@ -30,11 +31,17 @@
 void ReduceClipStack(const SkClipStack& stack,
                      const SkIRect& queryBounds,
                      ElementList* result,
+                     int32_t* resultGenID,
                      InitialState* initialState,
                      SkIRect* tighterBounds,
                      bool* requiresAA) {
     result->reset();
 
+    // The clip established by the element list might be cached based on the last
+    // generation id. When we make early returns, we do not know what was the generation
+    // id that lead to the state. Make a conservative guess.
+    *resultGenID = stack.getTopmostGenID();
+
     if (stack.isWideOpen()) {
         *initialState = kAllIn_InitialState;
         return;
@@ -70,7 +77,9 @@
                 SkRect scalarTighterBounds = SkRect::Make(*tighterBounds);
                 if (scalarTighterBounds == isectRect) {
                     // the round-out didn't add any area outside the clip rect.
-                    *requiresAA = false;
+                    if (NULL != requiresAA) {
+                        *requiresAA = false;
+                    }
                     *initialState = kAllIn_InitialState;
                     return;
                 }
@@ -123,12 +132,17 @@
 
     // Now that we have determined the bounds to use and filtered out the trivial cases, call the
     // helper that actually walks the stack.
-    reduced_stack_walker(stack, scalarBounds, result, initialState, requiresAA);
+    reduced_stack_walker(stack, scalarBounds, result, resultGenID, initialState, requiresAA);
+
+    // The list that was computed in this function may be cached based on the gen id of the last
+    // element.
+    SkASSERT(SkClipStack::kInvalidGenID != *resultGenID);
 }
 
 void reduced_stack_walker(const SkClipStack& stack,
                           const SkRect& queryBounds,
                           ElementList* result,
+                          int32_t* resultGenID,
                           InitialState* initialState,
                           bool* requiresAA) {
 
@@ -312,6 +326,11 @@
                 break;
         }
         if (!skippable) {
+            if (0 == result->count()) {
+                // This will be the last element. Record the stricter genID.
+                *resultGenID = element->getGenID();
+            }
+
             // if it is a flip, change it to a bounds-filling rect
             if (isFlip) {
                 SkASSERT(SkRegion::kXOR_Op == element->getOp() ||
@@ -417,5 +436,13 @@
     if (NULL != requiresAA) {
         *requiresAA = numAAElements > 0;
     }
+
+    if (0 == result->count()) {
+        if (*initialState == kAllIn_InitialState) {
+            *resultGenID = SkClipStack::kWideOpenGenID;
+        } else {
+            *resultGenID = SkClipStack::kEmptyGenID;
+        }
+    }
 }
 } // namespace GrReducedClip
diff --git a/src/gpu/GrReducedClip.h b/src/gpu/GrReducedClip.h
index abfc244..0b79f2c 100644
--- a/src/gpu/GrReducedClip.h
+++ b/src/gpu/GrReducedClip.h
@@ -20,7 +20,8 @@
 
 /**
  * This function takes a clip stack and a query rectangle and it produces a reduced set of
- * SkClipStack::Elements that are equivalent to applying the full stack to the rectangle. The
+ * SkClipStack::Elements that are equivalent to applying the full stack to the rectangle. The clip
+ * stack generation id that represents the list of elements is returned in resultGenID. The
  * initial state of the query rectangle before the first clip element is applied is returned via
  * initialState. Optionally, the caller can request a tighter bounds on the clip be returned via
  * tighterBounds. If not NULL, tighterBounds will always be contained by queryBounds after return.
@@ -33,6 +34,7 @@
 void ReduceClipStack(const SkClipStack& stack,
                      const SkIRect& queryBounds,
                      ElementList* result,
+                     int32_t* resultGenID,
                      InitialState* initialState,
                      SkIRect* tighterBounds = NULL,
                      bool* requiresAA = NULL);
diff --git a/src/gpu/GrStencilBuffer.h b/src/gpu/GrStencilBuffer.h
index 3765a4c..37d40f1 100644
--- a/src/gpu/GrStencilBuffer.h
+++ b/src/gpu/GrStencilBuffer.h
@@ -43,8 +43,7 @@
     bool mustRenderClip(int32_t clipStackGenID,
                         const SkIRect& clipSpaceRect,
                         const SkIPoint clipSpaceToStencilOffset) const {
-        return SkClipStack::kInvalidGenID == clipStackGenID ||
-               fLastClipStackGenID != clipStackGenID ||
+        return fLastClipStackGenID != clipStackGenID ||
                fLastClipSpaceOffset != clipSpaceToStencilOffset ||
                !fLastClipStackRect.contains(clipSpaceRect);
     }
diff --git a/src/gpu/SkGpuDevice.cpp b/src/gpu/SkGpuDevice.cpp
index 4041c41..7630ccd 100644
--- a/src/gpu/SkGpuDevice.cpp
+++ b/src/gpu/SkGpuDevice.cpp
@@ -344,34 +344,15 @@
                                config, bitmap.getPixels(), bitmap.rowBytes(), flags);
 }
 
-namespace {
-void purgeClipCB(int genID, void* ) {
-
-    if (SkClipStack::kInvalidGenID == genID ||
-        SkClipStack::kEmptyGenID == genID ||
-        SkClipStack::kWideOpenGenID == genID) {
-        // none of these cases will have a cached clip mask
-        return;
-    }
-
-}
-};
-
 void SkGpuDevice::onAttachToCanvas(SkCanvas* canvas) {
     INHERITED::onAttachToCanvas(canvas);
 
     // Canvas promises that this ptr is valid until onDetachFromCanvas is called
     fClipData.fClipStack = canvas->getClipStack();
-
-    fClipData.fClipStack->addPurgeClipCallback(purgeClipCB, fContext);
 }
 
 void SkGpuDevice::onDetachFromCanvas() {
     INHERITED::onDetachFromCanvas();
-
-    // TODO: iterate through the clip stack and clean up any cached clip masks
-    fClipData.fClipStack->removePurgeClipCallback(purgeClipCB, fContext);
-
     fClipData.fClipStack = NULL;
 }
 
diff --git a/tests/ClipCacheTest.cpp b/tests/ClipCacheTest.cpp
index fab1f58..070f8eb 100644
--- a/tests/ClipCacheTest.cpp
+++ b/tests/ClipCacheTest.cpp
@@ -106,7 +106,6 @@
                         const SkClipStack& clip,
                         GrTexture* mask,
                         const SkIRect& bound) {
-    SkClipStack cacheClip;
     REPORTER_ASSERT(reporter, clip.getTopmostGenID() == cache.getLastClipGenID());
 
     REPORTER_ASSERT(reporter, mask == cache.getLastMask());
@@ -116,6 +115,19 @@
     REPORTER_ASSERT(reporter, bound == cacheBound);
 }
 
+static void check_empty_state(skiatest::Reporter* reporter,
+                              const GrClipMaskCache& cache) {
+    REPORTER_ASSERT(reporter, SkClipStack::kInvalidGenID == cache.getLastClipGenID());
+    REPORTER_ASSERT(reporter, NULL == cache.getLastMask());
+
+    SkIRect emptyBound;
+    emptyBound.setEmpty();
+
+    SkIRect cacheBound;
+    cache.getLastBound(&cacheBound);
+    REPORTER_ASSERT(reporter, emptyBound == cacheBound);
+}
+
 ////////////////////////////////////////////////////////////////////////////////
 // basic test of the cache's base functionality:
 //  push, pop, set, canReuse & getters
@@ -128,14 +140,8 @@
 
     cache.setContext(context);
 
-    SkClipStack emptyClip;
-    emptyClip.reset();
-
-    SkIRect emptyBound;
-    emptyBound.setEmpty();
-
     // check initial state
-    check_state(reporter, cache, emptyClip, NULL, emptyBound);
+    check_empty_state(reporter, cache);
 
     // set the current state
     SkIRect bound1;
@@ -165,7 +171,7 @@
     cache.push();
 
     // verify that the pushed state is initially empty
-    check_state(reporter, cache, emptyClip, NULL, emptyBound);
+    check_empty_state(reporter, cache);
     REPORTER_ASSERT(reporter, texture1->getRefCnt());
 
     // modify the new state
@@ -202,7 +208,7 @@
     cache.reset();
 
     // verify it is now empty
-    check_state(reporter, cache, emptyClip, NULL, emptyBound);
+    check_empty_state(reporter, cache);
 
     // pop again - so there is no state
     cache.pop();
@@ -210,7 +216,7 @@
 #if !defined(SK_DEBUG)
     // verify that the getters don't crash
     // only do in release since it generates asserts in debug
-    check_state(reporter, cache, emptyClip, NULL, emptyBound);
+    check_empty_state(reporter, cache);
 #endif
 }
 
diff --git a/tests/ClipStackTest.cpp b/tests/ClipStackTest.cpp
index 04b48b6..704aa62 100644
--- a/tests/ClipStackTest.cpp
+++ b/tests/ClipStackTest.cpp
@@ -271,6 +271,7 @@
             }
 
             REPORTER_ASSERT(reporter, !stack.isWideOpen());
+            REPORTER_ASSERT(reporter, SkClipStack::kWideOpenGenID != stack.getTopmostGenID());
 
             stack.getConservativeBounds(0, 0, 100, 100, &devClipBound,
                                         &isIntersectionOfRects);
@@ -293,6 +294,12 @@
 
 // Test out 'isWideOpen' entry point
 static void test_isWideOpen(skiatest::Reporter* reporter) {
+    {
+        // Empty stack is wide open. Wide open stack means that gen id is wide open.
+        SkClipStack stack;
+        REPORTER_ASSERT(reporter, stack.isWideOpen());
+        REPORTER_ASSERT(reporter, SkClipStack::kWideOpenGenID == stack.getTopmostGenID());
+    }
 
     SkRect rectA, rectB;
 
@@ -304,6 +311,7 @@
         SkClipStack stack;
 
         REPORTER_ASSERT(reporter, stack.isWideOpen());
+        REPORTER_ASSERT(reporter, SkClipStack::kWideOpenGenID == stack.getTopmostGenID());
     }
 
     // Test out case where the user specifies a union that includes everything
@@ -322,6 +330,7 @@
         stack.clipDevPath(clipB, SkRegion::kUnion_Op, false);
 
         REPORTER_ASSERT(reporter, stack.isWideOpen());
+        REPORTER_ASSERT(reporter, SkClipStack::kWideOpenGenID == stack.getTopmostGenID());
     }
 
     // Test out union w/ a wide open clip
@@ -331,6 +340,7 @@
         stack.clipDevRect(rectA, SkRegion::kUnion_Op, false);
 
         REPORTER_ASSERT(reporter, stack.isWideOpen());
+        REPORTER_ASSERT(reporter, SkClipStack::kWideOpenGenID == stack.getTopmostGenID());
     }
 
     // Test out empty difference from a wide open clip
@@ -343,6 +353,7 @@
         stack.clipDevRect(emptyRect, SkRegion::kDifference_Op, false);
 
         REPORTER_ASSERT(reporter, stack.isWideOpen());
+        REPORTER_ASSERT(reporter, SkClipStack::kWideOpenGenID == stack.getTopmostGenID());
     }
 
     // Test out return to wide open
@@ -354,10 +365,12 @@
         stack.clipDevRect(rectA, SkRegion::kReplace_Op, false);
 
         REPORTER_ASSERT(reporter, !stack.isWideOpen());
+        REPORTER_ASSERT(reporter, SkClipStack::kWideOpenGenID != stack.getTopmostGenID());
 
         stack.restore();
 
         REPORTER_ASSERT(reporter, stack.isWideOpen());
+        REPORTER_ASSERT(reporter, SkClipStack::kWideOpenGenID == stack.getTopmostGenID());
     }
 }
 
@@ -940,16 +953,19 @@
         typedef GrReducedClip::ElementList ElementList;
         // Get the reduced version of the stack.
         ElementList reducedClips;
-
+        int32_t reducedGenID;
         GrReducedClip::InitialState initial;
         SkIRect tBounds(inflatedIBounds);
         SkIRect* tightBounds = r.nextBool() ? &tBounds : NULL;
         GrReducedClip::ReduceClipStack(stack,
                                        inflatedIBounds,
                                        &reducedClips,
+                                       &reducedGenID,
                                        &initial,
                                        tightBounds);
 
+        REPORTER_ASSERT(reporter, SkClipStack::kInvalidGenID != reducedGenID);
+
         // Build a new clip stack based on the reduced clip elements
         SkClipStack reducedStack;
         if (GrReducedClip::kAllOut_InitialState == initial) {
@@ -986,6 +1002,162 @@
     }
 }
 
+#if defined(WIN32)
+    #define SUPPRESS_VISIBILITY_WARNING
+#else
+    #define SUPPRESS_VISIBILITY_WARNING __attribute__((visibility("hidden")))
+#endif
+
+static void test_reduced_clip_stack_genid(skiatest::Reporter* reporter) {
+    {
+        SkClipStack stack;
+        stack.clipDevRect(SkRect::MakeXYWH(0, 0, 100, 100), SkRegion::kReplace_Op, true);
+        stack.clipDevRect(SkRect::MakeXYWH(0, 0, SkScalar(50.3), SkScalar(50.3)), SkRegion::kReplace_Op, true);
+        SkIRect inflatedIBounds = SkIRect::MakeXYWH(0, 0, 100, 100);
+
+        GrReducedClip::ElementList reducedClips;
+        int32_t reducedGenID;
+        GrReducedClip::InitialState initial;
+        SkIRect tightBounds;
+
+        GrReducedClip::ReduceClipStack(stack,
+                                       inflatedIBounds,
+                                       &reducedClips,
+                                       &reducedGenID,
+                                       &initial,
+                                       &tightBounds);
+
+        REPORTER_ASSERT(reporter, reducedClips.count() == 1);
+        // Clips will be cached based on the generation id. Make sure the gen id is valid.
+        REPORTER_ASSERT(reporter, SkClipStack::kInvalidGenID != reducedGenID);
+    }
+    {
+        SkClipStack stack;
+
+        // Create a clip with following 25.3, 25.3 boxes which are 25 apart:
+        //  A  B
+        //  C  D
+
+        stack.clipDevRect(SkRect::MakeXYWH(0, 0, SkScalar(25.3), SkScalar(25.3)), SkRegion::kReplace_Op, true);
+        int32_t genIDA = stack.getTopmostGenID();
+        stack.clipDevRect(SkRect::MakeXYWH(50, 0, SkScalar(25.3), SkScalar(25.3)), SkRegion::kUnion_Op, true);
+        int32_t genIDB = stack.getTopmostGenID();
+        stack.clipDevRect(SkRect::MakeXYWH(0, 50, SkScalar(25.3), SkScalar(25.3)), SkRegion::kUnion_Op, true);
+        int32_t genIDC = stack.getTopmostGenID();
+        stack.clipDevRect(SkRect::MakeXYWH(50, 50, SkScalar(25.3), SkScalar(25.3)), SkRegion::kUnion_Op, true);
+        int32_t genIDD = stack.getTopmostGenID();
+
+
+#define XYWH SkIRect::MakeXYWH
+
+        SkIRect unused;
+        unused.setEmpty();
+        SkIRect stackBounds = XYWH(0, 0, 76, 76);
+
+        // The base test is to test each rect in two ways:
+        // 1) The box dimensions. (Should reduce to "all in", no elements).
+        // 2) A bit over the box dimensions.
+        // In the case 2, test that the generation id is what is expected.
+        // The rects are of fractional size so that case 2 never gets optimized to an empty element
+        // list.
+
+        // Not passing in tighter bounds is tested for consistency.
+        static const struct SUPPRESS_VISIBILITY_WARNING {
+            SkIRect testBounds;
+            int reducedClipCount;
+            int32_t reducedGenID;
+            GrReducedClip::InitialState initialState;
+            SkIRect tighterBounds; // If this is empty, the query will not pass tighter bounds
+            // parameter.
+        } testCases[] = {
+            // Rect A.
+            { XYWH(0, 0, 25, 25), 0, SkClipStack::kWideOpenGenID, GrReducedClip::kAllIn_InitialState, XYWH(0, 0, 25, 25) },
+            { XYWH(0, 0, 25, 25), 0, SkClipStack::kWideOpenGenID, GrReducedClip::kAllIn_InitialState, unused },
+            { XYWH(0, 0, 27, 27), 1, genIDA, GrReducedClip::kAllOut_InitialState, XYWH(0, 0, 27, 27)},
+            { XYWH(0, 0, 27, 27), 1, genIDA, GrReducedClip::kAllOut_InitialState, unused },
+
+            // Rect B.
+            { XYWH(50, 0, 25, 25), 0, SkClipStack::kWideOpenGenID, GrReducedClip::kAllIn_InitialState, XYWH(50, 0, 25, 25) },
+            { XYWH(50, 0, 25, 25), 0, SkClipStack::kWideOpenGenID, GrReducedClip::kAllIn_InitialState, unused },
+            { XYWH(50, 0, 27, 27), 1, genIDB, GrReducedClip::kAllOut_InitialState, XYWH(50, 0, 26, 27) },
+            { XYWH(50, 0, 27, 27), 1, genIDB, GrReducedClip::kAllOut_InitialState, unused },
+
+            // Rect C.
+            { XYWH(0, 50, 25, 25), 0, SkClipStack::kWideOpenGenID, GrReducedClip::kAllIn_InitialState, XYWH(0, 50, 25, 25) },
+            { XYWH(0, 50, 25, 25), 0, SkClipStack::kWideOpenGenID, GrReducedClip::kAllIn_InitialState, unused },
+            { XYWH(0, 50, 27, 27), 1, genIDC, GrReducedClip::kAllOut_InitialState, XYWH(0, 50, 27, 26) },
+            { XYWH(0, 50, 27, 27), 1, genIDC, GrReducedClip::kAllOut_InitialState, unused },
+
+            // Rect D.
+            { XYWH(50, 50, 25, 25), 0, SkClipStack::kWideOpenGenID, GrReducedClip::kAllIn_InitialState, unused },
+            { XYWH(50, 50, 25, 25), 0, SkClipStack::kWideOpenGenID, GrReducedClip::kAllIn_InitialState, XYWH(50, 50, 25, 25)},
+            { XYWH(50, 50, 27, 27), 1, genIDD, GrReducedClip::kAllOut_InitialState, unused },
+            { XYWH(50, 50, 27, 27), 1, genIDD, GrReducedClip::kAllOut_InitialState,  XYWH(50, 50, 26, 26)},
+
+            // Other tests:
+            { XYWH(0, 0, 100, 100), 4, genIDD, GrReducedClip::kAllOut_InitialState, unused },
+            { XYWH(0, 0, 100, 100), 4, genIDD, GrReducedClip::kAllOut_InitialState, stackBounds },
+
+            // Rect in the middle, touches none.
+            { XYWH(26, 26, 24, 24), 0, SkClipStack::kEmptyGenID, GrReducedClip::kAllOut_InitialState, unused },
+            { XYWH(26, 26, 24, 24), 0, SkClipStack::kEmptyGenID, GrReducedClip::kAllOut_InitialState, XYWH(26, 26, 24, 24) },
+
+            // Rect in the middle, touches all the rects. GenID is the last rect.
+            { XYWH(24, 24, 27, 27), 4, genIDD, GrReducedClip::kAllOut_InitialState, unused },
+            { XYWH(24, 24, 27, 27), 4, genIDD, GrReducedClip::kAllOut_InitialState, XYWH(24, 24, 27, 27) },
+        };
+
+#undef XYWH
+
+        for (size_t i = 0; i < SK_ARRAY_COUNT(testCases); ++i) {
+            GrReducedClip::ElementList reducedClips;
+            int32_t reducedGenID;
+            GrReducedClip::InitialState initial;
+            SkIRect tightBounds;
+
+            GrReducedClip::ReduceClipStack(stack,
+                                           testCases[i].testBounds,
+                                           &reducedClips,
+                                           &reducedGenID,
+                                           &initial,
+                                           testCases[i].tighterBounds.isEmpty() ? NULL : &tightBounds);
+
+            REPORTER_ASSERT(reporter, reducedClips.count() == testCases[i].reducedClipCount);
+            SkASSERT(reducedClips.count() == testCases[i].reducedClipCount);
+            REPORTER_ASSERT(reporter, reducedGenID == testCases[i].reducedGenID);
+            SkASSERT(reducedGenID == testCases[i].reducedGenID);
+            REPORTER_ASSERT(reporter, initial == testCases[i].initialState);
+            SkASSERT(initial == testCases[i].initialState);
+            if (!testCases[i].tighterBounds.isEmpty()) {
+                REPORTER_ASSERT(reporter, tightBounds == testCases[i].tighterBounds);
+                SkASSERT(tightBounds == testCases[i].tighterBounds);
+            }
+        }
+    }
+}
+
+static void test_reduced_clip_stack_no_aa_crash(skiatest::Reporter* reporter) {
+    SkClipStack stack;
+    stack.clipDevRect(SkIRect::MakeXYWH(0, 0, 100, 100), SkRegion::kReplace_Op);
+    stack.clipDevRect(SkIRect::MakeXYWH(0, 0, 50, 50), SkRegion::kReplace_Op);
+    SkIRect inflatedIBounds = SkIRect::MakeXYWH(0, 0, 100, 100);
+
+    GrReducedClip::ElementList reducedClips;
+    int32_t reducedGenID;
+    GrReducedClip::InitialState initial;
+    SkIRect tightBounds;
+
+    // At the time, this would crash.
+    GrReducedClip::ReduceClipStack(stack,
+                                   inflatedIBounds,
+                                   &reducedClips,
+                                   &reducedGenID,
+                                   &initial,
+                                   &tightBounds);
+
+    REPORTER_ASSERT(reporter, 0 == reducedClips.count());
+}
+
 #endif
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 
@@ -1034,6 +1206,8 @@
     test_quickContains(reporter);
 #if SK_SUPPORT_GPU
     test_reduced_clip_stack(reporter);
+    test_reduced_clip_stack_genid(reporter);
+    test_reduced_clip_stack_no_aa_crash(reporter);
 #endif
 }