Fix op chaining painter's order violation in GrRenderTargetOpList.

Add unit test of op chaining.

Relax bounds checks in op merging/chaining to only check bounds against
heads of op chains.

Change-Id: I714435913b901c0a068bc7233ca30f2ab7916c2e
Reviewed-on: https://skia-review.googlesource.com/148380
Reviewed-by: Brian Osman <brianosman@google.com>
Commit-Queue: Brian Salomon <bsalomon@google.com>
diff --git a/gn/tests.gni b/gn/tests.gni
index cc480ee..9731514 100644
--- a/gn/tests.gni
+++ b/gn/tests.gni
@@ -146,6 +146,7 @@
   "$_tests/MipMapTest.cpp",
   "$_tests/NonlinearBlendingTest.cpp",
   "$_tests/OnceTest.cpp",
+  "$_tests/OpChainTest.cpp",
   "$_tests/OSPathTest.cpp",
   "$_tests/OverAlignedTest.cpp",
   "$_tests/PackBitsTest.cpp",
diff --git a/src/gpu/GrRenderTargetOpList.cpp b/src/gpu/GrRenderTargetOpList.cpp
index 9e8a8f4..d7ace8f 100644
--- a/src/gpu/GrRenderTargetOpList.cpp
+++ b/src/gpu/GrRenderTargetOpList.cpp
@@ -379,8 +379,11 @@
                 case GrOp::CombineResult::kCannotCombine:
                     break;
             }
-            // Stop going backwards if we would cause a painter's order violation.
-            if (!can_reorder(fRecordedOps.fromBack(i).fOp->bounds(), op->bounds())) {
+            // Stop going backwards if we would cause a painter's order violation. We only need to
+            // test against chain heads as elements of a chain always draw in their chain head's
+            // slot.
+            if (candidate.fOp->isChainHead() &&
+                !can_reorder(candidate.fOp->bounds(), op->bounds())) {
                 GrOP_INFO("\t\tBackward: Intersects with (%s, opID: %u)\n", candidate.fOp->name(),
                           candidate.fOp->uniqueID());
                 break;
@@ -400,8 +403,33 @@
         SkDEBUGCODE(fNumClips++;)
     }
     if (firstChainableIdx >= 0) {
-        GrOP_INFO("\t\t\tBackward: Chained\n");
-        fRecordedOps.fromBack(firstChainableIdx).fOp->setNextInChain(op.get());
+        // If we chain this op it will draw in the slot of the head of the chain. We have to check
+        // that the new op's bounds don't intersect any of the other ops between firstChainableIdx
+        // and the head of that op's chain. We only need to test against chain heads as elements of
+        // a chain always draw in their chain head's slot.
+        const GrOp* chainHead = fRecordedOps.fromBack(firstChainableIdx).fOp->chainHead();
+        int idx = firstChainableIdx;
+        bool chain = true;
+        while (fRecordedOps.fromBack(idx).fOp.get() != chainHead) {
+            // If idx is not in the same chain then we have to check against its bounds as we will
+            // draw before it (when chainHead draws).
+            const GrOp* testOp = fRecordedOps.fromBack(idx).fOp.get();
+            if (testOp->isChainHead() && !can_reorder(testOp->bounds(), op->bounds())) {
+                GrOP_INFO("\t\tBackward: Intersects with (%s, opID: %u). Cannot chain.\n",
+                          testOp->name(), testOp->uniqueID());
+                chain = false;
+                break;
+            }
+            ++idx;
+            // We must encounter the chain head before running off the beginning of the list.
+            SkASSERT(idx < fRecordedOps.count());
+        }
+        if (chain) {
+            GrOp* prevOp = fRecordedOps.fromBack(firstChainableIdx).fOp.get();
+            GrOP_INFO("\t\t\tBackward: Chained to (%s, opID: %u)\n", prevOp->name(),
+                      prevOp->uniqueID());
+            prevOp->setNextInChain(op.get());
+        }
     }
     fRecordedOps.emplace_back(std::move(op), clip, dstProxy);
     return this->uniqueID();
@@ -447,7 +475,8 @@
                 break;
             }
             // Stop traversing if we would cause a painter's order violation.
-            if (!can_reorder(fRecordedOps[j].fOp->bounds(), op->bounds())) {
+            if (candidate.fOp->isChainHead() &&
+                !can_reorder(candidate.fOp->bounds(), op->bounds())) {
                 GrOP_INFO("\t\t%d: (%s opID: %u) -> Intersects with (%s, opID: %u)\n",
                           i, op->name(), op->uniqueID(),
                           candidate.fOp->name(), candidate.fOp->uniqueID());
@@ -456,10 +485,12 @@
             ++j;
             if (j > maxCandidateIdx) {
                 if (firstChainableIdx >= 0) {
-                    GrOP_INFO("\t\t\tForward: Chained\n");
+                    GrOp* nextOp = fRecordedOps[firstChainableIdx].fOp.get();
+                    GrOP_INFO("\t\t\tForward: Chained to (%s, opID: %u)\n", nextOp->name(),
+                              nextOp->uniqueID());
                     // We have to chain i before firstChainableIdx in order to preserve their
                     // relative order as they may overlap.
-                    fRecordedOps[i].fOp->setNextInChain(fRecordedOps[firstChainableIdx].fOp.get());
+                    fRecordedOps[i].fOp->setNextInChain(nextOp);
                     // However we want to draw them *after* any ops that occur between them. So move
                     // the head of the new chain to the later slot as we only execute chain heads.
                     std::swap(fRecordedOps[i].fOp, fRecordedOps[firstChainableIdx].fOp);
diff --git a/src/gpu/ops/GrOp.h b/src/gpu/ops/GrOp.h
index c6b0d3c..d23f781 100644
--- a/src/gpu/ops/GrOp.h
+++ b/src/gpu/ops/GrOp.h
@@ -184,7 +184,7 @@
         public:
             explicit Iter(const GrOp* head) : fCurr(head) {}
             inline Iter& operator++() { return *this = Iter(fCurr->nextInChain()); }
-            const OpSubclass& operator*() const { return *static_cast<const OpSubclass*>(fCurr); }
+            const OpSubclass& operator*() const { return fCurr->cast<OpSubclass>(); }
             bool operator!=(const Iter& that) const { return fCurr != that.fCurr; }
 
         private:
@@ -201,6 +201,8 @@
     void setNextInChain(GrOp*);
     /** Returns true if this is the head of a chain (including a length 1 chain). */
     bool isChainHead() const { return !fChainHead || (fChainHead == this); }
+    /** Gets the head op of the chain. */
+    const GrOp* chainHead() const { return fChainHead ? fChainHead : this; }
     /** Returns true if this is the tail of a chain (including a length 1 chain). */
     bool isChainTail() const { return !fNextInChain; }
     /** Returns true if this is part of chain with length > 1. */
diff --git a/tests/OpChainTest.cpp b/tests/OpChainTest.cpp
new file mode 100644
index 0000000..0bd1e56
--- /dev/null
+++ b/tests/OpChainTest.cpp
@@ -0,0 +1,145 @@
+/*
+ * Copyright 2018 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#include "GrContext.h"
+#include "GrContextPriv.h"
+#include "GrMemoryPool.h"
+#include "GrOpFlushState.h"
+#include "GrRenderTargetOpList.h"
+#include "Test.h"
+#include "ops/GrOp.h"
+
+// The number of configurations is 2^Permutations(kNumOps, 2) * kNumOps!. To make this any larger
+// we'd have to probabilistically sample the space. At four this finishes in a very reasonable
+// amount of time but at five it takes nearly 2 minutes in a Release build on a Z840. Four seems
+// like it generates sufficiently complex test cases.
+static constexpr int kNumOps = 4;
+
+static constexpr int fact(int n) {
+    assert(n > 0);
+    return n > 1 ? n * fact(n - 1) : 1;
+}
+
+// Number of possible allowable binary chainings among the kNumOps ops.
+static constexpr int kNumChainabilityBits = fact(kNumOps) / fact(kNumOps - 2);
+
+// We store the chainability booleans as a 32 bit bitfield.
+GR_STATIC_ASSERT(kNumChainabilityBits <= 32);
+
+static constexpr uint64_t kNumChainabilityPermutations = 1 << kNumChainabilityBits;
+
+/** Is op a chainable to op b as indicated by the bitfield? */
+static bool is_chainable(int a, int b, uint32_t chainabilityBits) {
+    SkASSERT(b != a);
+    // Each index gets kNumOps - 1 contiguous bits
+    int aOffset = a * (kNumOps - 1);
+    // Within a's range we give one bit each other op, but not one for itself.
+    int bIdxInA = b < a ? b : b - 1;
+    return chainabilityBits & (1 << (aOffset + bIdxInA));
+}
+
+namespace {
+/**
+ * A simple test op. It has an integer position, p. When it executes it writes p into an array
+ * of ints at index p and p+1. It takes a bitfield that indicates allowed pair-wise chainings.
+ */
+class TestOp : public GrOp {
+public:
+    DEFINE_OP_CLASS_ID
+
+    static std::unique_ptr<TestOp> Make(GrContext* context, int pos, int result[],
+                                        uint32_t chainability) {
+        GrOpMemoryPool* pool = context->contextPriv().opMemoryPool();
+        return pool->allocate<TestOp>(pos, result, chainability);
+    }
+
+    const char* name() const override { return "TestOp"; }
+
+    void writeResult(int result[]) const { result[fPos + 1] = result[fPos] = fPos; }
+
+private:
+    friend class ::GrOpMemoryPool;  // for ctor
+
+    TestOp(int pos, int result[], uint32_t chainability)
+            : INHERITED(ClassID()), fPos(pos), fResult(result), fChainability(chainability) {
+        // Each op writes 2 values (at pos and pos+1) in a (kNumOps + 1) x 1 buffer.
+        this->setBounds(SkRect::MakeXYWH(pos, 0, 2, 1), HasAABloat::kNo, IsZeroArea::kNo);
+    }
+
+    void onPrepare(GrOpFlushState*) override {}
+
+    void onExecute(GrOpFlushState*) override {
+        for (auto& op : ChainRange<TestOp>(this)) {
+            op.writeResult(fResult);
+        }
+    }
+
+    CombineResult onCombineIfPossible(GrOp* that, const GrCaps&) override {
+        return is_chainable(fPos, that->cast<TestOp>()->fPos, fChainability)
+                       ? CombineResult::kMayChain
+                       : CombineResult::kCannotCombine;
+    }
+
+    int fPos;
+    int* fResult;
+    uint32_t fChainability;
+
+    typedef GrOp INHERITED;
+};
+}  // namespace
+
+/**
+ * Tests adding kNumOps to an op list with all possible allowed chaining configurations. Tests
+ * adding the ops in all possible orders and verifies that the chained executions don't violate
+ * painter's order.
+ */
+DEF_GPUTEST(OpChainTest, reporter, /*ctxInfo*/) {
+    auto context = GrContext::MakeMock(nullptr);
+    SkASSERT(context);
+    GrSurfaceDesc desc;
+    desc.fConfig = kRGBA_8888_GrPixelConfig;
+    desc.fWidth = kNumOps + 1;
+    desc.fHeight = 1;
+    desc.fFlags = kRenderTarget_GrSurfaceFlag;
+
+    auto proxy = context->contextPriv().proxyProvider()->createProxy(
+            desc, kTopLeft_GrSurfaceOrigin, GrMipMapped::kNo, SkBackingFit::kExact, SkBudgeted::kNo,
+            GrInternalSurfaceFlags::kNone);
+    SkASSERT(proxy);
+    proxy->instantiate(context->contextPriv().resourceProvider());
+    int result[kNumOps + 1];
+    int validResult[kNumOps + 1];
+
+    int permutation[kNumOps];
+    for (int i = 0; i < kNumOps; ++i) {
+        permutation[i] = i;
+    }
+    do {
+        for (uint32_t chainabilityBits = 0; chainabilityBits < kNumChainabilityPermutations;
+             ++chainabilityBits) {
+            GrTokenTracker tracker;
+            GrOpFlushState flushState(context->contextPriv().getGpu(),
+                                      context->contextPriv().resourceProvider(), &tracker);
+            GrRenderTargetOpList opList(context->contextPriv().resourceProvider(),
+                                        sk_ref_sp(context->contextPriv().opMemoryPool()),
+                                        proxy->asRenderTargetProxy(),
+                                        context->contextPriv().getAuditTrail());
+            std::fill(result, result + kNumOps + 1, -1);
+            std::fill(validResult, validResult + kNumOps + 1, -1);
+            for (int i = 0; i < kNumOps; ++i) {
+                auto op = TestOp::Make(context.get(), permutation[i], result, chainabilityBits);
+                op->writeResult(validResult);
+                opList.addOp(std::move(op), *context->contextPriv().caps());
+            }
+            opList.makeClosed(*context->contextPriv().caps());
+            opList.prepare(&flushState);
+            opList.execute(&flushState);
+            opList.endFlush();
+            REPORTER_ASSERT(reporter, std::equal(result, result + kNumOps + 1, validResult));
+        }
+    } while (std::next_permutation(permutation, permutation + kNumOps));
+}