Add a new implementation of reduceOpsTaskSplitting
Currently this doesn't actually handle going over the memory
budget, but it does carve out space to do that later. This algorithm
can't handle everything yet but I want to get it landed for
more iteration as long as it's disabled.
Bug: skia:10877
Change-Id: I37942172345e8cfd6fc2c591a3788a10652377da
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/345168
Commit-Queue: Adlai Holler <adlai@google.com>
Reviewed-by: Robert Phillips <robertphillips@google.com>
Reviewed-by: Brian Salomon <bsalomon@google.com>
Auto-Submit: Adlai Holler <adlai@google.com>
diff --git a/src/gpu/GrDrawingManager.cpp b/src/gpu/GrDrawingManager.cpp
index 4f8180b..8ca4553 100644
--- a/src/gpu/GrDrawingManager.cpp
+++ b/src/gpu/GrDrawingManager.cpp
@@ -15,6 +15,7 @@
#include "include/gpu/GrDirectContext.h"
#include "include/gpu/GrRecordingContext.h"
#include "src/core/SkDeferredDisplayListPriv.h"
+#include "src/core/SkTInternalLList.h"
#include "src/gpu/GrAuditTrail.h"
#include "src/gpu/GrClientMappedBufferManager.h"
#include "src/gpu/GrCopyRenderTask.h"
@@ -32,6 +33,7 @@
#include "src/gpu/GrSurfaceContext.h"
#include "src/gpu/GrSurfaceDrawContext.h"
#include "src/gpu/GrSurfaceProxyPriv.h"
+#include "src/gpu/GrTCluster.h"
#include "src/gpu/GrTTopoSort.h"
#include "src/gpu/GrTexture.h"
#include "src/gpu/GrTextureProxy.h"
@@ -135,6 +137,11 @@
fActiveOpsTask = nullptr;
this->sortTasks();
+
+ if (fReduceOpsTaskSplitting) {
+ this->reorderTasks();
+ }
+
if (!fCpuBufferCache) {
// We cache more buffers when the backend is using client side arrays. Otherwise, we
// expect each pool will use a CPU buffer as a staging buffer before uploading to a GPU
@@ -415,6 +422,32 @@
#endif
}
+// Reorder the array to match the llist without reffing & unreffing sk_sp's.
+// Both args must contain the same objects.
+// This is basically a shim because clustering uses LList but the rest of drawmgr uses array.
+template <typename T>
+static void reorder_array_by_llist(const SkTInternalLList<T>& llist, SkTArray<sk_sp<T>>* array) {
+ int i = 0;
+ for (T* t : llist) {
+ // Release the pointer that used to live here so it doesn't get unreffed.
+ [[maybe_unused]] T* old = array->at(i).release();
+ array->at(i++).reset(t);
+ }
+ SkASSERT(i == array->count());
+}
+
// Reorder fDAG so that render tasks sharing a target become adjacent while
// preserving dependency order, which reduces opsTask splitting downstream.
// Leaves fDAG untouched when clustering finds no beneficial reordering.
void GrDrawingManager::reorderTasks() {
    SkASSERT(fReduceOpsTaskSplitting);
    SkTInternalLList<GrRenderTask> llist;
    // GrTCluster returns false (and we keep the current order) if it could not
    // move anything; llist is only a valid full ordering when it returns true.
    bool clustered = GrTCluster<GrRenderTask, GrRenderTask::ClusterTraits>(fDAG, &llist);
    if (!clustered) {
        return;
    }
    // TODO: Handle case where proposed order would blow our memory budget.
    // Such cases are currently pathological, so we could just return here and keep current order.
    reorder_array_by_llist(llist, &fDAG);
}
+
void GrDrawingManager::closeAllTasks() {
const GrCaps& caps = *fContext->priv().caps();
for (auto& task : fDAG) {
diff --git a/src/gpu/GrDrawingManager.h b/src/gpu/GrDrawingManager.h
index d199ee0..72c6c15 100644
--- a/src/gpu/GrDrawingManager.h
+++ b/src/gpu/GrDrawingManager.h
@@ -135,6 +135,7 @@
void removeRenderTasks(int startIndex, int stopIndex);
void sortTasks();
+ void reorderTasks();
void closeAllTasks();
diff --git a/src/gpu/GrRenderTask.cpp b/src/gpu/GrRenderTask.cpp
index 460454a..b1a4b15 100644
--- a/src/gpu/GrRenderTask.cpp
+++ b/src/gpu/GrRenderTask.cpp
@@ -229,7 +229,7 @@
}
#ifdef SK_DEBUG
-bool GrRenderTask::isDependedent(const GrRenderTask* dependent) const {
+bool GrRenderTask::isDependent(const GrRenderTask* dependent) const {
for (int i = 0; i < fDependents.count(); ++i) {
if (fDependents[i] == dependent) {
return true;
@@ -243,7 +243,7 @@
// TODO: check for loops and duplicates
for (int i = 0; i < fDependencies.count(); ++i) {
- SkASSERT(fDependencies[i]->isDependedent(this));
+ SkASSERT(fDependencies[i]->isDependent(this));
}
}
#endif
diff --git a/src/gpu/GrRenderTask.h b/src/gpu/GrRenderTask.h
index 08c6253..4b23b71 100644
--- a/src/gpu/GrRenderTask.h
+++ b/src/gpu/GrRenderTask.h
@@ -10,6 +10,7 @@
#include "include/core/SkRefCnt.h"
#include "include/private/SkTArray.h"
+#include "src/core/SkTInternalLList.h"
#include "src/gpu/GrSurfaceProxyView.h"
#include "src/gpu/GrTextureProxy.h"
#include "src/gpu/GrTextureResolveManager.h"
@@ -122,6 +123,9 @@
// it is required)?
bool isInstantiated() const;
+ // Used by GrTCluster.
+ SK_DECLARE_INTERNAL_LLIST_INTERFACE(GrRenderTask);
+
protected:
SkDEBUGCODE(bool deferredProxiesAreInstantiated() const;)
@@ -189,7 +193,7 @@
void addDependency(GrRenderTask* dependedOn);
void addDependent(GrRenderTask* dependent);
- SkDEBUGCODE(bool isDependedent(const GrRenderTask* dependent) const;)
+ SkDEBUGCODE(bool isDependent(const GrRenderTask* dependent) const;)
SkDEBUGCODE(void validate() const;)
void closeThoseWhoDependOnMe(const GrCaps&);
@@ -223,6 +227,23 @@
}
};
    // Traits adapter used by GrTCluster to inspect a GrRenderTask's targets and
    // dependencies without widening GrRenderTask's public API.
    struct ClusterTraits {
        // Number of render targets this task writes (values != 1 make the task
        // act as a reordering barrier in GrTCluster).
        static int NumTargets(GrRenderTask* renderTask) {
            return renderTask->numTargets();
        }
        // Stable key identifying target `index` (the proxy's unique ID).
        static uint32_t GetTarget(GrRenderTask* renderTask, int index) {
            return renderTask->target(index).proxy()->uniqueID().asUInt();
        }
        static int NumDependencies(const GrRenderTask* renderTask) {
            return renderTask->fDependencies.count();
        }
        static GrRenderTask* Dependency(GrRenderTask* renderTask, int index) {
            return renderTask->fDependencies[index];
        }
        // Task's own unique ID; used only for debug logging in GrTCluster.
        static uint32_t GetID(GrRenderTask* renderTask) {
            return renderTask->uniqueID();
        }
    };
virtual void onPrePrepare(GrRecordingContext*) {} // Only the GrOpsTask currently overrides this
virtual void onPrepare(GrOpFlushState*) {} // Only GrOpsTask and GrDDLTask override this virtual
diff --git a/src/gpu/GrTCluster.h b/src/gpu/GrTCluster.h
new file mode 100644
index 0000000..2d7d5cf
--- /dev/null
+++ b/src/gpu/GrTCluster.h
@@ -0,0 +1,201 @@
+/*
+ * Copyright 2020 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#ifndef GrTCluster_DEFINED
+#define GrTCluster_DEFINED
+
+#include "include/core/SkRefCnt.h"
+#include "include/private/SkTHash.h"
+#include "src/core/SkScopeExit.h"
+#include "src/core/SkSpan.h"
+#include "src/core/SkTInternalLList.h"
+
+// Uncomment the SkDebugf below to get verbose step-by-step clustering logs.
+#define CLUSTER_DEBUGF(...) //SkDebugf(__VA_ARGS__)
+
+#ifdef SK_DEBUG
+template <typename T, typename Traits>
+SkString GrTCluster_DebugStr(T* t) {
+ if (Traits::NumTargets(t) != 1) {
+ return SkStringPrintf("%d", Traits::GetID(t));
+ } else {
+ return SkStringPrintf("%d(%d)", Traits::GetID(t), Traits::GetTarget(t, 0));
+ }
+}
+
+template <typename T, typename Traits>
+SkString GrTCluster_DebugStr(SkSpan<const sk_sp<T>> collection) {
+ SkString s;
+ for (const sk_sp<T>& t_sp : collection) {
+ s.appendf("%s ", GrTCluster_DebugStr<T, Traits>(t_sp.get()).c_str());
+ }
+ return s;
+}
+
+template <typename T, typename Traits>
+SkString GrTCluster_DebugStr(const SkTInternalLList<T>& collection) {
+ SkString s;
+ for (T* t : collection) {
+ s.appendf("%s ", GrTCluster_DebugStr<T, Traits>(t).c_str());
+ }
+ return s;
+}
+
+template<typename T, typename Traits>
+void GrTCluster_Validate(SkSpan<const sk_sp<T>> input, const SkTInternalLList<T>& llist) {
+ // Check that we didn't break dependencies.
+ SkTHashSet<T*> seen;
+ for (T* t : llist) {
+ seen.add(t);
+ for (int i = 0; i < Traits::NumDependencies(t); i++) {
+ T* dep = Traits::Dependency(t, i);
+ SkASSERTF(seen.contains(dep),
+ "%s came before dependency %s",
+ GrTCluster_DebugStr<T, Traits>(t).c_str(),
+ GrTCluster_DebugStr<T, Traits>(dep).c_str());
+ }
+ }
+ // Check that llist has the same entries as the input.
+ for (const auto& orig : input) {
+ seen.remove(orig.get());
+ }
+ SkASSERT(seen.empty());
+}
+
+#endif // SK_DEBUG
+
+// Returns whether reordering occurred.
+template <typename T, typename Traits>
+bool GrTCluster_Visit(T* task, SkTInternalLList<T>* llist,
+ SkTHashMap<uint32_t, T*>* lastTaskMap) {
+ SkScopeExit exit([&]{
+ llist->addToTail(task);
+ CLUSTER_DEBUGF("Cluster: Output order is now: %s\n",
+ GrTCluster_DebugStr<T, Traits>(*llist).c_str());
+ });
+ CLUSTER_DEBUGF("Cluster: ***Step***\nLooking at %s\n",
+ GrTCluster_DebugStr<T, Traits>(task).c_str());
+ if (Traits::NumTargets(task) != 1) {
+ CLUSTER_DEBUGF("Cluster: %d targets. Emitting barriers.\n", Traits::NumTargets(task));
+ // Tasks with 0 or multiple targets are treated as full barriers
+ // for all their targets.
+ for (int j = 0; j < Traits::NumTargets(task); j++) {
+ lastTaskMap->remove(Traits::GetTarget(task, j));
+ }
+ return false;
+ }
+
+ uint32_t target = Traits::GetTarget(task, 0);
+ T* clusterTail = (lastTaskMap->find(target) ? *lastTaskMap->find(target) : nullptr);
+ lastTaskMap->set(target, task);
+
+ if (!clusterTail) {
+ CLUSTER_DEBUGF("Cluster: Bail, no cluster to extend.\n", SkToInt(target));
+ return false;
+ }
+
+ CLUSTER_DEBUGF("Cluster: clusterTail is %s.\n",
+ GrTCluster_DebugStr<T, Traits>(clusterTail).c_str());
+
+ if (clusterTail == llist->tail()) {
+ CLUSTER_DEBUGF("Cluster: Bail, cluster is already tail.\n");
+ return false;
+ }
+ T* movedHead = clusterTail->fNext;
+
+ // Now, let's refer to the "cluster" as the chain of tasks with the same target, that we're
+ // hoping to extend by reordering. Let "moved tasks" be the ones we want to move to extend the
+ // cluster.
+ T* clusterHead = clusterTail;
+ while (clusterHead->fPrev
+ && 1 == Traits::NumTargets(clusterHead->fPrev)
+ && target == Traits::GetTarget(clusterHead->fPrev, 0)) {
+ clusterHead = clusterHead->fPrev;
+ }
+
+ // We can't reorder if any moved task depends on anything in the cluster.
+ // Collect the cluster into a hash set.
+ // TODO: Is this overkill? Is a linear search appropriate for small clusters?
+ SkTHashSet<T*> clusterSet;
+ for (T* t = clusterHead; t != movedHead; t = t->fNext) {
+ clusterSet.add(t);
+ }
+ // Then check the constraint.
+ for (T* moved = movedHead; moved; moved = moved->fNext) {
+ for (int j = 0; j < Traits::NumDependencies(moved); j++) {
+ T* dep = Traits::Dependency(moved, j);
+ if (clusterSet.contains(dep)) {
+ CLUSTER_DEBUGF("Cluster: Bail, %s depends on %s.\n",
+ GrTCluster_DebugStr<T, Traits>(moved).c_str(),
+ GrTCluster_DebugStr<T, Traits>(dep).c_str());
+ return false;
+ }
+ }
+ }
+
+ // Grab the moved tasks and pull them before clusterHead.
+ for (T* moved = movedHead; moved;) {
+ CLUSTER_DEBUGF("Cluster: Reorder %s behind %s.\n",
+ GrTCluster_DebugStr<T, Traits>(moved).c_str(),
+ GrTCluster_DebugStr<T, Traits>(clusterHead).c_str());
+ // Be careful to save fNext before each move.
+ T* nextMoved = moved->fNext;
+ llist->remove(moved);
+ llist->addBefore(moved, clusterHead);
+ moved = nextMoved;
+ }
+ return true;
+}
+
+// Take a topologically-sorted DAG and cluster the entries together while maintaining the
+// dependency order.
+//
+// If no clustering is possible, returns false.
+// Otherwise, returns true and populates the provided llist as such:
+// - Contains the same set of entries as `input`.
+// - Obeys the dependency rules in `input`.
+// - Places entries with the same Target (see Traits) adjacent to each other.
+// - Entries with multiple targets act as reordering barriers for all their targets.
+//
+// T must declare a public SkTInternalLList interface.
+//
+// Traits requires:
+//
+// static int NumTargets(const T* t) { ... } //
+// static uint32_t GetTarget(const T* t, int i) //
+// static int NumDependencies(const T* t) { ... } //
+// static T* Dependency(T* t, int index) { ... } //
+// static uint32_t GetID(T* t) { ... } //
+template <typename T, typename Traits = T>
+bool GrTCluster(SkSpan<const sk_sp<T>> input, SkTInternalLList<T>* llist) {
+ SkASSERT(llist->isEmpty());
+
+ if (input.count() < 3) {
+ return false;
+ }
+
+ CLUSTER_DEBUGF("Cluster: Original order is %s\n",
+ GrTCluster_DebugStr<T, Traits>(input).c_str());
+
+ SkTHashMap<uint32_t, T*> lastTaskMap;
+ bool didReorder = false;
+ for (const auto& t : input) {
+ didReorder |= GrTCluster_Visit<T, Traits>(t.get(), llist, &lastTaskMap);
+ }
+
+#ifdef SK_DEBUG
+ if (didReorder) {
+ GrTCluster_Validate<T, Traits>(input, *llist);
+ }
+#endif
+
+ return didReorder;
+}
+
+#undef CLUSTER_DEBUGF
+
+#endif