drm/amdgpu: optimize scheduler fence handling

We only need to wait for jobs to be scheduled when
the dependency is from the same scheduler.

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Chunming Zhou <david1.zhou@amd.com>
diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
index ea30d6a..b7cd108 100644
--- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
+++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
@@ -211,6 +211,41 @@
 	amd_sched_wakeup(entity->sched);
 }
 
+static bool amd_sched_entity_add_dependency_cb(struct amd_sched_entity *entity)
+{
+	struct amd_gpu_scheduler *sched = entity->sched;
+	struct fence * fence = entity->dependency;
+	struct amd_sched_fence *s_fence;
+
+	if (fence->context == entity->fence_context) {
+		/* We can ignore fences from ourself */
+		fence_put(entity->dependency);
+		return false;
+	}
+
+	s_fence = to_amd_sched_fence(fence);
+	if (s_fence && s_fence->sched == sched) {
+		/* Fence is from the same scheduler */
+		if (test_bit(AMD_SCHED_FENCE_SCHEDULED_BIT, &fence->flags)) {
+			/* Ignore it when it is already scheduled */
+			fence_put(entity->dependency);
+			return false;
+		}
+
+		/* Wait for fence to be scheduled */
+		entity->cb.func = amd_sched_entity_wakeup;
+		list_add_tail(&entity->cb.node, &s_fence->scheduled_cb);
+		return true;
+	}
+
+	if (!fence_add_callback(entity->dependency, &entity->cb,
+				amd_sched_entity_wakeup))
+		return true;
+
+	fence_put(entity->dependency);
+	return false;
+}
+
 static struct amd_sched_job *
 amd_sched_entity_pop_job(struct amd_sched_entity *entity)
 {
@@ -223,20 +258,9 @@
 	if (!kfifo_out_peek(&entity->job_queue, &sched_job, sizeof(sched_job)))
 		return NULL;
 
-	while ((entity->dependency = sched->ops->dependency(sched_job))) {
-
-		if (entity->dependency->context == entity->fence_context) {
-			/* We can ignore fences from ourself */
-			fence_put(entity->dependency);
-			continue;
-		}
-
-		if (fence_add_callback(entity->dependency, &entity->cb,
-				       amd_sched_entity_wakeup))
-			fence_put(entity->dependency);
-		else
+	while ((entity->dependency = sched->ops->dependency(sched_job)))
+		if (amd_sched_entity_add_dependency_cb(entity))
 			return NULL;
-	}
 
 	return sched_job;
 }
@@ -400,6 +424,7 @@
 
 		atomic_inc(&sched->hw_rq_count);
 		fence = sched->ops->run_job(sched_job);
+		amd_sched_fence_scheduled(s_fence);
 		if (fence) {
 			r = fence_add_callback(fence, &s_fence->cb,
 					       amd_sched_process_job);
diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
index 939692b..a0f0ae5 100644
--- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
+++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
@@ -27,6 +27,8 @@
 #include <linux/kfifo.h>
 #include <linux/fence.h>
 
+#define AMD_SCHED_FENCE_SCHEDULED_BIT	FENCE_FLAG_USER_BITS
+
 struct amd_gpu_scheduler;
 struct amd_sched_rq;
 
@@ -68,6 +70,7 @@
 struct amd_sched_fence {
 	struct fence                    base;
 	struct fence_cb                 cb;
+	struct list_head		scheduled_cb;
 	struct amd_gpu_scheduler	*sched;
 	spinlock_t			lock;
 	void                            *owner;
@@ -134,7 +137,7 @@
 
 struct amd_sched_fence *amd_sched_fence_create(
 	struct amd_sched_entity *s_entity, void *owner);
+void amd_sched_fence_scheduled(struct amd_sched_fence *fence);
 void amd_sched_fence_signal(struct amd_sched_fence *fence);
 
-
 #endif
diff --git a/drivers/gpu/drm/amd/scheduler/sched_fence.c b/drivers/gpu/drm/amd/scheduler/sched_fence.c
index 8d2130b..87c78ee 100644
--- a/drivers/gpu/drm/amd/scheduler/sched_fence.c
+++ b/drivers/gpu/drm/amd/scheduler/sched_fence.c
@@ -35,6 +35,8 @@
 	fence = kmem_cache_zalloc(sched_fence_slab, GFP_KERNEL);
 	if (fence == NULL)
 		return NULL;
+
+	INIT_LIST_HEAD(&fence->scheduled_cb);
 	fence->owner = owner;
 	fence->sched = s_entity->sched;
 	spin_lock_init(&fence->lock);
@@ -55,6 +57,17 @@
 		FENCE_TRACE(&fence->base, "was already signaled\n");
 }
 
+void amd_sched_fence_scheduled(struct amd_sched_fence *s_fence)
+{
+	struct fence_cb *cur, *tmp;
+
+	set_bit(AMD_SCHED_FENCE_SCHEDULED_BIT, &s_fence->base.flags);
+	list_for_each_entry_safe(cur, tmp, &s_fence->scheduled_cb, node) {
+		list_del_init(&cur->node);
+		cur->func(&s_fence->base, cur);
+	}
+}
+
 static const char *amd_sched_fence_get_driver_name(struct fence *fence)
 {
 	return "amd_sched";