drm/amdgpu: process sched job exactly triggered by fence signal

Signed-off-by: Chunming Zhou <david1.zhou@amd.com>
Reviewed-by: Christian K?nig <christian.koenig@amd.com>
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 53d70f7..423cf91 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -404,7 +404,7 @@
 
 struct amdgpu_fence {
 	struct fence base;
-	struct fence_cb cb;
+
 	/* RB, DMA, etc. */
 	struct amdgpu_ring		*ring;
 	uint64_t			seq;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
index 0fcf020..71a4a7e4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
@@ -45,19 +45,24 @@
 
 static void amdgpu_fence_sched_cb(struct fence *f, struct fence_cb *cb)
 {
-	struct amdgpu_fence *fence =
-		container_of(cb, struct amdgpu_fence, cb);
-	amd_sched_isr(fence->ring->scheduler);
+	struct amd_sched_job *sched_job =
+		container_of(cb, struct amd_sched_job, cb);
+	amd_sched_process_job(sched_job);
 }
 
 static void amdgpu_sched_run_job(struct amd_gpu_scheduler *sched,
 				 struct amd_context_entity *c_entity,
-				 void *job)
+				 struct amd_sched_job *job)
 {
 	int r = 0;
-	struct amdgpu_cs_parser *sched_job = (struct amdgpu_cs_parser *)job;
+	struct amdgpu_cs_parser *sched_job;
 	struct amdgpu_fence *fence;
 
+	if (!job || !job->job) {
+		DRM_ERROR("job is null\n");
+		return;
+	}
+	sched_job = (struct amdgpu_cs_parser *)job->job;
 	mutex_lock(&sched_job->job_lock);
 	r = amdgpu_ib_schedule(sched_job->adev,
 			       sched_job->num_ibs,
@@ -67,8 +72,10 @@
 		goto err;
 	fence = sched_job->ibs[sched_job->num_ibs - 1].fence;
 	if (fence_add_callback(&fence->base,
-			       &fence->cb, amdgpu_fence_sched_cb))
+			       &job->cb, amdgpu_fence_sched_cb)) {
+		DRM_ERROR("fence add callback failed\n");
 		goto err;
+	}
 
 	if (sched_job->run_job) {
 		r = sched_job->run_job(sched_job);
diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
index 1204b73..4c2c5ad 100644
--- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
+++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
@@ -107,7 +107,14 @@
 */
 static bool is_scheduler_ready(struct amd_gpu_scheduler *sched)
 {
-	return !kfifo_is_full(&sched->active_hw_rq);
+	unsigned long flags;
+	bool full;
+	spin_lock_irqsave(&sched->queue_lock, flags);
+	full = atomic64_read(&sched->hw_rq_count) <
+		sched->hw_submission_limit ? true : false;
+	spin_unlock_irqrestore(&sched->queue_lock, flags);
+
+	return full;
 }
 
 /**
@@ -369,6 +376,7 @@
 	sched_setscheduler(current, SCHED_FIFO, &sparam);
 
 	while (!kthread_should_stop()) {
+		struct amd_sched_job *sched_job = NULL;
 		wait_event_interruptible(sched->wait_queue,
 					 is_scheduler_ready(sched) &&
 					 (c_entity = select_context(sched)));
@@ -376,43 +384,48 @@
 		if (r != sizeof(void *))
 			continue;
 		r = sched->ops->prepare_job(sched, c_entity, job);
-		if (!r)
-			WARN_ON(kfifo_in_spinlocked(
-					&sched->active_hw_rq,
-					&job,
-					sizeof(void *),
-					&sched->queue_lock) != sizeof(void *));
+		if (!r) {
+			unsigned long flags;
+			sched_job = kzalloc(sizeof(struct amd_sched_job),
+					    GFP_KERNEL);
+			if (!sched_job) {
+				WARN(true, "No memory to allocate\n");
+				continue;
+			}
+			sched_job->job = job;
+			sched_job->sched = sched;
+			spin_lock_irqsave(&sched->queue_lock, flags);
+			list_add_tail(&sched_job->list, &sched->active_hw_rq);
+			atomic64_inc(&sched->hw_rq_count);
+			spin_unlock_irqrestore(&sched->queue_lock, flags);
+		}
 		mutex_lock(&sched->sched_lock);
-		sched->ops->run_job(sched, c_entity, job);
+		sched->ops->run_job(sched, c_entity, sched_job);
 		mutex_unlock(&sched->sched_lock);
 	}
 	return 0;
 }
 
-uint64_t amd_sched_get_handled_seq(struct amd_gpu_scheduler *sched)
-{
-	return atomic64_read(&sched->last_handled_seq);
-}
-
 /**
  * ISR to handle EOP inetrrupts
  *
  * @sched: gpu scheduler
  *
 */
-void amd_sched_isr(struct amd_gpu_scheduler *sched)
+void amd_sched_process_job(struct amd_sched_job *sched_job)
 {
-	int r;
-	void *job;
-	r = kfifo_out_spinlocked(&sched->active_hw_rq,
-				 &job, sizeof(void *),
-				 &sched->queue_lock);
+	unsigned long flags;
+	struct amd_gpu_scheduler *sched;
+	if (!sched_job)
+		return;
+	sched = sched_job->sched;
+	spin_lock_irqsave(&sched->queue_lock, flags);
+	list_del(&sched_job->list);
+	atomic64_dec(&sched->hw_rq_count);
+	spin_unlock_irqrestore(&sched->queue_lock, flags);
 
-	if (r != sizeof(void *))
-		job = NULL;
-
-	sched->ops->process_job(sched, job);
-	atomic64_inc(&sched->last_handled_seq);
+	sched->ops->process_job(sched, sched_job->job);
+	kfree(sched_job);
 	wake_up_interruptible(&sched->wait_queue);
 }
 
@@ -446,8 +459,7 @@
 	sched->granularity = granularity;
 	sched->ring_id = ring;
 	sched->preemption = preemption;
-	atomic64_set(&sched->last_handled_seq, 0);
-
+	sched->hw_submission_limit = hw_submission;
 	snprintf(name, sizeof(name), "gpu_sched[%d]", ring);
 	mutex_init(&sched->sched_lock);
 	spin_lock_init(&sched->queue_lock);
@@ -458,13 +470,8 @@
 	sched->kernel_rq.check_entity_status = gpu_entity_check_status;
 
 	init_waitqueue_head(&sched->wait_queue);
-	if(kfifo_alloc(&sched->active_hw_rq,
-		       hw_submission * sizeof(void *),
-		       GFP_KERNEL)) {
-		kfree(sched);
-		return NULL;
-	}
-
+	INIT_LIST_HEAD(&sched->active_hw_rq);
+	atomic64_set(&sched->hw_rq_count, 0);
 	/* Each scheduler will run on a seperate kernel thread */
 	sched->thread = kthread_create(amd_sched_main, sched, name);
 	if (sched->thread) {
@@ -473,7 +480,6 @@
 	}
 
 	DRM_ERROR("Failed to create scheduler for id %d.\n", ring);
-	kfifo_free(&sched->active_hw_rq);
 	kfree(sched);
 	return NULL;
 }
@@ -488,7 +494,6 @@
 int amd_sched_destroy(struct amd_gpu_scheduler *sched)
 {
 	kthread_stop(sched->thread);
-	kfifo_free(&sched->active_hw_rq);
 	kfree(sched);
 	return  0;
 }
diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
index 1a01ac4..8a756a5 100644
--- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
+++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
@@ -25,6 +25,7 @@
 #define _GPU_SCHEDULER_H_
 
 #include <linux/kfifo.h>
+#include <linux/fence.h>
 
 #define AMD_GPU_WAIT_IDLE_TIMEOUT_IN_MS		3000
 
@@ -80,6 +81,13 @@
 	bool                            is_pending;
 };
 
+struct amd_sched_job {
+	struct list_head		list;
+	struct fence_cb                 cb;
+	struct amd_gpu_scheduler        *sched;
+	void                            *job;
+};
+
 /**
  * Define the backend operations called by the scheduler,
  * these functions should be implemented in driver side
@@ -90,7 +98,7 @@
 			   void *job);
 	void (*run_job)(struct amd_gpu_scheduler *sched,
 			struct amd_context_entity *c_entity,
-			void *job);
+			struct amd_sched_job *job);
 	void (*process_job)(struct amd_gpu_scheduler *sched, void *job);
 };
 
@@ -102,19 +110,19 @@
 	struct task_struct		*thread;
 	struct amd_run_queue		sched_rq;
 	struct amd_run_queue		kernel_rq;
-	struct kfifo                    active_hw_rq;
+	struct list_head		active_hw_rq;
+	atomic64_t			hw_rq_count;
 	struct amd_sched_backend_ops	*ops;
 	uint32_t			ring_id;
 	uint32_t			granularity; /* in ms unit */
 	uint32_t			preemption;
-	atomic64_t			last_handled_seq;
 	wait_queue_head_t		wait_queue;
 	struct amd_context_entity	*current_entity;
 	struct mutex			sched_lock;
 	spinlock_t			queue_lock;
+	uint32_t                        hw_submission_limit;
 };
 
-
 struct amd_gpu_scheduler *amd_sched_create(void *device,
 				struct amd_sched_backend_ops *ops,
 				uint32_t ring,
@@ -133,7 +141,7 @@
 			bool intr,
 			long timeout);
 
-void amd_sched_isr(struct amd_gpu_scheduler *sched);
+void amd_sched_process_job(struct amd_sched_job *sched_job);
 uint64_t amd_sched_get_handled_seq(struct amd_gpu_scheduler *sched);
 
 int amd_context_entity_fini(struct amd_gpu_scheduler *sched,