drm/amd: fix deadlock of job_list_lock V2

ops->run_job() can take a mutex and therefore sleep, but it was being
called with the job_list_lock spinlock held. Drop the lock around the
call and re-acquire it afterwards.

V2: use list_for_each_entry_safe, since a job might complete and be
removed from the ring mirror list while the lock is dropped.
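
For reference, a minimal sketch of the locking pattern in isolation
(my_sched, my_job, my_run_job and my_recover are hypothetical
stand-ins, not the scheduler's real types or API):

  #include <linux/list.h>
  #include <linux/mutex.h>
  #include <linux/spinlock.h>

  struct my_job {
          struct list_head node;
  };

  struct my_sched {
          spinlock_t lock;        /* protects @jobs */
          struct mutex hw_lock;   /* sleeping lock */
          struct list_head jobs;
  };

  /* May sleep, so it must never run under a held spinlock. */
  static void my_run_job(struct my_sched *sched, struct my_job *job)
  {
          mutex_lock(&sched->hw_lock);
          /* ... resubmit the job to the hardware ... */
          mutex_unlock(&sched->hw_lock);
  }

  static void my_recover(struct my_sched *sched)
  {
          struct my_job *job, *tmp;

          spin_lock(&sched->lock);
          /*
           * The _safe iterator caches the next node in tmp, so the
           * walk survives job being unlinked while the lock is down.
           */
          list_for_each_entry_safe(job, tmp, &sched->jobs, node) {
                  spin_unlock(&sched->lock); /* never sleep under a spinlock */
                  my_run_job(sched, job);
                  spin_lock(&sched->lock);
          }
          spin_unlock(&sched->lock);
  }

The _safe variant reads the next element before the loop body runs,
which is what makes dropping the lock inside the loop tolerable when
the current job completes and removes itself from the list.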

Signed-off-by: Chunming Zhou <David1.Zhou@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
index 21c49d3..ef312bb 100644
--- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
+++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
@@ -399,7 +399,7 @@
 
 void amd_sched_job_recovery(struct amd_gpu_scheduler *sched)
 {
-	struct amd_sched_job *s_job;
+	struct amd_sched_job *s_job, *tmp;
 	int r;
 
 	spin_lock(&sched->job_list_lock);
@@ -408,10 +408,13 @@
 	if (s_job)
 		schedule_delayed_work(&s_job->work_tdr, sched->timeout);
 
-	list_for_each_entry(s_job, &sched->ring_mirror_list, node) {
+	list_for_each_entry_safe(s_job, tmp, &sched->ring_mirror_list, node) {
 		struct amd_sched_fence *s_fence = s_job->s_fence;
-		struct fence *fence = sched->ops->run_job(s_job);
+		struct fence *fence;
 
+		/* run_job() can take a mutex and sleep, so drop the spinlock */
+		spin_unlock(&sched->job_list_lock);
+		fence = sched->ops->run_job(s_job);
 		atomic_inc(&sched->hw_rq_count);
 		if (fence) {
 			s_fence->parent = fence_get(fence);
@@ -427,6 +430,8 @@
 			DRM_ERROR("Failed to run job!\n");
 			amd_sched_process_job(NULL, &s_fence->cb);
 		}
+		/* retake the lock; tmp keeps the list walk valid */
+		spin_lock(&sched->job_list_lock);
 	}
 	spin_unlock(&sched->job_list_lock);
 }