drm/amdgpu: add ctx_id to the WAIT_CS IOCTL (v4)

The context ID is required to support per-context fences.

v2: add amdgpu_ctx_get/put
v3: improve get/put
v4: squash hlock fix

Signed-off-by: Jammy Zhou <Jammy.Zhou@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index ffbe9aa..86b9324 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -768,8 +768,13 @@
 	uint64_t seq[AMDGPU_MAX_RINGS] = {0};
 	struct amdgpu_ring *ring = NULL;
 	unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout);
+	struct amdgpu_ctx *ctx;
 	long r;
 
+	ctx = amdgpu_ctx_get(filp->driver_priv, wait->in.ctx_id);
+	if (ctx == NULL)
+		return -EINVAL;
+
 	r = amdgpu_cs_get_ring(adev, wait->in.ip_type, wait->in.ip_instance,
 			       wait->in.ring, &ring);
 	if (r)
@@ -778,6 +783,7 @@
 	seq[ring->idx] = wait->in.handle;
 
 	r = amdgpu_fence_wait_seq_timeout(adev, seq, true, timeout);
+	amdgpu_ctx_put(ctx);
 	if (r < 0)
 		return r;