drm/amd: add scheduler fence implementation (v2)
scheduler fence is based on kernel fence framework.
v2: squash in Christian's build fix
Signed-off-by: Chunming Zhou <david1.zhou@amd.com>
Reviewed-by: Christian K?nig <christian.koenig@amd.com>
diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
index f1cb7d2..04c2707 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -86,6 +86,7 @@
# GPU scheduler
amdgpu-y += \
../scheduler/gpu_scheduler.o \
+ ../scheduler/sched_fence.o \
amdgpu_sched.o
amdgpu-$(CONFIG_COMPAT) += amdgpu_ioc32.o
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 987e307..2ba448e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1261,6 +1261,7 @@
int (*prepare_job)(struct amdgpu_cs_parser *sched_job);
int (*run_job)(struct amdgpu_cs_parser *sched_job);
int (*free_job)(struct amdgpu_cs_parser *sched_job);
+ struct amd_sched_fence *s_fence;
};
static inline u32 amdgpu_get_ib_value(struct amdgpu_cs_parser *p, uint32_t ib_idx, int idx)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index b1dc7e1..f428288 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -899,8 +899,6 @@
if (amdgpu_enable_scheduler && parser->num_ibs) {
struct amdgpu_ring * ring =
amdgpu_cs_parser_get_ring(adev, parser);
- parser->ibs[parser->num_ibs - 1].sequence = atomic64_inc_return(
- &parser->ctx->rings[ring->idx].entity.last_queued_v_seq);
if (ring->is_pte_ring || (parser->bo_list && parser->bo_list->has_userptr)) {
r = amdgpu_cs_parser_prepare_job(parser);
if (r)
@@ -910,10 +908,21 @@
parser->ring = ring;
parser->run_job = amdgpu_cs_parser_run_job;
parser->free_job = amdgpu_cs_parser_free_job;
- amd_sched_push_job(ring->scheduler,
- &parser->ctx->rings[ring->idx].entity,
- parser);
- cs->out.handle = parser->ibs[parser->num_ibs - 1].sequence;
+ mutex_lock(&parser->job_lock);
+ r = amd_sched_push_job(ring->scheduler,
+ &parser->ctx->rings[ring->idx].entity,
+ parser,
+ &parser->s_fence);
+ if (r) {
+ mutex_unlock(&parser->job_lock);
+ goto out;
+ }
+ parser->ibs[parser->num_ibs - 1].sequence =
+ amdgpu_ctx_add_fence(parser->ctx, ring,
+ &parser->s_fence->base,
+ parser->s_fence->v_seq);
+ cs->out.handle = parser->s_fence->v_seq;
+ mutex_unlock(&parser->job_lock);
up_read(&adev->exclusive_lock);
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index 232e800..1833f05 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -268,16 +268,6 @@
struct amdgpu_ctx_ring *cring = & ctx->rings[ring->idx];
struct fence *fence;
uint64_t queued_seq;
- int r;
-
- if (amdgpu_enable_scheduler) {
- r = amd_sched_wait_emit(&cring->entity,
- seq,
- false,
- -1);
- if (r)
- return NULL;
- }
spin_lock(&ctx->ring_lock);
if (amdgpu_enable_scheduler)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index eed409c..5104e64 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -218,7 +218,7 @@
sequence = amdgpu_enable_scheduler ? ib->sequence : 0;
- if (ib->ctx)
+ if (!amdgpu_enable_scheduler && ib->ctx)
ib->sequence = amdgpu_ctx_add_fence(ib->ctx, ring,
&ib->fence->base,
sequence);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
index d82f248..6a7e83e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
@@ -118,7 +118,6 @@
{
int r = 0;
if (amdgpu_enable_scheduler) {
- uint64_t v_seq;
struct amdgpu_cs_parser *sched_job =
amdgpu_cs_parser_create(adev, owner, &adev->kernel_ctx,
ibs, num_ibs);
@@ -126,22 +125,23 @@
return -ENOMEM;
}
sched_job->free_job = free_job;
- v_seq = atomic64_inc_return(&adev->kernel_ctx.rings[ring->idx].entity.last_queued_v_seq);
- ibs[num_ibs - 1].sequence = v_seq;
- amd_sched_push_job(ring->scheduler,
- &adev->kernel_ctx.rings[ring->idx].entity,
- sched_job);
- r = amd_sched_wait_emit(
- &adev->kernel_ctx.rings[ring->idx].entity,
- v_seq,
- false,
- -1);
- if (r)
- WARN(true, "emit timeout\n");
- } else
+ mutex_lock(&sched_job->job_lock);
+ r = amd_sched_push_job(ring->scheduler,
+ &adev->kernel_ctx.rings[ring->idx].entity,
+ sched_job, &sched_job->s_fence);
+ if (r) {
+ mutex_unlock(&sched_job->job_lock);
+ kfree(sched_job);
+ return r;
+ }
+ ibs[num_ibs - 1].sequence = sched_job->s_fence->v_seq;
+ *f = &sched_job->s_fence->base;
+ mutex_unlock(&sched_job->job_lock);
+ } else {
r = amdgpu_ib_schedule(adev, num_ibs, ibs, owner);
- if (r)
- return r;
- *f = &ibs[num_ibs - 1].fence->base;
+ if (r)
+ return r;
+ *f = &ibs[num_ibs - 1].fence->base;
+ }
return 0;
}