aco: add ACO_DEBUG=force-waitcnt to emit wait-states
This is useful for debugging missing wait-states and for pinpointing
the faulting instruction when a memory violation occurs.
Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6386>
diff --git a/docs/envvars.rst b/docs/envvars.rst
index 4beba25..cee45fb 100644
--- a/docs/envvars.rst
+++ b/docs/envvars.rst
@@ -612,6 +612,8 @@
validate register assignment of ACO IR and catches many RA bugs
``perfwarn``
abort on some suboptimal code generation
+ ``force-waitcnt``
+ force the emission of waitcnt states whenever there is something to wait for
radeonsi driver environment variables
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
diff --git a/src/amd/compiler/aco_insert_waitcnt.cpp b/src/amd/compiler/aco_insert_waitcnt.cpp
index db0c7c0..751892e 100644
--- a/src/amd/compiler/aco_insert_waitcnt.cpp
+++ b/src/amd/compiler/aco_insert_waitcnt.cpp
@@ -473,9 +473,32 @@
return imm;
}
+/* Debug helper used when waitcnt emission is forced: for every counter that
+ * ctx reports as non-zero, set the matching field of imm to 0. The vs
+ * counter is only looked at when the chip class is GFX10 or newer.
+ */
+void force_waitcnt(wait_ctx& ctx, wait_imm& imm)
+{
+   /* vs_cnt is only consulted on GFX10 and later chip classes. */
+   const bool check_vs = ctx.chip_class >= GFX10;
+
+   if (ctx.vm_cnt) {
+      imm.vm = 0;
+   }
+
+   if (ctx.exp_cnt) {
+      imm.exp = 0;
+   }
+
+   if (ctx.lgkm_cnt) {
+      imm.lgkm = 0;
+   }
+
+   if (check_vs && ctx.vs_cnt) {
+      imm.vs = 0;
+   }
+}
+
wait_imm kill(Instruction* instr, wait_ctx& ctx, memory_sync_info sync_info)
{
wait_imm imm;
+
+ if (debug_flags & DEBUG_FORCE_WAITCNT) {
+ /* Force emitting waitcnt states right after the instruction if there is
+ * something to wait for.
+ */
+ force_waitcnt(ctx, imm);
+ }
+
if (ctx.exp_cnt || ctx.vm_cnt || ctx.lgkm_cnt)
imm.combine(check_instr(instr, ctx));
diff --git a/src/amd/compiler/aco_ir.cpp b/src/amd/compiler/aco_ir.cpp
index 2a0a500..801fbc6 100644
--- a/src/amd/compiler/aco_ir.cpp
+++ b/src/amd/compiler/aco_ir.cpp
@@ -34,6 +34,7 @@
{"validateir", DEBUG_VALIDATE_IR},
{"validatera", DEBUG_VALIDATE_RA},
{"perfwarn", DEBUG_PERFWARN},
+ {"force-waitcnt", DEBUG_FORCE_WAITCNT},
{NULL, 0}
};
diff --git a/src/amd/compiler/aco_ir.h b/src/amd/compiler/aco_ir.h
index 0b3a776..0e5b69a 100644
--- a/src/amd/compiler/aco_ir.h
+++ b/src/amd/compiler/aco_ir.h
@@ -51,6 +51,7 @@
DEBUG_VALIDATE_IR = 0x1,
DEBUG_VALIDATE_RA = 0x2,
DEBUG_PERFWARN = 0x4,
+ DEBUG_FORCE_WAITCNT = 0x8,
};
/**