[AMDGPU] Ensure there are enough registers for wave dispatch
Summary:
This fixes the number of SGPRs and VGPRs in the *_RSRC1 register to
allow for registers set up in wave dispatch, even if those registers are
not used in the shader.
Re-landed after noticing that the buildbot failure from 329808 seemed to
be unrelated.
Subscribers: arsenm, kzhuravl, wdng, nhaehnle, yaxunl, dstuttard, t-tye, llvm-commits
Differential Revision: https://reviews.llvm.org/D45503
Change-Id: I6575f0e0d2a528d1319d0b289f0ebe4510fa5771
llvm-svn: 329826
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index d64e655..9cb9045 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -837,6 +837,19 @@
ProgInfo.NumSGPR += ExtraSGPRs;
ProgInfo.NumVGPR += ExtraVGPRs;
+ // Ensure there are enough SGPRs and VGPRs for wave dispatch, where wave
+ // dispatch registers are function args.
+ unsigned WaveDispatchNumSGPR = 0, WaveDispatchNumVGPR = 0;
+ for (auto &Arg : MF.getFunction().args()) {
+ unsigned NumRegs = (Arg.getType()->getPrimitiveSizeInBits() + 31) / 32;
+ if (Arg.hasAttribute(Attribute::InReg))
+ WaveDispatchNumSGPR += NumRegs;
+ else
+ WaveDispatchNumVGPR += NumRegs;
+ }
+ ProgInfo.NumSGPR = std::max(ProgInfo.NumSGPR, WaveDispatchNumSGPR);
+ ProgInfo.NumVGPR = std::max(ProgInfo.NumVGPR, WaveDispatchNumVGPR);
+
// Adjust number of registers used to meet default/requested minimum/maximum
// number of waves per execution unit request.
ProgInfo.NumSGPRsForWavesPerEU = std::max(
diff --git a/llvm/test/CodeGen/AMDGPU/wave_dispatch_regs.ll b/llvm/test/CodeGen/AMDGPU/wave_dispatch_regs.ll
new file mode 100644
index 0000000..06174f8
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/wave_dispatch_regs.ll
@@ -0,0 +1,18 @@
+; RUN: llc -mtriple=amdgcn--amdpal -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -enable-var-scope %s
+; RUN: llc -mtriple=amdgcn--amdpal -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -enable-var-scope %s
+; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -enable-var-scope %s
+
+; This compute shader has input args that claim that it has 17 sgprs and 5 vgprs
+; in wave dispatch. Ensure that the sgpr and vgpr counts in COMPUTE_PGM_RSRC1
+; are set to reflect that, even though the registers are not used in the shader.
+
+; GCN-LABEL: {{^}}_amdgpu_cs_main:
+; SI: .amd_amdgpu_pal_metadata{{.*}}0x2e12,0x{{[0-9a-f]*}}81,
+; VI: .amd_amdgpu_pal_metadata{{.*}}0x2e12,0x{{[0-9a-f]*}}c1,
+; GFX9: .amd_amdgpu_pal_metadata{{.*}}0x2e12,0x{{[0-9a-f]*}}81,
+
+define dllexport amdgpu_cs void @_amdgpu_cs_main(i32 inreg, i32 inreg, <2 x i32> inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, <3 x i32> inreg, i32 inreg, <5 x i32>) {
+.entry:
+ ret void
+}
+