AMDGPU : Add trap handler support.
Differential Revision: http://reviews.llvm.org/D26010
llvm-svn: 294692
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index aed8ce1..fbe7606 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -67,6 +67,12 @@
"Support unaligned global loads and stores"
>;
+def FeatureTrapHandler: SubtargetFeature<"trap-handler",
+ "TrapHandler",
+ "true",
+ "Trap handler support"
+>;
+
def FeatureUnalignedScratchAccess : SubtargetFeature<"unaligned-scratch-access",
"UnalignedScratchAccess",
"true",
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
index 85c7673..4f2ed9f 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
@@ -191,7 +191,8 @@
{ "llvm.amdgcn.dispatch.ptr", "amdgpu-dispatch-ptr" },
{ "llvm.amdgcn.queue.ptr", "amdgpu-queue-ptr" },
{ "llvm.amdgcn.dispatch.id", "amdgpu-dispatch-id" },
- { "llvm.trap", "amdgpu-queue-ptr" }
+ { "llvm.trap", "amdgpu-queue-ptr" },
+ { "llvm.debugtrap", "amdgpu-queue-ptr" }
};
// TODO: We should not add the attributes if the known compile time workgroup
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index 06d078c..e4ac295 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -243,6 +243,9 @@
OutStreamer->emitRawComment(" COMPUTE_PGM_RSRC2:USER_SGPR: " +
Twine(G_00B84C_USER_SGPR(KernelInfo.ComputePGMRSrc2)),
false);
+ OutStreamer->emitRawComment(" COMPUTE_PGM_RSRC2:TRAP_HANDLER: " +
+ Twine(G_00B84C_TRAP_HANDLER(KernelInfo.ComputePGMRSrc2)),
+ false);
OutStreamer->emitRawComment(" COMPUTE_PGM_RSRC2:TGID_X_EN: " +
Twine(G_00B84C_TGID_X_EN(KernelInfo.ComputePGMRSrc2)),
false);
@@ -634,6 +637,7 @@
ProgInfo.ComputePGMRSrc2 =
S_00B84C_SCRATCH_EN(ProgInfo.ScratchBlocks > 0) |
S_00B84C_USER_SGPR(MFI->getNumUserSGPRs()) |
+ S_00B84C_TRAP_HANDLER(STM.isTrapHandlerEnabled()) |
S_00B84C_TGID_X_EN(MFI->hasWorkGroupIDX()) |
S_00B84C_TGID_Y_EN(MFI->hasWorkGroupIDY()) |
S_00B84C_TGID_Z_EN(MFI->hasWorkGroupIDZ()) |
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index a924f69..c377a0a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -44,7 +44,7 @@
SmallString<256> FullFS("+promote-alloca,+fp64-fp16-denormals,+load-store-opt,");
if (isAmdHsaOS()) // Turn on FlatForGlobal for HSA.
- FullFS += "+flat-for-global,+unaligned-buffer-access,";
+ FullFS += "+flat-for-global,+unaligned-buffer-access,+trap-handler,";
FullFS += FS;
@@ -94,6 +94,7 @@
UnalignedBufferAccess(false),
EnableXNACK(false),
+ TrapHandler(false),
DebuggerInsertNops(false),
DebuggerReserveRegs(false),
DebuggerEmitPrologue(false),
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
index 45c53ac..068bf0c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -66,6 +66,22 @@
ISAVersion8_1_0,
};
+ enum TrapHandlerAbi {
+ TrapHandlerAbiNone = 0,
+ TrapHandlerAbiHsa = 1
+ };
+
+ enum TrapCode {
+ TrapCodeBreakPoint = 0,
+ TrapCodeLLVMTrap = 1,
+ TrapCodeLLVMDebugTrap = 2,
+ TrapCodeHSADebugTrap = 3
+ };
+
+ enum TrapRegValues {
+ TrapCodeLLVMTrapRegValue = 1
+ };
+
protected:
// Basic subtarget description.
Triple TargetTriple;
@@ -88,6 +104,7 @@
bool UnalignedScratchAccess;
bool UnalignedBufferAccess;
bool EnableXNACK;
+ bool TrapHandler;
bool DebuggerInsertNops;
bool DebuggerReserveRegs;
bool DebuggerEmitPrologue;
@@ -256,6 +273,10 @@
return CaymanISA;
}
+ TrapHandlerAbi getTrapHandlerAbi() const {
+ return isAmdHsaOS() ? TrapHandlerAbiHsa : TrapHandlerAbiNone;
+ }
+
bool isPromoteAllocaEnabled() const {
return EnablePromoteAlloca;
}
@@ -309,6 +330,10 @@
return UnalignedScratchAccess;
}
+ bool isTrapHandlerEnabled() const {
+ return TrapHandler;
+ }
+
bool isXNACKEnabled() const {
return EnableXNACK;
}
diff --git a/llvm/lib/Target/AMDGPU/SIDefines.h b/llvm/lib/Target/AMDGPU/SIDefines.h
index ff4e321..759a043 100644
--- a/llvm/lib/Target/AMDGPU/SIDefines.h
+++ b/llvm/lib/Target/AMDGPU/SIDefines.h
@@ -300,6 +300,9 @@
#define S_00B84C_USER_SGPR(x) (((x) & 0x1F) << 1)
#define G_00B84C_USER_SGPR(x) (((x) >> 1) & 0x1F)
#define C_00B84C_USER_SGPR 0xFFFFFFC1
+#define S_00B84C_TRAP_HANDLER(x) (((x) & 0x1) << 6)
+#define G_00B84C_TRAP_HANDLER(x) (((x) >> 6) & 0x1)
+#define C_00B84C_TRAP_HANDLER 0xFFFFFFBF
#define S_00B84C_TGID_X_EN(x) (((x) & 0x1) << 7)
#define G_00B84C_TGID_X_EN(x) (((x) >> 7) & 0x1)
#define C_00B84C_TGID_X_EN 0xFFFFFF7F
@@ -387,7 +390,6 @@
#define R_SPILLED_SGPRS 0x4
#define R_SPILLED_VGPRS 0x8
-
} // End namespace llvm
#endif
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 7e49fc2..ba3a623 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -276,6 +276,7 @@
// On SI this is s_memtime and s_memrealtime on VI.
setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Legal);
setOperationAction(ISD::TRAP, MVT::Other, Legal);
+ setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
@@ -1779,24 +1780,46 @@
}
switch (MI.getOpcode()) {
- case AMDGPU::S_TRAP_PSEUDO: {
- DebugLoc DL = MI.getDebugLoc();
- BuildMI(*BB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), AMDGPU::VGPR0)
- .addImm(1);
+ case AMDGPU::S_TRAP_PSEUDO: {
+ const DebugLoc &DL = MI.getDebugLoc();
+ const int TrapType = MI.getOperand(0).getImm();
- MachineFunction *MF = BB->getParent();
- SIMachineFunctionInfo *Info = MF->getInfo<SIMachineFunctionInfo>();
- unsigned UserSGPR = Info->getQueuePtrUserSGPR();
- assert(UserSGPR != AMDGPU::NoRegister);
+ if (Subtarget->getTrapHandlerAbi() == SISubtarget::TrapHandlerAbiHsa &&
+ Subtarget->isTrapHandlerEnabled()) {
- if (!BB->isLiveIn(UserSGPR))
- BB->addLiveIn(UserSGPR);
+ MachineFunction *MF = BB->getParent();
+ SIMachineFunctionInfo *Info = MF->getInfo<SIMachineFunctionInfo>();
+ unsigned UserSGPR = Info->getQueuePtrUserSGPR();
+ assert(UserSGPR != AMDGPU::NoRegister);
- BuildMI(*BB, MI, DL, TII->get(AMDGPU::COPY), AMDGPU::SGPR0_SGPR1)
- .addReg(UserSGPR);
- BuildMI(*BB, MI, DL, TII->get(AMDGPU::S_TRAP)).addImm(0x1)
- .addReg(AMDGPU::VGPR0, RegState::Implicit)
- .addReg(AMDGPU::SGPR0_SGPR1, RegState::Implicit);
+ if (!BB->isLiveIn(UserSGPR))
+ BB->addLiveIn(UserSGPR);
+
+ BuildMI(*BB, MI, DL, TII->get(AMDGPU::COPY), AMDGPU::SGPR0_SGPR1)
+ .addReg(UserSGPR);
+ BuildMI(*BB, MI, DL, TII->get(AMDGPU::S_TRAP))
+ .addImm(TrapType)
+ .addReg(AMDGPU::SGPR0_SGPR1, RegState::Implicit);
+ } else {
+ switch (TrapType) {
+ case SISubtarget::TrapCodeLLVMTrap:
+ BuildMI(*BB, MI, DL, TII->get(AMDGPU::S_ENDPGM));
+ break;
+ case SISubtarget::TrapCodeLLVMDebugTrap: {
+ DiagnosticInfoUnsupported NoTrap(*MF->getFunction(),
+ "debugtrap handler not supported",
+ DL,
+ DS_Warning);
+ LLVMContext &C = MF->getFunction()->getContext();
+ C.diagnose(NoTrap);
+ BuildMI(*BB, MI, DL, TII->get(AMDGPU::S_NOP))
+ .addImm(0);
+ break;
+ }
+ default:
+ llvm_unreachable("unsupported trap handler type!");
+ }
+ }
MI.eraseFromParent();
return BB;
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index a691ef1..9458054 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -631,6 +631,11 @@
int NONE = 0;
}
+def TRAPTYPE {
+ int LLVM_TRAP = 1;
+ int LLVM_DEBUG_TRAP = 2;
+}
+
//===----------------------------------------------------------------------===//
//
// SI Instruction multiclass helpers.
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index ed0609d..be82c9f 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -111,8 +111,7 @@
(ins VSrc_b64:$src0)>;
} // End let hasSideEffects = 0, mayLoad = 0, mayStore = 0, Uses = [EXEC]
-def S_TRAP_PSEUDO : VPseudoInstSI <(outs), (ins),
- [(trap)]> {
+def S_TRAP_PSEUDO : SPseudoInstSI <(outs), (ins i16imm:$simm16)> {
let hasSideEffects = 1;
let SALU = 1;
let usesCustomInserter = 1;
@@ -390,6 +389,15 @@
} // End SubtargetPredicate = isGCN
let Predicates = [isGCN] in {
+def : Pat<
+ (trap),
+ (S_TRAP_PSEUDO TRAPTYPE.LLVM_TRAP)
+>;
+
+def : Pat<
+ (debugtrap),
+ (S_TRAP_PSEUDO TRAPTYPE.LLVM_DEBUG_TRAP)
+>;
def : Pat<
(int_amdgcn_else i64:$src, bb:$target),
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTInfo.h
index c55eaab..991408c 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTInfo.h
@@ -87,7 +87,7 @@
// TODO: cdbg_user
COMPPGM2(enable_sgpr_private_segment_wave_byte_offset, compute_pgm_rsrc2_scratch_en, SCRATCH_EN),
COMPPGM2(user_sgpr_count, compute_pgm_rsrc2_user_sgpr, USER_SGPR),
-// TODO: enable_trap_handler
+COMPPGM2(enable_trap_handler, compute_pgm_rsrc2_trap_handler, TRAP_HANDLER),
COMPPGM2(enable_sgpr_workgroup_id_x, compute_pgm_rsrc2_tgid_x_en, TGID_X_EN),
COMPPGM2(enable_sgpr_workgroup_id_y, compute_pgm_rsrc2_tgid_y_en, TGID_Y_EN),
COMPPGM2(enable_sgpr_workgroup_id_z, compute_pgm_rsrc2_tgid_z_en, TGID_Z_EN),