[AMDGPU] Change llvm.debugtrap to be a debug breakpoint that can resume execution.

No longer require the queue pointer to be passed in in fixed SGPRs.

Differential Revision: https://reviews.llvm.org/D46769

llvm-svn: 332485
diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst
index 7d358df..eb0af5e 100644
--- a/llvm/docs/AMDGPUUsage.rst
+++ b/llvm/docs/AMDGPUUsage.rst
@@ -3788,14 +3788,33 @@
                                            ``queue_ptr`` terminated and its
                                                          associated queue put
                                                          into the error state.
-     ``llvm.debugtrap``  ``s_trap 0x03`` ``SGPR0-1``:    If debugger not
-                                           ``queue_ptr`` installed handled
-                                                         same as ``llvm.trap``.
-     debugger breakpoint ``s_trap 0x07``                 Reserved for  debugger
+     ``llvm.debugtrap``  ``s_trap 0x03``                 - If debugger not
+                                                           installed then
+                                                           behaves as a
+                                                           no-operation. The
+                                                           trap handler is
+                                                           entered and
+                                                           immediately returns
+                                                           to continue
+                                                           execution of the
+                                                           wavefront.
+                                                         - If the debugger is
+                                                           installed, causes
+                                                           the debug trap to be
+                                                           reported by the
+                                                           debugger and the
+                                                           wavefront is put in
+                                                           the halt state until
+                                                           resumed by the
+                                                           debugger.
+     reserved            ``s_trap 0x04``                 Reserved.
+     reserved            ``s_trap 0x05``                 Reserved.
+     reserved            ``s_trap 0x06``                 Reserved.
+     debugger breakpoint ``s_trap 0x07``                 Reserved for debugger
                                                          breakpoints.
-     debugger            ``s_trap 0x08``                 Reserved for debugger.
-     debugger            ``s_trap 0xfe``                 Reserved for debugger.
-     debugger            ``s_trap 0xff``                 Reserved for debugger.
+     reserved            ``s_trap 0x08``                 Reserved.
+     reserved            ``s_trap 0xfe``                 Reserved.
+     reserved            ``s_trap 0xff``                 Reserved.
      =================== =============== =============== =======================
 
 AMDPAL
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index f21dd62..9181487 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -3349,8 +3349,9 @@
   case ISD::FP_ROUND:
     return lowerFP_ROUND(Op, DAG);
   case ISD::TRAP:
-  case ISD::DEBUGTRAP:
     return lowerTRAP(Op, DAG);
+  case ISD::DEBUGTRAP:
+    return lowerDEBUGTRAP(Op, DAG);
   }
   return SDValue();
 }
@@ -4011,40 +4012,37 @@
 
 SDValue SITargetLowering::lowerTRAP(SDValue Op, SelectionDAG &DAG) const {
   SDLoc SL(Op);
-  MachineFunction &MF = DAG.getMachineFunction();
   SDValue Chain = Op.getOperand(0);
 
-  unsigned TrapID = Op.getOpcode() == ISD::DEBUGTRAP ?
-    SISubtarget::TrapIDLLVMDebugTrap : SISubtarget::TrapIDLLVMTrap;
-
-  if (Subtarget->getTrapHandlerAbi() == SISubtarget::TrapHandlerAbiHsa &&
-      Subtarget->isTrapHandlerEnabled()) {
-    SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
-    unsigned UserSGPR = Info->getQueuePtrUserSGPR();
-    assert(UserSGPR != AMDGPU::NoRegister);
-
-    SDValue QueuePtr = CreateLiveInRegister(
-      DAG, &AMDGPU::SReg_64RegClass, UserSGPR, MVT::i64);
-
-    SDValue SGPR01 = DAG.getRegister(AMDGPU::SGPR0_SGPR1, MVT::i64);
-
-    SDValue ToReg = DAG.getCopyToReg(Chain, SL, SGPR01,
-                                     QueuePtr, SDValue());
-
-    SDValue Ops[] = {
-      ToReg,
-      DAG.getTargetConstant(TrapID, SL, MVT::i16),
-      SGPR01,
-      ToReg.getValue(1)
-    };
-
-    return DAG.getNode(AMDGPUISD::TRAP, SL, MVT::Other, Ops);
-  }
-
-  switch (TrapID) {
-  case SISubtarget::TrapIDLLVMTrap:
+  if (Subtarget->getTrapHandlerAbi() != SISubtarget::TrapHandlerAbiHsa ||
+      !Subtarget->isTrapHandlerEnabled())
     return DAG.getNode(AMDGPUISD::ENDPGM, SL, MVT::Other, Chain);
-  case SISubtarget::TrapIDLLVMDebugTrap: {
+
+  MachineFunction &MF = DAG.getMachineFunction();
+  SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
+  unsigned UserSGPR = Info->getQueuePtrUserSGPR();
+  assert(UserSGPR != AMDGPU::NoRegister);
+  SDValue QueuePtr = CreateLiveInRegister(
+    DAG, &AMDGPU::SReg_64RegClass, UserSGPR, MVT::i64);
+  SDValue SGPR01 = DAG.getRegister(AMDGPU::SGPR0_SGPR1, MVT::i64);
+  SDValue ToReg = DAG.getCopyToReg(Chain, SL, SGPR01,
+                                   QueuePtr, SDValue());
+  SDValue Ops[] = {
+    ToReg,
+    DAG.getTargetConstant(SISubtarget::TrapIDLLVMTrap, SL, MVT::i16),
+    SGPR01,
+    ToReg.getValue(1)
+  };
+  return DAG.getNode(AMDGPUISD::TRAP, SL, MVT::Other, Ops);
+}
+
+SDValue SITargetLowering::lowerDEBUGTRAP(SDValue Op, SelectionDAG &DAG) const {
+  SDLoc SL(Op);
+  SDValue Chain = Op.getOperand(0);
+  MachineFunction &MF = DAG.getMachineFunction();
+
+  if (Subtarget->getTrapHandlerAbi() != SISubtarget::TrapHandlerAbiHsa ||
+      !Subtarget->isTrapHandlerEnabled()) {
     DiagnosticInfoUnsupported NoTrap(MF.getFunction(),
                                      "debugtrap handler not supported",
                                      Op.getDebugLoc(),
@@ -4053,11 +4051,12 @@
     Ctx.diagnose(NoTrap);
     return Chain;
   }
-  default:
-    llvm_unreachable("unsupported trap handler type!");
-  }
 
-  return Chain;
+  SDValue Ops[] = {
+    Chain,
+    DAG.getTargetConstant(SISubtarget::TrapIDLLVMDebugTrap, SL, MVT::i16)
+  };
+  return DAG.getNode(AMDGPUISD::TRAP, SL, MVT::Other, Ops);
 }
 
 SDValue SITargetLowering::getSegmentAperture(unsigned AS, const SDLoc &DL,
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h
index 3a99994..ae8b19a 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -86,6 +86,7 @@
   SDValue lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerTRAP(SDValue Op, SelectionDAG &DAG) const;
+  SDValue lowerDEBUGTRAP(SDValue Op, SelectionDAG &DAG) const;
 
   SDNode *adjustWritemask(MachineSDNode *&N, SelectionDAG &DAG) const;
 
diff --git a/llvm/test/CodeGen/AMDGPU/trap.ll b/llvm/test/CodeGen/AMDGPU/trap.ll
index 04ff4c8..53398d5 100644
--- a/llvm/test/CodeGen/AMDGPU/trap.ll
+++ b/llvm/test/CodeGen/AMDGPU/trap.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=amdgcn--amdhsa -verify-machineinstrs < %s | FileCheck -check-prefix=HSA-TRAP %s
+; RUN: llc -mtriple=amdgcn--amdhsa -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=HSA-TRAP %s
 
 ; RUN: llc -mtriple=amdgcn--amdhsa -mattr=+trap-handler -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=HSA-TRAP %s
 ; RUN: llc -mtriple=amdgcn--amdhsa -mattr=-trap-handler -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=NO-HSA-TRAP %s
@@ -15,15 +15,15 @@
 ; RUN: llc -march=amdgcn -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=GCN-WARNING %s
 
 declare void @llvm.trap() #0
-declare void @llvm.debugtrap() #0
+declare void @llvm.debugtrap() #1
 
 ; MESA-TRAP: .section .AMDGPU.config
 ; MESA-TRAP:  .long   47180
-; MESA-TRAP-NEXT: .long   204
+; MESA-TRAP-NEXT: .long   208
 
 ; NOMESA-TRAP: .section .AMDGPU.config
 ; NOMESA-TRAP:  .long   47180
-; NOMESA-TRAP-NEXT: .long   140
+; NOMESA-TRAP-NEXT: .long   144
 
 ; GCN-LABEL: {{^}}hsa_trap:
 ; HSA-TRAP: enable_trap_handler = 1
@@ -38,24 +38,27 @@
 ; TRAP-BIT: enable_trap_handler = 1
 ; NO-TRAP-BIT: enable_trap_handler = 0
 ; NO-MESA-TRAP: s_endpgm
-define amdgpu_kernel void @hsa_trap() {
+define amdgpu_kernel void @hsa_trap(i32 addrspace(1)* nocapture readonly %arg0) {
+  store volatile i32 1, i32 addrspace(1)* %arg0
   call void @llvm.trap()
+  unreachable
+  store volatile i32 2, i32 addrspace(1)* %arg0
   ret void
 }
 
 ; MESA-TRAP: .section .AMDGPU.config
 ; MESA-TRAP:  .long   47180
-; MESA-TRAP-NEXT: .long   204
+; MESA-TRAP-NEXT: .long   208
 
 ; NOMESA-TRAP: .section .AMDGPU.config
 ; NOMESA-TRAP:  .long   47180
-; NOMESA-TRAP-NEXT: .long   140
+; NOMESA-TRAP-NEXT: .long   144
 
-; GCN-WARNING: warning: <unknown>:0:0: in function hsa_debugtrap void (): debugtrap handler not supported
+; GCN-WARNING: warning: <unknown>:0:0: in function hsa_debugtrap void (i32 addrspace(1)*): debugtrap handler not supported
 ; GCN-LABEL: {{^}}hsa_debugtrap:
 ; HSA-TRAP: enable_trap_handler = 1
-; HSA-TRAP: s_mov_b64 s[0:1], s[4:5]
 ; HSA-TRAP: s_trap 3
+; HSA-TRAP: flat_store_dword v[0:1], v3
 
 ; for llvm.debugtrap in non-hsa path without ABI, generate a warning and a s_endpgm instruction
 ; NO-HSA-TRAP: enable_trap_handler = 0
@@ -64,8 +67,10 @@
 ; TRAP-BIT: enable_trap_handler = 1
 ; NO-TRAP-BIT: enable_trap_handler = 0
 ; NO-MESA-TRAP: s_endpgm
-define amdgpu_kernel void @hsa_debugtrap() {
+define amdgpu_kernel void @hsa_debugtrap(i32 addrspace(1)* nocapture readonly %arg0) {
+  store volatile i32 1, i32 addrspace(1)* %arg0
   call void @llvm.debugtrap()
+  store volatile i32 2, i32 addrspace(1)* %arg0
   ret void
 }
 
@@ -75,8 +80,11 @@
 ; NO-TRAP-BIT: enable_trap_handler = 0
 ; NO-HSA-TRAP: s_endpgm
 ; NO-MESA-TRAP: s_endpgm
-define amdgpu_kernel void @trap() {
+define amdgpu_kernel void @trap(i32 addrspace(1)* nocapture readonly %arg0) {
+  store volatile i32 1, i32 addrspace(1)* %arg0
   call void @llvm.trap()
+  unreachable
+  store volatile i32 2, i32 addrspace(1)* %arg0
   ret void
 }
 
@@ -84,10 +92,10 @@
 ; TRAP-BIT: enable_trap_handler = 1
 ; NO-TRAP-BIT: enable_trap_handler = 0
 
-; HSA: BB{{[0-9]_[0-9]+]]: ; %trap
+; HSA-TRAP: BB{{[0-9]_[0-9]+}}: ; %trap
 ; HSA-TRAP: s_mov_b64 s[0:1], s[4:5]
 ; HSA-TRAP-NEXT: s_trap 2
-define amdgpu_kernel void @non_entry_trap(i32 addrspace(1)* nocapture readonly %arg0) local_unnamed_addr #1 {
+define amdgpu_kernel void @non_entry_trap(i32 addrspace(1)* nocapture readonly %arg0) local_unnamed_addr {
 entry:
   %tmp29 = load volatile i32, i32 addrspace(1)* %arg0
   %cmp = icmp eq i32 %tmp29, -1
@@ -98,7 +106,9 @@
   unreachable
 
 ret:
+  store volatile i32 3, i32 addrspace(1)* %arg0
   ret void
 }
 
 attributes #0 = { nounwind noreturn }
+attributes #1 = { nounwind }