AMDGPU: Annotate implicitarg.ptr usage
Callable functions need an input passed in for implicitarg.ptr to work.
It cannot be derived from the kernarg segment pointer alone,
because the implicit arguments are placed after the
kernel arguments.
Also adds the missing test for the intrinsic.
llvm-svn: 309398
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
index c68e586..551737c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
@@ -156,8 +156,9 @@
case Intrinsic::amdgcn_dispatch_id:
return "amdgpu-dispatch-id";
case Intrinsic::amdgcn_kernarg_segment_ptr:
- case Intrinsic::amdgcn_implicitarg_ptr:
return "amdgpu-kernarg-segment-ptr";
+ case Intrinsic::amdgcn_implicitarg_ptr:
+ return "amdgpu-implicitarg-ptr";
case Intrinsic::amdgcn_queue_ptr:
case Intrinsic::trap:
case Intrinsic::debugtrap:
@@ -190,7 +191,8 @@
{ "amdgpu-work-group-id-z" },
{ "amdgpu-dispatch-ptr" },
{ "amdgpu-dispatch-id" },
- { "amdgpu-kernarg-segment-ptr" }
+ { "amdgpu-kernarg-segment-ptr" },
+ { "amdgpu-implicitarg-ptr" }
};
if (handleAttr(Parent, Callee, "amdgpu-queue-ptr"))
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
index 389fdc9..2737ef9 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -764,7 +764,8 @@
return getGeneration() >= AMDGPUSubtarget::GFX9;
}
- unsigned getKernArgSegmentSize(const MachineFunction &MF, unsigned ExplictArgBytes) const;
+ unsigned getKernArgSegmentSize(const MachineFunction &MF,
+ unsigned ExplictArgBytes) const;
/// Return the maximum number of waves per SIMD for kernels using \p SGPRs SGPRs
unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const;
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 56db67c..9fb1bdb 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -899,6 +899,13 @@
DAG.getConstant(Offset, SL, PtrVT));
}
+SDValue SITargetLowering::getImplicitArgPtr(SelectionDAG &DAG,
+ const SDLoc &SL) const { // Builds the pointer value used for the implicitarg.ptr intrinsic.
+ auto MFI = DAG.getMachineFunction().getInfo<SIMachineFunctionInfo>();
+ uint64_t Offset = getImplicitParameterOffset(MFI, FIRST_IMPLICIT); // Offset of the first implicit parameter.
+ return lowerKernArgParameterPtr(DAG, SL, DAG.getEntryNode(), Offset); // Entry node is used as the chain.
+}
+
SDValue SITargetLowering::convertArgType(SelectionDAG &DAG, EVT VT, EVT MemVT,
const SDLoc &SL, SDValue Val,
bool Signed,
@@ -3029,8 +3036,9 @@
TRI->getPreloadedValue(MF, Reg), VT);
}
case Intrinsic::amdgcn_implicitarg_ptr: {
- unsigned offset = getImplicitParameterOffset(MFI, FIRST_IMPLICIT);
- return lowerKernArgParameterPtr(DAG, DL, DAG.getEntryNode(), offset);
+ if (MFI->isEntryFunction())
+ return getImplicitArgPtr(DAG, DL);
+ report_fatal_error("amdgcn.implicitarg.ptr not implemented for functions");
}
case Intrinsic::amdgcn_kernarg_segment_ptr: {
unsigned Reg
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h
index 30482dc..b703ced 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -23,6 +23,7 @@
class SITargetLowering final : public AMDGPUTargetLowering {
SDValue lowerKernArgParameterPtr(SelectionDAG &DAG, const SDLoc &SL,
SDValue Chain, uint64_t Offset) const;
+ SDValue getImplicitArgPtr(SelectionDAG &DAG, const SDLoc &SL) const;
SDValue lowerKernargMemParameter(SelectionDAG &DAG, EVT VT, EVT MemVT,
const SDLoc &SL, SDValue Chain,
uint64_t Offset, bool Signed,
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index a7c8166..04e57be 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -93,11 +93,17 @@
// FIXME: Not really a system SGPR.
PrivateSegmentWaveByteOffsetSystemSGPR = ScratchWaveOffsetReg;
+ if (F->hasFnAttribute("amdgpu-implicitarg-ptr"))
+ ImplicitArgPtr = true;
+ } else {
+ if (F->hasFnAttribute("amdgpu-implicitarg-ptr"))
+ KernargSegmentPtr = true;
}
CallingConv::ID CC = F->getCallingConv();
if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL) {
- KernargSegmentPtr = !F->arg_empty();
+ if (!F->arg_empty())
+ KernargSegmentPtr = true;
WorkGroupIDX = true;
WorkItemIDX = true;
} else if (CC == CallingConv::AMDGPU_PS) {
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
index 4c7f38a..8511403 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -186,6 +186,10 @@
// Other shaders indirect 64-bits at sgpr[0:1]
bool ImplicitBufferPtr : 1;
+ // Pointer to where the ABI inserts special kernel arguments separate from the
+ // user arguments. This is an offset from the KernargSegmentPtr.
+ bool ImplicitArgPtr : 1;
+
MCPhysReg getNextUserSGPR() const {
assert(NumSystemSGPRs == 0 && "System SGPRs must be added after user SGPRs");
return AMDGPU::SGPR0 + NumUserSGPRs;
@@ -346,6 +350,10 @@
return WorkItemIDZ;
}
+ bool hasImplicitArgPtr() const { // Accessor for the ImplicitArgPtr flag.
+ return ImplicitArgPtr;
+ }
+
bool hasImplicitBufferPtr() const {
return ImplicitBufferPtr;
}