AMDGPU: Add implicitarg.ptr intrinsic.
Points to the start of implicit arguments (appended after explicit arguments)
Differential Revision: http://reviews.llvm.org/D20297
llvm-svn: 273317
diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index d364847..18008c67 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -334,6 +334,10 @@
GCCBuiltin<"__builtin_amdgcn_kernarg_segment_ptr">,
Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 2>], [], [IntrNoMem]>;
+def int_amdgcn_implicitarg_ptr :
+ GCCBuiltin<"__builtin_amdgcn_implicitarg_ptr">,
+ Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 2>], [], [IntrNoMem]>;
+
// __builtin_amdgcn_interp_p1 <i>, <attr_chan>, <attr>, <m0>
def int_amdgcn_interp_p1 :
GCCBuiltin<"__builtin_amdgcn_interp_p1">,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
index ecf69f8..d4b3766 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -201,8 +201,9 @@
unsigned Reg, EVT VT) const;
enum ImplicitParameter {
- GRID_DIM,
- GRID_OFFSET
+ FIRST_IMPLICIT,
+ GRID_DIM = FIRST_IMPLICIT,
+ GRID_OFFSET,
};
/// \brief Helper function that returns the byte offset of the given
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 2af8e51..3bac7dd 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -534,24 +534,29 @@
return TargetLowering::isTypeDesirableForOp(Op, VT);
}
-SDValue SITargetLowering::LowerParameter(SelectionDAG &DAG, EVT VT, EVT MemVT,
- const SDLoc &SL, SDValue Chain,
- unsigned Offset, bool Signed) const {
+SDValue SITargetLowering::LowerParameterPtr(SelectionDAG &DAG,
+ const SDLoc &SL, SDValue Chain,
+ unsigned Offset) const {
const DataLayout &DL = DAG.getDataLayout();
MachineFunction &MF = DAG.getMachineFunction();
const SIRegisterInfo *TRI =
static_cast<const SIRegisterInfo*>(Subtarget->getRegisterInfo());
unsigned InputPtrReg = TRI->getPreloadedValue(MF, SIRegisterInfo::KERNARG_SEGMENT_PTR);
- Type *Ty = VT.getTypeForEVT(*DAG.getContext());
-
MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
MVT PtrVT = getPointerTy(DL, AMDGPUAS::CONSTANT_ADDRESS);
- PointerType *PtrTy = PointerType::get(Ty, AMDGPUAS::CONSTANT_ADDRESS);
SDValue BasePtr = DAG.getCopyFromReg(Chain, SL,
MRI.getLiveInVirtReg(InputPtrReg), PtrVT);
- SDValue Ptr = DAG.getNode(ISD::ADD, SL, PtrVT, BasePtr,
- DAG.getConstant(Offset, SL, PtrVT));
+ return DAG.getNode(ISD::ADD, SL, PtrVT, BasePtr,
+ DAG.getConstant(Offset, SL, PtrVT));
+}
+SDValue SITargetLowering::LowerParameter(SelectionDAG &DAG, EVT VT, EVT MemVT,
+ const SDLoc &SL, SDValue Chain,
+ unsigned Offset, bool Signed) const {
+ const DataLayout &DL = DAG.getDataLayout();
+ Type *Ty = VT.getTypeForEVT(*DAG.getContext());
+ MVT PtrVT = getPointerTy(DL, AMDGPUAS::CONSTANT_ADDRESS);
+ PointerType *PtrTy = PointerType::get(Ty, AMDGPUAS::CONSTANT_ADDRESS);
SDValue PtrOffset = DAG.getUNDEF(PtrVT);
MachinePointerInfo PtrInfo(UndefValue::get(PtrTy));
@@ -561,6 +566,7 @@
if (MemVT.isFloatingPoint())
ExtTy = ISD::EXTLOAD;
+ SDValue Ptr = LowerParameterPtr(DAG, SL, Chain, Offset);
return DAG.getLoad(ISD::UNINDEXED, ExtTy,
VT, SL, Chain, Ptr, PtrOffset, PtrInfo, MemVT,
false, // isVolatile
@@ -1540,6 +1546,10 @@
return CreateLiveInRegister(DAG, &AMDGPU::SReg_64RegClass,
TRI->getPreloadedValue(MF, Reg), VT);
}
+ case Intrinsic::amdgcn_implicitarg_ptr: {
+ unsigned offset = getImplicitParameterOffset(MFI, FIRST_IMPLICIT);
+ return LowerParameterPtr(DAG, DL, DAG.getEntryNode(), offset);
+ }
case Intrinsic::amdgcn_kernarg_segment_ptr: {
unsigned Reg
= TRI->getPreloadedValue(MF, SIRegisterInfo::KERNARG_SEGMENT_PTR);
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h
index 6831e89..4f709a5 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -21,7 +21,9 @@
namespace llvm {
class SITargetLowering final : public AMDGPUTargetLowering {
- SDValue LowerParameter(SelectionDAG &DAG, EVT VT, EVT MemVT, const SDLoc &DL,
+ SDValue LowerParameterPtr(SelectionDAG &DAG, const SDLoc &SL, SDValue Chain,
+ unsigned Offset) const;
+ SDValue LowerParameter(SelectionDAG &DAG, EVT VT, EVT MemVT, const SDLoc &SL,
SDValue Chain, unsigned Offset, bool Signed) const;
SDValue LowerGlobalAddress(AMDGPUMachineFunction *MFI, SDValue Op,
SelectionDAG &DAG) const override;
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.kernarg.segment.ptr.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.kernarg.segment.ptr.ll
index eaffe8c..07650d9 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.kernarg.segment.ptr.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.kernarg.segment.ptr.ll
@@ -15,7 +15,20 @@
ret void
}
+; ALL-LABEL: {{^}}test_implicit:
+; 10 + 9 (36 prepended implicit bytes) + 2(out pointer) = 21 = 0x15
+; MESA: s_load_dword s{{[0-9]+}}, s[0:1], 0x15
+define void @test_implicit(i32 addrspace(1)* %out) #1 {
+ %implicitarg.ptr = call noalias i8 addrspace(2)* @llvm.amdgcn.implicitarg.ptr()
+ %header.ptr = bitcast i8 addrspace(2)* %implicitarg.ptr to i32 addrspace(2)*
+ %gep = getelementptr i32, i32 addrspace(2)* %header.ptr, i64 10
+ %value = load i32, i32 addrspace(2)* %gep
+ store i32 %value, i32 addrspace(1)* %out
+ ret void
+}
+
declare i8 addrspace(2)* @llvm.amdgcn.kernarg.segment.ptr() #0
+declare i8 addrspace(2)* @llvm.amdgcn.implicitarg.ptr() #0
attributes #0 = { nounwind readnone }
attributes #1 = { nounwind }