AMDGPU: Fix emitting multiple stack loads for stack passed workitems
The same stack is loaded for each workitem ID, and each use. Nothing
prevents you from creating multiple fixed stack objects with the same
offsets, so this was creating a load for each unique frame index,
despite them being the same offset. Re-use the same frame index so the
loads are CSEable.
llvm-svn: 371148
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index a9596b9..04f1da2 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -4162,14 +4162,28 @@
return DAG.getCopyFromReg(DAG.getEntryNode(), SL, VReg, VT);
}
+// This may be called multiple times, and nothing prevents creating multiple
+// objects at the same offset. See if we already defined this object.
+static int getOrCreateFixedStackObject(MachineFrameInfo &MFI, unsigned Size,
+ int64_t Offset) {
+ for (int I = MFI.getObjectIndexBegin(); I < 0; ++I) {
+ if (MFI.getObjectOffset(I) == Offset) {
+ assert(MFI.getObjectSize(I) == Size);
+ return I;
+ }
+ }
+
+ return MFI.CreateFixedObject(Size, Offset, true);
+}
+
SDValue AMDGPUTargetLowering::loadStackInputValue(SelectionDAG &DAG,
EVT VT,
const SDLoc &SL,
int64_t Offset) const {
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo &MFI = MF.getFrameInfo();
+ int FI = getOrCreateFixedStackObject(MFI, VT.getStoreSize(), Offset);
- int FI = MFI.CreateFixedObject(VT.getStoreSize(), Offset, true);
auto SrcPtrInfo = MachinePointerInfo::getStack(MF, Offset);
SDValue Ptr = DAG.getFrameIndex(FI, MVT::i32);