AMDGPU: Don't look for DS merge candidates with one use address
The merge is only possible if the base address register is the
same for the two instructions. If there is only the one use,
there's no point in doing an expensive forward scan checking
for memory interference looking for a merge candidate.
This gives a signficant improvement in one extreme testcase.
The code to do the scan is still algorithmically terrible,
so this is still the slowest pass in that example.
llvm-svn: 312096
diff --git a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
index bb803b2..1b2e5e6 100644
--- a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
@@ -251,6 +251,16 @@
bool SILoadStoreOptimizer::findMatchingDSInst(CombineInfo &CI) {
MachineBasicBlock::iterator E = CI.I->getParent()->end();
MachineBasicBlock::iterator MBBI = CI.I;
+
+ int AddrIdx = AMDGPU::getNamedOperandIdx(CI.I->getOpcode(),
+ AMDGPU::OpName::addr);
+ const MachineOperand &AddrReg0 = CI.I->getOperand(AddrIdx);
+
+ // We only ever merge operations with the same base address register, so don't
+ // bother scanning forward if there are no other uses.
+ if (MRI->hasOneNonDBGUse(AddrReg0.getReg()))
+ return false;
+
++MBBI;
SmallVector<const MachineOperand *, 8> DefsToMove;
@@ -300,9 +310,6 @@
if (addToListsIfDependent(*MBBI, DefsToMove, CI.InstsToMove))
continue;
- int AddrIdx = AMDGPU::getNamedOperandIdx(CI.I->getOpcode(),
- AMDGPU::OpName::addr);
- const MachineOperand &AddrReg0 = CI.I->getOperand(AddrIdx);
const MachineOperand &AddrReg1 = MBBI->getOperand(AddrIdx);
// Check same base pointer. Be careful of subregisters, which can occur with