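[AMDGPU] Resubmit SDWA peephole: enable by default
Change the default of the -amdgpu-sdwa-peephole option from false to true.
In SIPeepholeSDWA.cpp, rewrite the sub-register lane-mask subset check to
operate on LaneBitmask values directly (SuperMask |= ~SubMask; all())
instead of passing the masks' integer values to regmaskSubsetEqual.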
Reviewers: vpykhtin, rampitec, arsenm
Subscribers: qcolombet, kzhuravl, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye
Differential Revision: https://reviews.llvm.org/D31671
llvm-svn: 299654
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index c6c20b8..f03adfc 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -105,7 +105,7 @@
static cl::opt<bool> EnableSDWAPeephole(
"amdgpu-sdwa-peephole",
cl::desc("Enable SDWA peepholer"),
- cl::init(false));
+ cl::init(true));
// Enable address space based alias analysis
static cl::opt<bool> EnableAMDGPUAliasAnalysis("enable-amdgpu-aa", cl::Hidden,
diff --git a/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp b/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
index 67c86c3..599c9d7 100644
--- a/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
+++ b/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
@@ -233,11 +233,10 @@
if (SuperReg.getReg() != SubReg.getReg())
return false;
- LaneBitmask::Type SuperMask =
- TRI->getSubRegIndexLaneMask(SuperReg.getSubReg()).getAsInteger();
- LaneBitmask::Type SubMask =
- TRI->getSubRegIndexLaneMask(SubReg.getSubReg()).getAsInteger();
- return TRI->regmaskSubsetEqual(&SubMask, &SuperMask);
+ LaneBitmask SuperMask = TRI->getSubRegIndexLaneMask(SuperReg.getSubReg());
+ LaneBitmask SubMask = TRI->getSubRegIndexLaneMask(SubReg.getSubReg());
+ SuperMask |= ~SubMask;
+ return SuperMask.all();
}
uint64_t SDWASrcOperand::getSrcMods() const {