AMDGPU: Fix immediate folding logic when shrinking instructions
If the literal is being folded into src0, it doesn't matter whether
src0 currently holds an SGPR, because the operand is replaced with the
literal outright.
Also fixes initially selecting the 32-bit (e32) encodings of some
instructions, which confused commuting as well.
llvm-svn: 281117
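
A minimal sketch of the corrected rule, in plain C++. The helper name
and its boolean parameters are hypothetical, invented here purely to
illustrate the logic; they are not LLVM API:

  // Hypothetical helper, not LLVM API: the old code refused to fold a
  // literal whenever src0 held an SGPR. The corrected rule only treats
  // the SGPR as a blocker when the literal is folded somewhere other
  // than src0, since folding into src0 rewrites that operand to an
  // immediate and its current register class becomes irrelevant.
  bool foldBlockedBySGPR(bool Src0IsSGPR, bool FoldingIntoSrc0) {
    return Src0IsSGPR && !FoldingIntoSrc0;
  }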
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index c84847f..5e0d34d 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -1852,13 +1852,13 @@
case AMDGPU::S_SUB_U32: return AMDGPU::V_SUB_I32_e32;
case AMDGPU::S_SUBB_U32: return AMDGPU::V_SUBB_U32_e32;
case AMDGPU::S_MUL_I32: return AMDGPU::V_MUL_LO_I32;
- case AMDGPU::S_AND_B32: return AMDGPU::V_AND_B32_e32;
- case AMDGPU::S_OR_B32: return AMDGPU::V_OR_B32_e32;
- case AMDGPU::S_XOR_B32: return AMDGPU::V_XOR_B32_e32;
- case AMDGPU::S_MIN_I32: return AMDGPU::V_MIN_I32_e32;
- case AMDGPU::S_MIN_U32: return AMDGPU::V_MIN_U32_e32;
- case AMDGPU::S_MAX_I32: return AMDGPU::V_MAX_I32_e32;
- case AMDGPU::S_MAX_U32: return AMDGPU::V_MAX_U32_e32;
+ case AMDGPU::S_AND_B32: return AMDGPU::V_AND_B32_e64;
+ case AMDGPU::S_OR_B32: return AMDGPU::V_OR_B32_e64;
+ case AMDGPU::S_XOR_B32: return AMDGPU::V_XOR_B32_e64;
+ case AMDGPU::S_MIN_I32: return AMDGPU::V_MIN_I32_e64;
+ case AMDGPU::S_MIN_U32: return AMDGPU::V_MIN_U32_e64;
+ case AMDGPU::S_MAX_I32: return AMDGPU::V_MAX_I32_e64;
+ case AMDGPU::S_MAX_U32: return AMDGPU::V_MAX_U32_e64;
case AMDGPU::S_ASHR_I32: return AMDGPU::V_ASHR_I32_e32;
case AMDGPU::S_ASHR_I64: return AMDGPU::V_ASHR_I64;
case AMDGPU::S_LSHL_B32: return AMDGPU::V_LSHL_B32_e32;
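
The hunk above switches several SALU-to-VALU mappings from the e32
(VOP2) to the e64 (VOP3) encodings. A rough sketch of the pattern, in
the shape of the surrounding switch; it only compiles inside the
AMDGPU backend where these opcode enums are defined, and the elided
cases follow the diff:

  // Sketch mirroring the switch in this file: map a scalar ALU opcode
  // to its vector ALU equivalent. Selecting the e64 (VOP3) form first
  // avoids VOP2's restriction that only src0 may be an SGPR or literal,
  // so commuting stays legal; SIShrinkInstructions can still shrink
  // the result back to e32 later when the operands allow it.
  static unsigned mapSALUToVALU(unsigned Opc) {
    switch (Opc) {
    case AMDGPU::S_AND_B32: return AMDGPU::V_AND_B32_e64;
    case AMDGPU::S_OR_B32:  return AMDGPU::V_OR_B32_e64;
    case AMDGPU::S_XOR_B32: return AMDGPU::V_XOR_B32_e64;
    // ... min/max cases as in the hunk above ...
    default: return Opc;
    }
  }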
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 94506f2..b8f3c10 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -1871,7 +1871,7 @@
def : Pat <
(fneg (fabs f32:$src)),
- (S_OR_B32 $src, 0x80000000) // Set sign bit
+ (S_OR_B32 $src, (S_MOV_B32 0x80000000)) // Set sign bit
>;
// FIXME: Should use S_OR_B32
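
For reference, the bit manipulation this pattern implements:
fneg(fabs(x)) just forces the IEEE-754 sign bit on. Materializing the
0x80000000 mask with S_MOV_B32 means that if the OR is later moved to
the VALU, whose e64 (VOP3) encoding cannot take a 32-bit literal on
these targets, the mask is already in a register. A standalone C++
illustration of the trick:

  #include <cstdint>
  #include <cstring>

  // -|x|: set the sign bit unconditionally; equivalent to
  // fneg(fabs(x)) without any floating-point operations.
  float fnegFabs(float X) {
    uint32_t Bits;
    std::memcpy(&Bits, &X, sizeof(Bits)); // well-defined type pun
    Bits |= 0x80000000u;                  // set the sign bit
    std::memcpy(&X, &Bits, sizeof(Bits));
    return X;
  }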
diff --git a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
index c891af7..e72b7d4 100644
--- a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
+++ b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
@@ -134,7 +134,6 @@
assert(TII->isVOP1(MI) || TII->isVOP2(MI) || TII->isVOPC(MI));
- const SIRegisterInfo &TRI = TII->getRegisterInfo();
int Src0Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0);
MachineOperand &Src0 = MI.getOperand(Src0Idx);
@@ -144,12 +143,6 @@
TII->isLiteralConstant(Src0, TII->getOpSize(MI, Src0Idx)))
return;
- // Literal constants and SGPRs can only be used in Src0, so if Src0 is an
- // SGPR, we cannot commute the instruction, so we can't fold any literal
- // constants.
- if (Src0.isReg() && !isVGPR(&Src0, TRI, MRI))
- return;
-
// Try to fold Src0
if (Src0.isReg() && MRI.hasOneUse(Src0.getReg())) {
unsigned Reg = Src0.getReg();
@@ -158,7 +151,8 @@
MachineOperand &MovSrc = Def->getOperand(1);
bool ConstantFolded = false;
- if (MovSrc.isImm() && isUInt<32>(MovSrc.getImm())) {
+ if (MovSrc.isImm() && (isInt<32>(MovSrc.getImm()) ||
+ isUInt<32>(MovSrc.getImm()))) {
Src0.ChangeToImmediate(MovSrc.getImm());
ConstantFolded = true;
}
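
The widened check accepts any immediate representable in 32 bits,
signed or unsigned. MachineOperand stores immediates as int64_t, so a
negative 32-bit value arrives sign-extended (-1 is 0xFFFFFFFFFFFFFFFF)
and fails isUInt<32> alone even though it fits in a 32-bit literal. A
standalone sketch, assuming LLVM's MathExtras.h is on the include path:

  #include "llvm/Support/MathExtras.h"
  #include <cassert>

  // Mirrors the widened condition: a 64-bit immediate can be encoded
  // as a 32-bit literal if it fits as either a signed or an unsigned
  // 32-bit value.
  static bool fitsIn32BitLiteral(int64_t Imm) {
    return llvm::isInt<32>(Imm) || llvm::isUInt<32>(Imm);
  }

  int main() {
    assert(fitsIn32BitLiteral(-1));           // sign-extended, now accepted
    assert(fitsIn32BitLiteral(0xFFFFFFFFll)); // max unsigned 32-bit value
    assert(!fitsIn32BitLiteral(1ll << 40));   // truly 64-bit: still rejected
    return 0;
  }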