AMDGPU: Use unsigned compare for eq/ne
For some reason both of these are available, except for scalar
64-bit compares, which only have the u64 form. I'm not sure why
both exist (I'm guessing it's for the one-bit inputs we don't use),
but for consistency always use the unsigned one.
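
The SIShrinkInstructions hunk follows from this: eq/ne s_cmp is now
initially selected to the unsigned S_CMPK opcode, and the shrink pass
flips to the signed form when the immediate fits as a signed simm16
(the !HasUImm case in the diff). Either form gives the same eq/ne
result; the choice only decides which imm16 encoding can hold the
constant. A minimal standalone sketch of that decision, assuming the
semantics shown in the hunk below (helper names and opcode strings
here are illustrative, not LLVM API):

    #include <cstdint>
    #include <iostream>
    #include <string>

    // An immediate can be encoded by a SOPK compare if it fits in either
    // a signed or an unsigned 16-bit field.
    static bool fitsSImm16(int64_t Imm) { return Imm >= -32768 && Imm <= 32767; }
    static bool fitsUImm16(int64_t Imm) { return Imm >= 0 && Imm <= 0xffff; }

    // eq/ne starts out as the unsigned S_CMPK opcode; switch to the signed
    // form when the immediate is encodable as a signed simm16.
    static std::string pickCmpkEqOpcode(int64_t Imm) {
      if (fitsSImm16(Imm))
        return "S_CMPK_EQ_I32"; // sign-extended 16-bit encoding works
      if (fitsUImm16(Imm))
        return "S_CMPK_EQ_U32"; // only the zero-extended encoding fits
      return "S_CMP_EQ_U32";    // does not fit imm16 at all; keep the full compare
    }

    int main() {
      std::cout << pickCmpkEqOpcode(-1) << '\n';    // simm16 -> signed form
      std::cout << pickCmpkEqOpcode(40000) << '\n'; // only uimm16 -> unsigned form
    }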
llvm-svn: 282832
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 0fed33e..da725da 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -407,7 +407,7 @@
} else {
// FIXME: Hack until VReg_1 removed.
assert(AMDGPU::VGPR_32RegClass.contains(SrcReg));
- BuildMI(MBB, MI, DL, get(AMDGPU::V_CMP_NE_I32_e32))
+ BuildMI(MBB, MI, DL, get(AMDGPU::V_CMP_NE_U32_e32))
.addImm(0)
.addReg(SrcReg, getKillRegState(KillSrc));
}
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index e4114ad..95c30a5 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -882,12 +882,12 @@
def : Pat <
(i1 (trunc i32:$a)),
- (V_CMP_EQ_I32_e64 (S_AND_B32 (i32 1), $a), 1)
+ (V_CMP_EQ_U32_e64 (S_AND_B32 (i32 1), $a), 1)
>;
def : Pat <
(i1 (trunc i64:$a)),
- (V_CMP_EQ_I32_e64 (S_AND_B32 (i32 1),
+ (V_CMP_EQ_U32_e64 (S_AND_B32 (i32 1),
(EXTRACT_SUBREG $a, sub0)), 1)
>;
diff --git a/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp b/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp
index dc1d20d..396764b 100644
--- a/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp
+++ b/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp
@@ -131,7 +131,7 @@
MI.eraseFromParent();
} else if (TRI->getCommonSubClass(DstRC, &AMDGPU::SGPR_64RegClass) &&
SrcRC == &AMDGPU::VReg_1RegClass) {
- BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(AMDGPU::V_CMP_NE_I32_e64))
+ BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(AMDGPU::V_CMP_NE_U32_e64))
.addOperand(Dst)
.addOperand(Src)
.addImm(0);
diff --git a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
index 1a0f7d4..010c909 100644
--- a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
+++ b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
@@ -237,13 +237,13 @@
return;
// eq/ne is special because the imm16 can be treated as signed or unsigned,
- // and initially selectd to the signed versions.
- if (SOPKOpc == AMDGPU::S_CMPK_EQ_I32 || SOPKOpc == AMDGPU::S_CMPK_LG_I32) {
+ // and initially selected to the unsigned versions.
+ if (SOPKOpc == AMDGPU::S_CMPK_EQ_U32 || SOPKOpc == AMDGPU::S_CMPK_LG_U32) {
bool HasUImm;
if (isKImmOrKUImmOperand(TII, Src1, HasUImm)) {
- if (HasUImm) {
- SOPKOpc = (SOPKOpc == AMDGPU::S_CMPK_EQ_I32) ?
- AMDGPU::S_CMPK_EQ_U32 : AMDGPU::S_CMPK_LG_U32;
+ if (!HasUImm) {
+ SOPKOpc = (SOPKOpc == AMDGPU::S_CMPK_EQ_U32) ?
+ AMDGPU::S_CMPK_EQ_I32 : AMDGPU::S_CMPK_LG_I32;
}
MI.setDesc(TII->get(SOPKOpc));
diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td
index 9744cd3..5a42016 100644
--- a/llvm/lib/Target/AMDGPU/SOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td
@@ -644,8 +644,8 @@
class SOPC_64_32<bits<7> op, string opName, list<dag> pattern = []>
: SOPC_Base<op, SSrc_b64, SSrc_b32, opName, pattern>;
-def S_CMP_EQ_I32 : SOPC_CMP_32 <0x00, "s_cmp_eq_i32", COND_EQ>;
-def S_CMP_LG_I32 : SOPC_CMP_32 <0x01, "s_cmp_lg_i32", COND_NE>;
+def S_CMP_EQ_I32 : SOPC_CMP_32 <0x00, "s_cmp_eq_i32">;
+def S_CMP_LG_I32 : SOPC_CMP_32 <0x01, "s_cmp_lg_i32">;
def S_CMP_GT_I32 : SOPC_CMP_32 <0x02, "s_cmp_gt_i32", COND_SGT>;
def S_CMP_GE_I32 : SOPC_CMP_32 <0x03, "s_cmp_ge_i32", COND_SGE>;
def S_CMP_LT_I32 : SOPC_CMP_32 <0x04, "s_cmp_lt_i32", COND_SLT, "s_cmp_gt_i32">;
diff --git a/llvm/lib/Target/AMDGPU/VOPCInstructions.td b/llvm/lib/Target/AMDGPU/VOPCInstructions.td
index 30e76aa..8014a2d 100644
--- a/llvm/lib/Target/AMDGPU/VOPCInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOPCInstructions.td
@@ -320,10 +320,10 @@
defm V_CMP_F_I32 : VOPC_I32 <"v_cmp_f_i32">;
defm V_CMP_LT_I32 : VOPC_I32 <"v_cmp_lt_i32", COND_SLT, "v_cmp_gt_i32">;
-defm V_CMP_EQ_I32 : VOPC_I32 <"v_cmp_eq_i32", COND_EQ>;
+defm V_CMP_EQ_I32 : VOPC_I32 <"v_cmp_eq_i32">;
defm V_CMP_LE_I32 : VOPC_I32 <"v_cmp_le_i32", COND_SLE, "v_cmp_ge_i32">;
defm V_CMP_GT_I32 : VOPC_I32 <"v_cmp_gt_i32", COND_SGT>;
-defm V_CMP_NE_I32 : VOPC_I32 <"v_cmp_ne_i32", COND_NE>;
+defm V_CMP_NE_I32 : VOPC_I32 <"v_cmp_ne_i32">;
defm V_CMP_GE_I32 : VOPC_I32 <"v_cmp_ge_i32", COND_SGE>;
defm V_CMP_T_I32 : VOPC_I32 <"v_cmp_t_i32">;
@@ -338,10 +338,10 @@
defm V_CMP_F_I64 : VOPC_I64 <"v_cmp_f_i64">;
defm V_CMP_LT_I64 : VOPC_I64 <"v_cmp_lt_i64", COND_SLT, "v_cmp_gt_i64">;
-defm V_CMP_EQ_I64 : VOPC_I64 <"v_cmp_eq_i64", COND_EQ>;
+defm V_CMP_EQ_I64 : VOPC_I64 <"v_cmp_eq_i64">;
defm V_CMP_LE_I64 : VOPC_I64 <"v_cmp_le_i64", COND_SLE, "v_cmp_ge_i64">;
defm V_CMP_GT_I64 : VOPC_I64 <"v_cmp_gt_i64", COND_SGT>;
-defm V_CMP_NE_I64 : VOPC_I64 <"v_cmp_ne_i64", COND_NE>;
+defm V_CMP_NE_I64 : VOPC_I64 <"v_cmp_ne_i64">;
defm V_CMP_GE_I64 : VOPC_I64 <"v_cmp_ge_i64", COND_SGE>;
defm V_CMP_T_I64 : VOPC_I64 <"v_cmp_t_i64">;
@@ -460,8 +460,8 @@
(inst $src0, $src1)
>;
-def : ICMP_Pattern <COND_EQ, V_CMP_EQ_I32_e64, i32>;
-def : ICMP_Pattern <COND_NE, V_CMP_NE_I32_e64, i32>;
+def : ICMP_Pattern <COND_EQ, V_CMP_EQ_U32_e64, i32>;
+def : ICMP_Pattern <COND_NE, V_CMP_NE_U32_e64, i32>;
def : ICMP_Pattern <COND_UGT, V_CMP_GT_U32_e64, i32>;
def : ICMP_Pattern <COND_UGE, V_CMP_GE_U32_e64, i32>;
def : ICMP_Pattern <COND_ULT, V_CMP_LT_U32_e64, i32>;
@@ -471,8 +471,8 @@
def : ICMP_Pattern <COND_SLT, V_CMP_LT_I32_e64, i32>;
def : ICMP_Pattern <COND_SLE, V_CMP_LE_I32_e64, i32>;
-def : ICMP_Pattern <COND_EQ, V_CMP_EQ_I64_e64, i64>;
-def : ICMP_Pattern <COND_NE, V_CMP_NE_I64_e64, i64>;
+def : ICMP_Pattern <COND_EQ, V_CMP_EQ_U64_e64, i64>;
+def : ICMP_Pattern <COND_NE, V_CMP_NE_U64_e64, i64>;
def : ICMP_Pattern <COND_UGT, V_CMP_GT_U64_e64, i64>;
def : ICMP_Pattern <COND_UGE, V_CMP_GE_U64_e64, i64>;
def : ICMP_Pattern <COND_ULT, V_CMP_LT_U64_e64, i64>;