[ARM] Fix for PR39060
When calculating whether a value can safely overflow for use by an
icmp, we weren't checking that the value couldn't wrap around. To do
this we need the icmp to be using a constant, as well as the incoming
add or sub.
bugzilla report: https://bugs.llvm.org/show_bug.cgi?id=39060
Differential Revision: https://reviews.llvm.org/D52463
llvm-svn: 343092
diff --git a/llvm/lib/Target/ARM/ARMCodeGenPrepare.cpp b/llvm/lib/Target/ARM/ARMCodeGenPrepare.cpp
index 4b73636..fb9fad4 100644
--- a/llvm/lib/Target/ARM/ARMCodeGenPrepare.cpp
+++ b/llvm/lib/Target/ARM/ARMCodeGenPrepare.cpp
@@ -247,41 +247,114 @@
if (isa<OverflowingBinaryOperator>(I) && I->hasNoUnsignedWrap())
return true;
+ // We can support a, potentially, overflowing instruction (I) if:
+ // - It is only used by an unsigned icmp.
+ // - The icmp uses a constant.
+ // - The overflowing value (I) is decreasing, i.e would underflow - wrapping
+ // around zero to become a larger number than before.
+ // - The underflowing instruction (I) also uses a constant.
+ //
+ // We can then use the two constants to calculate whether the result would
+ // wrap in respect to itself in the original bitwidth. If it doesn't wrap,
+ // just underflows the range, the icmp would give the same result whether the
+ // result has been truncated or not. We calculate this by:
+ // - Zero extending both constants, if needed, to 32-bits.
+ // - Take the absolute value of I's constant, adding this to the icmp const.
+ // - Check that this value is not out of range for small type. If it is, it
+ // means that it has underflowed enough to wrap around the icmp constant.
+ //
+ // For example:
+ //
+ // %sub = sub i8 %a, 2
+ // %cmp = icmp ule i8 %sub, 254
+ //
+ // If %a = 0, %sub = -2 == FE == 254
+ // But if this is evalulated as a i32
+ // %sub = -2 == FF FF FF FE == 4294967294
+ // So the unsigned compares (i8 and i32) would not yield the same result.
+ //
+ // Another way to look at it is:
+ // %a - 2 <= 254
+ // %a + 2 <= 254 + 2
+ // %a <= 256
+ // And we can't represent 256 in the i8 format, so we don't support it.
+ //
+ // Whereas:
+ //
+ // %sub i8 %a, 1
+ // %cmp = icmp ule i8 %sub, 254
+ //
+ // If %a = 0, %sub = -1 == FF == 255
+ // As i32:
+ // %sub = -1 == FF FF FF FF == 4294967295
+ //
+ // In this case, the unsigned compare results would be the same and this
+ // would also be true for ult, uge and ugt:
+ // - (255 < 254) == (0xFFFFFFFF < 254) == false
+ // - (255 <= 254) == (0xFFFFFFFF <= 254) == false
+ // - (255 > 254) == (0xFFFFFFFF > 254) == true
+ // - (255 >= 254) == (0xFFFFFFFF >= 254) == true
+ //
+ // To demonstrate why we can't handle increasing values:
+ //
+ // %add = add i8 %a, 2
+ // %cmp = icmp ult i8 %add, 127
+ //
+ // If %a = 254, %add = 256 == (i8 1)
+ // As i32:
+ // %add = 256
+ //
+ // (1 < 127) != (256 < 127)
+
unsigned Opc = I->getOpcode();
- if (Opc == Instruction::Add || Opc == Instruction::Sub) {
- // We don't care if the add or sub could wrap if the value is decreasing
- // and is only being used by an unsigned compare.
- if (!I->hasOneUse() ||
- !isa<ICmpInst>(*I->user_begin()) ||
- !isa<ConstantInt>(I->getOperand(1)))
+ if (Opc != Instruction::Add && Opc != Instruction::Sub)
+ return false;
+
+ if (!I->hasOneUse() ||
+ !isa<ICmpInst>(*I->user_begin()) ||
+ !isa<ConstantInt>(I->getOperand(1)))
+ return false;
+
+ ConstantInt *OverflowConst = cast<ConstantInt>(I->getOperand(1));
+ bool NegImm = OverflowConst->isNegative();
+ bool IsDecreasing = ((Opc == Instruction::Sub) && !NegImm) ||
+ ((Opc == Instruction::Add) && NegImm);
+ if (!IsDecreasing)
+ return false;
+
+ // Don't support an icmp that deals with sign bits.
+ auto *CI = cast<ICmpInst>(*I->user_begin());
+ if (CI->isSigned() || CI->isEquality())
+ return false;
+
+ ConstantInt *ICmpConst = nullptr;
+ if (auto *Const = dyn_cast<ConstantInt>(CI->getOperand(0)))
+ ICmpConst = Const;
+ else if (auto *Const = dyn_cast<ConstantInt>(CI->getOperand(1)))
+ ICmpConst = Const;
+ else
+ return false;
+
+ // Now check that the result can't wrap on itself.
+ APInt Total = ICmpConst->getValue().getBitWidth() < 32 ?
+ ICmpConst->getValue().zext(32) : ICmpConst->getValue();
+
+ Total += OverflowConst->getValue().getBitWidth() < 32 ?
+ OverflowConst->getValue().abs().zext(32) : OverflowConst->getValue().abs();
+
+ APInt Max = APInt::getAllOnesValue(ARMCodeGenPrepare::TypeSize);
+
+ if (Total.getBitWidth() > Max.getBitWidth()) {
+ if (Total.ugt(Max.zext(Total.getBitWidth())))
return false;
-
- auto *CI = cast<ICmpInst>(*I->user_begin());
-
- // Don't support an icmp that deals with sign bits, including negative
- // immediates
- if (CI->isSigned())
+ } else if (Max.getBitWidth() > Total.getBitWidth()) {
+ if (Total.zext(Max.getBitWidth()).ugt(Max))
return false;
+ } else if (Total.ugt(Max))
+ return false;
- if (auto *Const = dyn_cast<ConstantInt>(CI->getOperand(0)))
- if (Const->isNegative())
- return false;
-
- if (auto *Const = dyn_cast<ConstantInt>(CI->getOperand(1)))
- if (Const->isNegative())
- return false;
-
- bool NegImm = cast<ConstantInt>(I->getOperand(1))->isNegative();
- bool IsDecreasing = ((Opc == Instruction::Sub) && !NegImm) ||
- ((Opc == Instruction::Add) && NegImm);
- if (!IsDecreasing)
- return false;
-
- LLVM_DEBUG(dbgs() << "ARM CGP: Allowing safe overflow for " << *I << "\n");
- return true;
- }
-
- return false;
+ LLVM_DEBUG(dbgs() << "ARM CGP: Allowing safe overflow for " << *I << "\n");
+ return true;
}
static bool shouldPromote(Value *V) {
@@ -459,6 +532,8 @@
if (!shouldPromote(V) || isPromotedResultSafe(V))
continue;
+ assert(EnableDSP && "DSP intrinisc insertion not enabled!");
+
// Replace unsafe instructions with appropriate intrinsic calls.
InsertDSPIntrinsic(cast<Instruction>(V));
}