[InstCombine] Simplify cttz/ctlz + icmp ugt/ult
Followup to D55745, this time handling comparisons with ugt and ult
predicates (which are the canonical forms for non-equality predicates).
For ctlz we can convert into a simple icmp, for cttz we can convert
into a mask check.
Differential Revision: https://reviews.llvm.org/D56355
llvm-svn: 351645
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index c3a7b62..81a89e6 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -2610,8 +2610,9 @@
return I;
}
- if (Instruction *I = foldICmpIntrinsicWithConstant(Cmp, *C))
- return I;
+ if (auto *II = dyn_cast<IntrinsicInst>(Cmp.getOperand(0)))
+ if (Instruction *I = foldICmpIntrinsicWithConstant(Cmp, II, *C))
+ return I;
return nullptr;
}
@@ -2755,14 +2756,10 @@
return nullptr;
}
-/// Fold an icmp with LLVM intrinsic and constant operand: icmp Pred II, C.
-Instruction *InstCombiner::foldICmpIntrinsicWithConstant(ICmpInst &Cmp,
- const APInt &C) {
- IntrinsicInst *II = dyn_cast<IntrinsicInst>(Cmp.getOperand(0));
- if (!II || !Cmp.isEquality())
- return nullptr;
-
- // Handle icmp {eq|ne} <intrinsic>, Constant.
+/// Fold an equality icmp with LLVM intrinsic and constant operand.
+Instruction *InstCombiner::foldICmpEqIntrinsicWithConstant(ICmpInst &Cmp,
+ IntrinsicInst *II,
+ const APInt &C) {
Type *Ty = II->getType();
unsigned BitWidth = C.getBitWidth();
switch (II->getIntrinsicID()) {
@@ -2822,6 +2819,65 @@
return nullptr;
}
+/// Fold an icmp with LLVM intrinsic and constant operand: icmp Pred II, C.
+Instruction *InstCombiner::foldICmpIntrinsicWithConstant(ICmpInst &Cmp,
+ IntrinsicInst *II,
+ const APInt &C) {
+ if (Cmp.isEquality())
+ return foldICmpEqIntrinsicWithConstant(Cmp, II, C);
+
+ Type *Ty = II->getType();
+ unsigned BitWidth = C.getBitWidth();
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::ctlz: {
+ // ctlz(0bXXXXXXXX) > 3 -> 0bXXXXXXXX < 0b00010000
+ if (Cmp.getPredicate() == ICmpInst::ICMP_UGT && C.ult(BitWidth)) {
+ unsigned Num = C.getLimitedValue();
+ APInt Limit = APInt::getOneBitSet(BitWidth, BitWidth - Num - 1);
+ return CmpInst::Create(Instruction::ICmp, ICmpInst::ICMP_ULT,
+ II->getArgOperand(0), ConstantInt::get(Ty, Limit));
+ }
+
+ // ctlz(0bXXXXXXXX) < 3 -> 0bXXXXXXXX > 0b00011111
+ if (Cmp.getPredicate() == ICmpInst::ICMP_ULT &&
+ C.uge(1) && C.ule(BitWidth)) {
+ unsigned Num = C.getLimitedValue();
+ APInt Limit = APInt::getLowBitsSet(BitWidth, BitWidth - Num);
+ return CmpInst::Create(Instruction::ICmp, ICmpInst::ICMP_UGT,
+ II->getArgOperand(0), ConstantInt::get(Ty, Limit));
+ }
+ break;
+ }
+ case Intrinsic::cttz: {
+ // Limit to one use to ensure we don't increase instruction count.
+ if (!II->hasOneUse())
+ return nullptr;
+
+ // cttz(0bXXXXXXXX) > 3 -> 0bXXXXXXXX & 0b00001111 == 0
+ if (Cmp.getPredicate() == ICmpInst::ICMP_UGT && C.ult(BitWidth)) {
+ APInt Mask = APInt::getLowBitsSet(BitWidth, C.getLimitedValue() + 1);
+ return CmpInst::Create(Instruction::ICmp, ICmpInst::ICMP_EQ,
+ Builder.CreateAnd(II->getArgOperand(0), Mask),
+ ConstantInt::getNullValue(Ty));
+ }
+
+ // cttz(0bXXXXXXXX) < 3 -> 0bXXXXXXXX & 0b00000111 != 0
+ if (Cmp.getPredicate() == ICmpInst::ICMP_ULT &&
+ C.uge(1) && C.ule(BitWidth)) {
+ APInt Mask = APInt::getLowBitsSet(BitWidth, C.getLimitedValue());
+ return CmpInst::Create(Instruction::ICmp, ICmpInst::ICMP_NE,
+ Builder.CreateAnd(II->getArgOperand(0), Mask),
+ ConstantInt::getNullValue(Ty));
+ }
+ break;
+ }
+ default:
+ break;
+ }
+
+ return nullptr;
+}
+
/// Handle icmp with constant (but not simple integer constant) RHS.
Instruction *InstCombiner::foldICmpInstWithConstantNotInt(ICmpInst &I) {
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);