ComputeMaskedBits: Make knownzero computation more aggressive for ctlz with undef zero.
unsigned foo(unsigned x) { return 31 - __builtin_clz(x); }
now compiles into a single "bsrl" instruction on x86.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@147255 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp
index 4739d1b..b9a48ea 100644
--- a/lib/Analysis/ValueTracking.cpp
+++ b/lib/Analysis/ValueTracking.cpp
@@ -714,10 +714,17 @@
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
switch (II->getIntrinsicID()) {
default: break;
- case Intrinsic::ctpop:
case Intrinsic::ctlz:
case Intrinsic::cttz: {
unsigned LowBits = Log2_32(BitWidth)+1;
+ // If this call is undefined for 0, the result will be less than 2^n.
+ if (II->getArgOperand(1) == ConstantInt::getTrue(II->getContext()))
+ LowBits -= 1;
+ KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - LowBits);
+ break;
+ }
+ case Intrinsic::ctpop: {
+ unsigned LowBits = Log2_32(BitWidth)+1;
KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - LowBits);
break;
}
diff --git a/test/Transforms/InstCombine/intrinsics.ll b/test/Transforms/InstCombine/intrinsics.ll
index e31bd7d..7ae6d5a 100644
--- a/test/Transforms/InstCombine/intrinsics.ll
+++ b/test/Transforms/InstCombine/intrinsics.ll
@@ -181,10 +181,10 @@
define void @cmp.simplify(i32 %a, i32 %b, i1* %c) {
entry:
- %lz = tail call i32 @llvm.ctlz.i32(i32 %a, i1 true) nounwind readnone
+ %lz = tail call i32 @llvm.ctlz.i32(i32 %a, i1 false) nounwind readnone
%lz.cmp = icmp eq i32 %lz, 32
store volatile i1 %lz.cmp, i1* %c
- %tz = tail call i32 @llvm.cttz.i32(i32 %a, i1 true) nounwind readnone
+ %tz = tail call i32 @llvm.cttz.i32(i32 %a, i1 false) nounwind readnone
%tz.cmp = icmp ne i32 %tz, 32
store volatile i1 %tz.cmp, i1* %c
%pop = tail call i32 @llvm.ctpop.i32(i32 %b) nounwind readnone
@@ -203,7 +203,7 @@
define i32 @cttz_simplify1(i32 %x) nounwind readnone ssp {
- %tmp1 = tail call i32 @llvm.ctlz.i32(i32 %x, i1 true) ; <i32> [#uses=1]
+ %tmp1 = tail call i32 @llvm.ctlz.i32(i32 %x, i1 false) ; <i32> [#uses=1]
%shr3 = lshr i32 %tmp1, 5 ; <i32> [#uses=1]
ret i32 %shr3
diff --git a/test/Transforms/InstCombine/sub-xor.ll b/test/Transforms/InstCombine/sub-xor.ll
index 41e01fb..bfa9f40 100644
--- a/test/Transforms/InstCombine/sub-xor.ll
+++ b/test/Transforms/InstCombine/sub-xor.ll
@@ -10,3 +10,16 @@
; CHECK-NEXT: xor i32 %and, 63
; CHECK-NEXT: ret
}
+
+declare i32 @llvm.ctlz.i32(i32, i1) nounwind readnone
+
+define i32 @test2(i32 %x) nounwind {
+ %count = tail call i32 @llvm.ctlz.i32(i32 %x, i1 true) nounwind readnone
+ %sub = sub i32 31, %count
+ ret i32 %sub
+
+; CHECK: @test2
+; CHECK-NEXT: ctlz
+; CHECK-NEXT: xor i32 %count, 31
+; CHECK-NEXT: ret
+}