[InstSimplify] simplifyUnsignedRangeCheck(): if we know that X != 0, handle more cases (PR43246)

Summary:
This is motivated by the sanitizer check enhancement in D67122.
That patch seemingly worsens `-fsanitize=pointer-overflow`
overhead from 25% to 50%, which strongly implies missing folds.

In this particular case, given
```
char* test(char& base, unsigned long offset) {
  return &base + offset;
}
```
it will end up producing something like
https://godbolt.org/z/LK5-iH
which after optimizations reduces down to roughly
```
define i1 @t0(i8* nonnull %base, i64 %offset) {
  %base_int = ptrtoint i8* %base to i64
  %adjusted = add i64 %base_int, %offset
  %non_null_after_adjustment = icmp ne i64 %adjusted, 0
  %no_overflow_during_adjustment = icmp uge i64 %adjusted, %base_int
  %res = and i1 %non_null_after_adjustment, %no_overflow_during_adjustment
  ret i1 %res
}
```
Without D67122 there was no `%non_null_after_adjustment`,
and in this particular case we can get rid of the overhead:

Here we add some offset to a non-null pointer,
and check that the result does not overflow and is not a null pointer.
But since the base pointer is already non-null, and we check for overflow,
that overflow check will already catch the null pointer,
so the separate null check is redundant and can be dropped.

Alive proofs:
https://rise4fun.com/Alive/WRzq

There are more "unsigned-add-with-overflow" patterns that are not handled here,
but this is the main pattern, the one we currently consider canonical,
so it makes sense to handle it.

https://bugs.llvm.org/show_bug.cgi?id=43246

Reviewers: spatel, nikic, vsk

Reviewed By: spatel

Subscribers: hiraditya, llvm-commits, reames

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D67332

llvm-svn: 371349
diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp
index df87b94..7960635 100644
--- a/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -1371,7 +1371,8 @@
 /// Commuted variants are assumed to be handled by calling this function again
 /// with the parameters swapped.
 static Value *simplifyUnsignedRangeCheck(ICmpInst *ZeroICmp,
-                                         ICmpInst *UnsignedICmp, bool IsAnd) {
+                                         ICmpInst *UnsignedICmp, bool IsAnd,
+                                         const DataLayout &DL) {
   Value *X, *Y;
 
   ICmpInst::Predicate EqPred;
@@ -1395,6 +1396,18 @@
   if (UnsignedPred == ICmpInst::ICMP_ULT && EqPred == ICmpInst::ICMP_NE)
     return IsAnd ? UnsignedICmp : ZeroICmp;
 
+  // X <= Y && Y != 0  -->  X <= Y  iff X != 0
+  // X <= Y || Y != 0  -->  Y != 0  iff X != 0
+  if (UnsignedPred == ICmpInst::ICMP_ULE && EqPred == ICmpInst::ICMP_NE &&
+      isKnownNonZero(X, DL))
+    return IsAnd ? UnsignedICmp : ZeroICmp;
+
+  // X > Y && Y == 0  -->  Y == 0  iff X != 0
+  // X > Y || Y == 0  -->  X > Y   iff X != 0
+  if (UnsignedPred == ICmpInst::ICMP_UGT && EqPred == ICmpInst::ICMP_EQ &&
+      isKnownNonZero(X, DL))
+    return IsAnd ? ZeroICmp : UnsignedICmp;
+
   // X >= Y || Y != 0  -->  true
   // X >= Y || Y == 0  -->  X >= Y
   if (UnsignedPred == ICmpInst::ICMP_UGE && !IsAnd) {
@@ -1587,10 +1600,11 @@
 }
 
 static Value *simplifyAndOfICmps(ICmpInst *Op0, ICmpInst *Op1,
-                                 const InstrInfoQuery &IIQ) {
-  if (Value *X = simplifyUnsignedRangeCheck(Op0, Op1, /*IsAnd=*/true))
+                                 const InstrInfoQuery &IIQ,
+                                 const DataLayout &DL) {
+  if (Value *X = simplifyUnsignedRangeCheck(Op0, Op1, /*IsAnd=*/true, DL))
     return X;
-  if (Value *X = simplifyUnsignedRangeCheck(Op1, Op0, /*IsAnd=*/true))
+  if (Value *X = simplifyUnsignedRangeCheck(Op1, Op0, /*IsAnd=*/true, DL))
     return X;
 
   if (Value *X = simplifyAndOfICmpsWithSameOperands(Op0, Op1))
@@ -1660,10 +1674,11 @@
 }
 
 static Value *simplifyOrOfICmps(ICmpInst *Op0, ICmpInst *Op1,
-                                const InstrInfoQuery &IIQ) {
-  if (Value *X = simplifyUnsignedRangeCheck(Op0, Op1, /*IsAnd=*/false))
+                                const InstrInfoQuery &IIQ,
+                                const DataLayout &DL) {
+  if (Value *X = simplifyUnsignedRangeCheck(Op0, Op1, /*IsAnd=*/false, DL))
     return X;
-  if (Value *X = simplifyUnsignedRangeCheck(Op1, Op0, /*IsAnd=*/false))
+  if (Value *X = simplifyUnsignedRangeCheck(Op1, Op0, /*IsAnd=*/false, DL))
     return X;
 
   if (Value *X = simplifyOrOfICmpsWithSameOperands(Op0, Op1))
@@ -1738,8 +1753,8 @@
   auto *ICmp0 = dyn_cast<ICmpInst>(Op0);
   auto *ICmp1 = dyn_cast<ICmpInst>(Op1);
   if (ICmp0 && ICmp1)
-    V = IsAnd ? simplifyAndOfICmps(ICmp0, ICmp1, Q.IIQ)
-              : simplifyOrOfICmps(ICmp0, ICmp1, Q.IIQ);
+    V = IsAnd ? simplifyAndOfICmps(ICmp0, ICmp1, Q.IIQ, Q.DL)
+              : simplifyOrOfICmps(ICmp0, ICmp1, Q.IIQ, Q.DL);
 
   auto *FCmp0 = dyn_cast<FCmpInst>(Op0);
   auto *FCmp1 = dyn_cast<FCmpInst>(Op1);